# AWSTATS SEARCH ENGINES DATABASE
#------------------------------------------------------------------------------
# If you want to add a Search Engine to extend AWStats database detection capabilities,
# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in
# SearchEnginesHashLib.
# An entry in SearchEnginesKnownUrl, if known, is also welcome.
#
# to eldy: Please check if the following description is correct:
# You need the following information to specify a search engine:
# (a) A regular expression that matches the referrer string of the
#     search engine. Unclear: What about slashes in the name of
#     a search engine, e.g. as in 'ecosia.com/search'. Seems that
#     AWStats will not find search strings containing a slash.
#     Maybe use a search string without a slash, and - if necessary -
#     an entry in %NotSearchEnginesKeys , if this search string
#     matches entries that are not search engines.
# (b) A unique string to identify the search engine within AWStats
# (c) A regular expression that finds the start of the query part in the
#     referrer string
# (d) A HTML-fragment that goes into the reports generated by AWStats which
#     identifies the search engine to human reader of the report. In the
#     simplest case this is a string containing the name of the search
#     engine. You can also provide a hypertext clause that presents the
#     name together with a link to the search engine.
#
# The regular expression (a) goes into SearchEnginesSearchIDOrder_list1
# or ..._list2. List 1 contains common search engines, list 2 those
# that are not so often used.
#
# SearchEnginesHashID contains two consecutive entries for each search
# engine: The regular expression (a) followed by the search engine
# identifier (b)
#
# SearchEnginesKnownUrl specifies how to find the start of the query.
# For each search engine you enter the search engine identifier (b)
# followed by the regular expression (c). Unclear: It is possible to
# omit this entry. If you do this, how will AWStats find the start of
# the query?
#
# SearchEnginesHashLib contains also two entries for each search engine:
# The search engine identifier (b) followed by the HTML-Fragment (d)
#
# There are search engines that do not use a query part in their URLs.
# They put the search expression in the main part of the URL instead.
# AWStats is able to handle these cases. They are specified as described
# above, except the following two things:
# - The regular expression (c) searches the complete URL and not only
#   the query part.
# - An additional Entry in the list %SearchEnginesWithKeysNotInQuery is
#   necessary.
#
#
# AWStats runs a sanity check of the contents of search_engines.pm. This
# check detects the following things:
# - Inconsistencies (number of entries)
# It does not detect the following errors:
# - If the HTML-Fragment (d) is syntactically incorrect.
#
#------------------------------------------------------------------------------
# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
#	added minor italian search engines
#		arianna http://arianna.libero.it/
#		supereva http://search.supereva.com/
#		kataweb http://kataweb.it/
#	corrected uk looksmart
#		'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=',
#	to
#		'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
#	corrected spelling
#		internationnal -> international
#	added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to
#	avoid counting gmail referrals as search engine traffic
# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html
#	avoid counting babelfish.altavista referrals as search engine traffic
#	avoid counting translate.google referrals as search engine traffic
# 2005-11-20 Sean Carlos
#	added missing 'tiscali','key=', entry. Check order
# 2005-11-22 Sean Carlos
#	added Google Base & Froogle. Froogle not tested.
# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html # added biglotron.com (France) # added blingo http://www.blingo.com/ # added Clusty & Vivisimo # added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783] # added GPU p2p search http://search.centraldatabase.org/ # added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688] # added Ask group's "mysearch" # added sify.com (India) # added sogou.com (Cina) [https://sourceforge.net/forum/message.php?msg_id=3501603] # Ask changes: # - added Ask Japan (ask.jp) # - break out Ask new country level variants (DE, ES, FR, IT, NL) # - updated Ask name from Ask Jevees # - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444] # - updated Ask uk (new uk.ask.com added to older ask.co.uk) # updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912] # for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch # to AWStats to allow untranslated html. Otherwise html will appear instead of link. # reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engined mentioned no longer # exists https://sourceforge.net/forum/message.php?msg_id=3025426 # 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html # added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden) # added Alice Internal Search (blends data with Google?) search.alice.it.master:10005 # added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104 # To do: add more extensive IP list; keywords not yet detected. # added icerocket.com blog search http://www.icerocket.com/ # added live.com (msn) http://www.live.com/ # added Meta motor kartoo. 
Note: Kartoo does not provide search words in referrers, thus the engine will appear in the # search engine list but the actual search words are not available. # added netluchs.de http://www.netluchs.de/ # added sphere.com blog search http://www.sphere.com/ # added wwweasel.de http://wwweasel.de # added Yahoo Mindset! http://mindset.research.yahoo.com/ # updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland) # 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html # added Google cache IPs 64.233.183.104 & 66.102.7.104 # 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html # anzwers.com.au # schoenerbrausen.de http://www.schoenerbrausen.de/ # added Google cache IP 216.239.59.104 # answerbus http://www.answerbus.com/ (does not provide keywords) # 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html # added Google cache IP 66.102.9.104, 64.233.161.104 # 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html # added Alice Search search.alice.it # added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web" # added googlee.com, variant of Google # added gotuneed http://www.gotuneed.com/ Italian search engine, in beta # added icq.com # added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear. The URLs are probably too varied to parse out? 
# added Nusearch http://www.nusearch.com/ # added Polymeta www.polymeta.hu (does not provide keywords) # added scroogle http://www.scroogle.org/ (does not always provide keywords) # added Tango http://tango.hu/search.php?st=0&q=jeles+napok # Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104 # 72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104 # 216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104 # 66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104 # 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html # added Onet.pl http://szukaj.onet.pl/ # corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/ # 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html # Additional Polish Search Engines: # added Dodaj.pl http://www.dodaj.pl/ # added Gazeta.pl http://szukaj.gazeta.pl/ # added Gery.pl http://szukaj.gery.pl/ # added Hoga.pl http://www.hoga.pl/ # added Interia.pl http://www.google.interia.pl/ # added Katalog.Onet.pl http://katalog.onet.pl/ # added NetSprint.pl http://www.netsprint.pl/ # added o2.pl http://szukaj2.o2.pl/ # added Polska http://szukaj.polska.pl/ # added Szukacz http://www.szukacz.pl/ # added Wow.pl http://szukaj.wow.pl/ # added Sagool http://sagool.jp/ # 2006-08-25 Social Bookmarks # International # added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer? # added stumbleupon.com - No keywords supplied. # added swik.net # added digg. Keywords sometimes supplied. # Italy # added segnalo.alice.it - No keywords supplied. # added ineffabile.it - No keywords supplied. # added filter for google groups. Attempt to parse group name as keyword. 
# 2006-09-14 # added Eniro Sverige http://www.eniro.se/ # added MyWebSearch http://search.mywebsearch.com/ # added Teecno http://www.teecno.it/ Italian Open Source Search Engine #package AWSSE; # 2006-09-25 (Gabor Moizes) # added 4-counter (Google alternative) http://4-counter.com/ # added Googlecom (Google alternative) http://googlecom.com/ # added Goggle (Google alternative) http://goggle.co.hu/ # added Comet toolbar http://as.starware.com # added new IP for Yahoo: 216.109.125.130 # added Ledix http://ledix.net/ # added AT&T search (powered by Google) http://www.att.net/ # added Keresolap (Hungarian search engine) http://www.keresolap.hu/ # added Mozbot (French search engine) http://www.mozbot.fr/ # added Zoznam (Slovak search engine) http://www.zoznam.sk/ # added sapo.pt (Portuguese search engine) http://www.sapo.pt/ # added shaw.ca (powered by Google) http://start.shaw.ca/ # added Searchalot http://www.searchalot.com/ # added Copernic http://www.copernic.com/ # added 216.109.125.130 to Yahoo # added 66.218.69.11 to Yahoo # added Avantfind http://www.avantfind.com/ # added Steadysearch http://www.steadysearch.com/ # added Steadysearch http://www.steady-search.com/ # modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104 # SearchEnginesSearchIDOrder # It contains all matching criteria to search for in log fields. This list is # used to know in which order to search Search Engines IDs. 
# Most frequent ones are in list1, used when LevelForSearchEnginesDetection is 1 or more
# Minor robots are in list2, used when LevelForSearchEnginesDetection is 2 or more
# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'
#
# Maintenance rules for the tables below:
# - Every regex listed in a SearchEnginesSearchIDOrder_list* array must appear,
#   spelled exactly the same, as a key of %SearchEnginesHashID.
# - Order matters inside a list: specific patterns (e.g. 'de\.ask\.com') must
#   come before the catch-all they overlap with (e.g. '(^|\.)ask\.com').
# - Entries are single-quoted, so backslashes reach the regex engine as
#   written. Write '\.' for a literal dot; a stray '.\c' would be read as
#   "any character" followed by a control-character escape.
#------------------------------------------------------------------------------
@SearchEnginesSearchIDOrder_list1=(
# Major international search engines
'google\.[\w.]+/products', 'base\.google\.', 'froogle\.google\.', 'groups\.google\.', 'images\.google\.',
'google\.', 'googlee\.', 'googlecom\.com', 'goggle\.co\.hu',
'216\.239\.32\.20',
'216\.239\.(35|37|39|51)\.100',
'216\.239\.(35|37|39|51)\.101',
'216\.239\.5[0-9]\.104',
'64\.233\.1[0-9]{2}\.104',
'66\.102\.[1-9]\.104',
'66\.249\.93\.104',
'72\.14\.2[0-9]{2}\.104',
'msn\.', 'live\.com', 'bing\.', 'voila\.',
'mindset\.research\.yahoo',
'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)',
'search\.aol\.co', 'tiscali\.', 'lycos\.', 'alexa\.com', 'alltheweb\.com', 'altavista\.', 'a9\.com',
'dmoz\.org', 'netscape\.', 'search\.terra\.', 'www\.search\.com', 'search\.sli\.sympatico\.ca', 'excite\.'
);

@SearchEnginesSearchIDOrder_list2=(
# Minor international search engines
'4\-counter\.com', 'att\.net', 'bungeebonesdotcom', 'northernlight\.', 'hotbot\.', 'kvasir\.',
'webcrawler\.', 'metacrawler\.', 'go2net\.com', '(^|\.)go\.com', 'euroseek\.', 'looksmart\.', 'spray\.',
'nbci\.com\/search',
'de\.ask\.com', # break out Ask country specific engines. (.jp is in Japan section)
'es\.ask\.com', 'fr\.ask\.com', 'it\.ask\.com', 'nl\.ask\.com', 'uk\.ask\.com',
'(^|\.)ask\.com',
'atomz\.',
'overture\.com', # Replace 'goto\.com','Goto.com',
'teoma\.', 'findarticles\.com', 'infospace\.com', 'mamma\.', 'dejanews\.', 'dogpile\.com', 'wisenut\.com',
'ixquick\.com', 'search\.earthlink\.net', 'i-une\.com', 'blingo\.com', 'centraldatabase\.org', 'clusty\.com',
'mysearch\.', 'vivisimo\.com', 'kartoo\.com', 'icerocket\.com', 'sphere\.com', 'ledix\.net', 'start\.shaw\.ca',
'searchalot\.com', 'copernic\.com', 'avantfind\.com', 'steadysearch\.com', 'steady-search\.com',
'claro-search\.com', 'www1\.search-results\.com', 'www\.holasearch\.com', 'search\.conduit\.com',
'static\.flipora\.com', '(?:www[12]?|mixidj)\.delta-search\.com', 'start\.iminent\.com',
'www\.searchmobileonline\.com', 'int\.search-results\.com', 'www2\.inbox\.com', 'www\.govome\.com',
'find1friend\.com', 'start\.mysearchdial\.com', 'go\.speedbit\.com', 'search\.certified-toolbar\.com',
'search\.sweetim\.com', 'search\.searchcompletion\.com', 'en\.eazel\.com', 'sr\.searchfunmoods\.com',
'173\.194\.35\.177', 'dalesearch\.com', 'sweetpacks-search\.com', 'searchgol\.com', 'duckduckgo\.com',
'sr\.facemoods\.com', 'shoppstop\.com', 'searchya\.com', 'picsearch\.de', 'webssearches\.com',
'zapmeta\.de', 'localmoxie\.com',
# Chello Portals
'chello\.at', 'chello\.be', 'chello\.cz', 'chello\.fr', 'chello\.hu', 'chello\.nl', 'chello\.no',
'chello\.pl', 'chello\.se', 'chello\.sk',
'chello', # required as catchall for new countries not yet known
# Mirago
'mirago\.be', 'mirago\.ch', 'mirago\.de', 'mirago\.dk', 'es\.mirago\.com', 'mirago\.fr', 'mirago\.it',
'mirago\.nl', 'no\.mirago\.com', 'mirago\.se', 'mirago\.co\.uk',
'mirago', # required as catchall for new countries not yet known
'answerbus\.com', 'icq\.com\/search', 'nusearch\.com', 'goodsearch\.com', 'scroogle\.org',
'questionanswering\.com', 'mywebsearch\.com', 'as\.starware\.com',
# Social Bookmarking Services
'del\.icio\.us', 'digg\.com', 'stumbleupon\.com', 'swik\.net', 'segnalo\.alice\.it', 'ineffabile\.it',
# Minor Australian search engines
'anzwers\.com\.au',
# Minor brazilian search engines
'engine\.exe', 'miner\.bol\.com\.br',
# Minor chinese search engines
'\.baidu\.com', # baidu search portal
'\.vnet\.cn', # powered by MSN
'\.soso\.com', # powered by Google
'\.sogou\.com', # powered by Sohu
'\.3721\.com', # powered by Yahoo!
'iask\.com', # powered by Sina
'\.accoona\.com', # Accoona
'\.163\.com', # powered by Google
'\.zhongsou\.com', # zhongsou search portal
# Minor czech search engines
'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz',
'isearch\.avg\.com',
# Minor danish search-engines
'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk',
'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk',
# Minor dutch search engines
'ilse\.','vindex\.',
# Minor english search engines
'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.',
'spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',
'search\.fbdownloader\.com', 'search\.babylon\.com', 'my\.allgameshome\.com',
# Minor finnish search engines
'haku\.www\.fi',
# Minor french search engines
'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr',
'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr', 'toile\.com', 'biglotron\.com', 'mozbot\.fr',
# Minor german search engines
'sucheaol\.aol\.de', 'o2suche\.aol\.de',
'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de',
'suchen\.abacho\.de','(brisbane|suche)\.t-online\.de','allesklar\.de','meinestadt\.de',
'212\.227\.33\.241',
'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)',
'wwweasel\.de', 'netluchs\.de', 'schoenerbrausen\.de', 'suche\.gmx\.net', 'suche\.gmx\.at', 'ecosia\.org',
'de\.aolsearch\.com', 'suche\.aol\.de', 'www\.startxxl\.com', 'www\.benefind\.de',
'www\.amazon\.de.*search', #Just as a reminder, probably will not work as AWstats seem to consider the host part of an URL only
'de\.wow\.com', 'www\.vlips\.de', 'metager\.de', 'search\.1und1\.de', 'sm\.de', 'sumaja\.de',
'navigationshilfe\.t-online\.de', 'umfis\.de', 'fastbot\.de', 'tixuma\.de',
# Minor Hungarian search engines
'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
'tango\.hu', 'keresolap\.hu', 'kereso\.startlap\.hu', 'polymeta\.hu',
# Minor Indian search engines
'sify\.com',
# Minor Italian search engines
'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master','search\.alice\.it','gotuneed\.com',
'godado','jumpy\.it','shinyseek\.it','teecno\.it',
# Minor Israeli search engines
'search\.genieo\.com',
# Minor Japanese search engines
'ask\.jp','sagool\.jp',
# Minor Norwegian search engines
'sok\.start\.no', 'eniro\.no',
# Minor Polish search engines
'szukaj\.wp\.pl','szukaj\.onet\.pl','dodaj\.pl','gazeta\.pl','gery\.pl','hoga\.pl','netsprint\.pl','interia\.pl','katalog\.onet\.pl','o2\.pl','polska\.pl','szukacz\.pl','wow\.pl',
# Minor russian search engines
'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru', 'go\.mail\.ru',
# Minor Swedish search engines
'evreka\.passagen\.se','eniro\.se',
# Minor Slovak search engines
'zoznam\.sk',
# Minor Portuguese search engines
'sapo\.pt',
# Minor swiss search engines
'search\.ch', 'search\.bluewin\.ch',
# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
'pogodak\.'
);

@SearchEnginesSearchIDOrder_listgen=(
# Generic search engines
'search\..*\.\w+'
);


# NotSearchEnginesKeys
# If a search engine key is found, we check its exclude list to know if it's
# really a search engine.
# Keys are search IDs spelled exactly as in the SearchEnginesSearchIDOrder
# lists; values are regexes for referrers that must NOT be counted.
# A hash keeps only the last value stored under a given key, so several
# exclusions for one engine must be combined into a single alternation.
#------------------------------------------------------------------------------
%NotSearchEnginesKeys=(
'altavista\.'=>'babelfish\.altavista\.',
'google\.'=>'(mail|translate|code)\.google\.',
'msn\.'=>'hotmail\.msn\.',
'tiscali\.'=>'mail\.tiscali\.',
'yahoo\.'=>'mail\.yahoo\.',
'ya(ndex)?\.ru'=>'direct\.yandex\.'
);


# SearchEnginesHashID
# Each Search Engine Search ID is associated to an AWStats id string.
# Entries come in pairs: the search ID regex (same spelling as in the
# SearchEnginesSearchIDOrder lists) followed by the AWStats id.
#------------------------------------------------------------------------------
%SearchEnginesHashID = (
# Major international search engines
'google\.[\w.]+/products','google_products',
'base\.google\.','google_base',
'froogle\.google\.','google_froogle',
'groups\.google\.','google_groups',
'images\.google\.','google_image',
'google\.','google',
'googlee\.','google',
'googlecom\.com','google',
'goggle\.co\.hu','google',
'216\.239\.32\.20', 'google',
'216\.239\.(35|37|39|51)\.100','google_cache',
'216\.239\.(35|37|39|51)\.101','google_cache',
'216\.239\.5[0-9]\.104','google_cache',
'64\.233\.1[0-9]{2}\.104','google_cache',
'66\.102\.[1-9]\.104','google_cache',
'66\.249\.93\.104','google_cache',
'72\.14\.2[0-9]{2}\.104','google_cache',
'msn\.','msn',
'live\.com','live',
'bing\.','bing',
'voila\.','voila',
'mindset\.research\.yahoo','yahoo_mindset',
'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo',
'lycos\.','lycos',
'alexa\.com','alexa',
'alltheweb\.com','alltheweb',
'altavista\.','altavista',
'a9\.com','a9',
'dmoz\.org','dmoz',
'netscape\.','netscape',
'search\.terra\.','terra',
'www\.search\.com','search.com',
'tiscali\.','tiscali',
'search\.aol\.co','aol',
'search\.sli\.sympatico\.ca','sympatico',
'excite\.','excite',
# Minor international search engines
'4\-counter\.com','google4counter',
'att\.net','att',
'bungeebonesdotcom','bungeebonesdotcom',
'northernlight\.','northernlight',
'hotbot\.','hotbot',
'kvasir\.','kvasir',
'webcrawler\.','webcrawler',
'metacrawler\.','metacrawler',
'go2net\.com','go2net',
'(^|\.)go\.com','go',
'euroseek\.','euroseek',
'looksmart\.','looksmart',
'spray\.','spray',
'nbci\.com\/search','nbci',
'de\.ask\.com','askde', # break out Ask country specific engines.
'es\.ask\.com','askes',
'fr\.ask\.com','askfr',
'it\.ask\.com','askit',
'nl\.ask\.com','asknl',
'uk\.ask\.com','askuk',
'(^|\.)ask\.co\.uk','askuk',
'(^|\.)ask\.com','ask',
'atomz\.','atomz',
'overture\.com','overture', # Replace 'goto\.com','Goto.com',
'teoma\.','teoma',
'findarticles\.com','findarticles',
'infospace\.com','infospace',
'mamma\.','mamma',
'dejanews\.','dejanews',
'dogpile\.com','dogpile',
'wisenut\.com','wisenut',
'ixquick\.com','ixquick',
'search\.earthlink\.net','earthlink',
'i-une\.com','iune',
'blingo\.com','blingo',
'centraldatabase\.org','centraldatabase',
'clusty\.com','clusty',
'mysearch\.','mysearch',
'vivisimo\.com','vivisimo',
'kartoo\.com','kartoo',
'icerocket\.com','icerocket',
'sphere\.com','sphere',
'ledix\.net','ledix',
'start\.shaw\.ca','shawca',
'searchalot\.com','searchalot',
'copernic\.com','copernic',
'avantfind\.com','avantfind',
'steadysearch\.com','steadysearch',
'steady-search\.com','steadysearch',
'claro-search\.com','clarosearch',
'www1\.search-results\.com', 'searchresults',
'www\.holasearch\.com', 'holasearch',
'search\.conduit\.com', 'conduit',
'static\.flipora\.com', 'flipora',
'(?:www[12]?|mixidj)\.delta-search\.com', 'delta-search',
'start\.iminent\.com', 'iminent',
'www\.searchmobileonline\.com', 'searchmobileonline',
'int\.search-results\.com', 'nortonsavesearch',
'www2\.inbox\.com', 'inbox',
'www\.govome\.com', 'govome',
'find1friend\.com', 'find1friend',
'start\.mysearchdial\.com', 'mysearchdial',
'go\.speedbit\.com', 'speedbit',
'search\.certified-toolbar\.com', 'certifiedtoolbarsearch',
'search\.sweetim\.com', 'sweetim',
'search\.searchcompletion\.com', 'searchcompletion',
'en\.eazel\.com','eazelsearch',
'sr\.searchfunmoods\.com', 'searchfunmoods',
'173\.194\.35\.177', 'googleByIP',
'dalesearch\.com', 'dalesearch',
'sweetpacks-search\.com', 'sweetpacks',
'searchgol\.com', 'searchgol',
'duckduckgo\.com', 'duckduckgo',
'sr\.facemoods\.com', 'facemoods',
'shoppstop\.com', 'shoppstop',
'searchya\.com', 'searchya',
'picsearch\.de', 'picsearch',
'webssearches\.com', 'webssearches',
'zapmeta\.de', 'zapmeta',
'localmoxie\.com', 'localmoxie',
# Chello Portals
'chello\.at','chelloat',
'chello\.be','chellobe',
'chello\.cz','chellocz',
'chello\.fr','chellofr',
'chello\.hu','chellohu',
'chello\.nl','chellonl',
'chello\.no','chellono',
'chello\.pl','chellopl',
'chello\.se','chellose',
'chello\.sk','chellosk',
'chello','chellocom',
# Mirago
'mirago\.be','miragobe',
'mirago\.ch','miragoch',
'mirago\.de','miragode',
'mirago\.dk','miragodk',
'es\.mirago\.com','miragoes',
'mirago\.fr','miragofr',
'mirago\.it','miragoit',
'mirago\.nl','miragonl',
'no\.mirago\.com','miragono',
'mirago\.se','miragose',
'mirago\.co\.uk','miragocouk',
'mirago','mirago', # required as catchall for new countries not yet known
'answerbus\.com','answerbus',
'icq\.com\/search','icq',
'nusearch\.com','nusearch',
'goodsearch\.com','goodsearch',
'scroogle\.org','scroogle',
'questionanswering\.com','questionanswering',
'mywebsearch\.com','mywebsearch',
'as\.starware\.com','comettoolbar',
# Social Bookmarking Services
'del\.icio\.us','delicious',
'digg\.com','digg',
'stumbleupon\.com','stumbleupon',
'swik\.net','swik',
'segnalo\.alice\.it','segnalo',
'ineffabile\.it','ineffabile',
# Minor Australian search engines
'anzwers\.com\.au','anzwers',
# Minor brazilian search engines
'engine\.exe','engine',
'miner\.bol\.com\.br','miner',
# Minor chinese search engines
'\.baidu\.com','baidu',
'iask\.com','iask',
'\.accoona\.com','accoona',
'\.3721\.com','3721',
'\.163\.com','netease',
'\.soso\.com','soso',
'\.zhongsou\.com','zhongsou',
'\.vnet\.cn','vnet',
'\.sogou\.com','sogou',
# Minor czech search engines
'atlas\.cz','atlas',
'seznam\.cz','seznam',
'quick\.cz','quick',
'centrum\.cz','centrum',
'jyxo\.(cz|com)','jyxo',
'najdi\.to','najdi',
'redbox\.cz','redbox',
'isearch\.avg\.com', 'avgsearch',
# Minor danish search-engines
'opasia\.dk','opasia',
'danielsen\.com','danielsen',
'sol\.dk','sol',
'jubii\.dk','jubii',
'find\.dk','finddk',
'edderkoppen\.dk','edderkoppen',
'netstjernen\.dk','netstjernen',
'orbis\.dk','orbis',
'tyfon\.dk','tyfon',
'1klik\.dk','1klik',
'ofir\.dk','ofir',
# Minor dutch search engines
'ilse\.','ilse',
'vindex\.','vindex',
# Minor english search engines
'bbc\.co\.uk/cgi-bin/search','bbc',
'ifind\.freeserve','freeserve',
'looksmart\.co\.uk','looksmartuk',
'splut\.','splut',
'spotjockey\.','spotjockey',
'ukdirectory\.','ukdirectory',
'ukindex\.co\.uk','ukindex',
'ukplus\.','ukplus',
'searchy\.co\.uk','searchy',
'search\.fbdownloader\.com','fbdownloader',
'search\.babylon\.com', 'babylon',
'my\.allgameshome\.com', 'allgameshome',
# Minor finnish search engines
'haku\.www\.fi','haku',
# Minor french search engines
'recherche\.aol\.fr','aolfr',
'ctrouve\.','ctrouve',
'francite\.','francite',
'\.lbb\.org','lbb',
'rechercher\.libertysurf\.fr','libertysurf',
'search[\w\-]+\.free\.fr','free',
'recherche\.club-internet\.fr','clubinternet',
'toile\.com','toile',
'biglotron\.com', 'biglotron',
'mozbot\.fr', 'mozbot',
# Minor german search engines
'sucheaol\.aol\.de','aolde',
'o2suche\.aol\.de','o2aolde',
'fireball\.de','fireball',
'infoseek\.de','infoseek',
'suche\d?\.web\.de','webde',
'[a-z]serv\.rrzn\.uni-hannover\.de','meta',
'suchen\.abacho\.de','abacho',
'(brisbane|suche)\.t-online\.de','t-online',
'allesklar\.de','allesklar',
'meinestadt\.de','meinestadt',
'212\.227\.33\.241','metaspinner',
'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de',
'wwweasel\.de','wwweasel',
'netluchs\.de','netluchs',
'schoenerbrausen\.de','schoenerbrausen',
'suche\.gmx\.net', 'gmxsuche',
'suche\.gmx\.at', 'gmxsuche_at',
'ecosia\.org', 'ecosiasearch',
'de\.aolsearch\.com', 'aolsearch',
'suche\.aol\.de', 'aolsuche',
'www\.startxxl\.com', 'startxxl',
'www\.benefind\.de', 'benefind',
'www\.amazon\.de.*search', 'amazonsearch', #Not clear if this matches amazon searches only
'de\.wow\.com', 'wowsearch',
'www\.vlips\.de', 'vlips_de',
'metager\.de', 'metager',
'search\.1und1\.de', 'search_1und1_de',
'sm\.de', 'smde',
'sumaja\.de', 'sumaja',
'navigationshilfe\.t-online\.de', 'navigationshilfe',
'umfis\.de', 'umfis',
'fastbot\.de', 'fastbot_de',
'tixuma\.de', 'tixuma_de',
# Minor Hungarian search engines
'heureka\.hu','heureka',
'vizsla\.origo\.hu','origo',
'lapkereso\.hu','lapkereso',
'goliat\.hu','goliat',
'index\.hu','indexhu',
'wahoo\.hu','wahoo',
'webmania\.hu','webmania',
'search\.internetto\.hu','internetto',
'tango\.hu','tango_hu',
'keresolap\.hu','keresolap_hu',
'kereso\.startlap\.hu', 'startlap_hu',
'polymeta\.hu','polymeta_hu',
# Minor Indian search engines
'sify\.com','sify',
# Minor Italian search engines
'virgilio\.it','virgilio',
'arianna\.libero\.it','arianna',
'supereva\.com','supereva',
'kataweb\.it','kataweb',
'search\.alice\.it\.master','aliceitmaster',
'search\.alice\.it','aliceit',
'gotuneed\.com','gotuneed',
'godado','godado',
'jumpy\.it','jumpy\.it',
'shinyseek\.it','shinyseek\.it',
'teecno\.it','teecnoit',
# Minor Israeli search engines
'search\.genieo\.com', 'genieo',
# Minor Japanese search engines
'ask\.jp','askjp',
'sagool\.jp','sagool',
# Minor Norwegian search engines
'sok\.start\.no','start',
'eniro\.no','eniro',
# Minor Polish search engines
'szukaj\.wp\.pl','wp',
'szukaj\.onet\.pl','onetpl',
'dodaj\.pl','dodajpl',
'gazeta\.pl','gazetapl',
'gery\.pl','gerypl',
'hoga\.pl','hogapl', # keyed like the 'hoga\.pl' entry of SearchEnginesSearchIDOrder_list2
'netsprint\.pl','netsprintpl',
'interia\.pl','interiapl',
'katalog\.onet\.pl','katalogonetpl',
'o2\.pl','o2pl',
'polska\.pl','polskapl',
'szukacz\.pl','szukaczpl',
'wow\.pl','wowpl',
# Minor russian search engines
'ya(ndex)?\.ru','yandex',
'aport\.ru','aport',
'rambler\.ru','rambler',
'turtle\.ru','turtle',
'metabot\.ru','metabot',
'go\.mail\.ru', 'mailru',
# Minor Swedish search engines
'evreka\.passagen\.se','passagen',
'eniro\.se','enirose',
# Minor Slovak search engines
'zoznam\.sk','zoznam',
# Minor Portuguese search engines
'sapo\.pt','sapo',
# Minor swiss search engines
'search\.ch','searchch',
'search\.bluewin\.ch','bluewin',
# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
'pogodak\.','pogodak',
# Generic search engines
'search\..*\.\w+','search'
);


# SearchEnginesWithKeysNotInQuery
# List of search engines that store keyword as page instead of query parameter
#------------------------------------------------------------------------------
%SearchEnginesWithKeysNotInQuery=(
'a9',1, # www.a9.com/searchkey1%20searchkey2
'iminent',1 #http://start.iminent.com/StartWeb/1031/toolbox/#q=searchkey1%20searchkey2&additional_arguments
);

# SearchEnginesKnownUrl
# Known rules to extract keywords from a referrer search engine URL
#------------------------------------------------------------------------------
%SearchEnginesKnownUrl=(
# Most common search engines
'alexa','q=',
'alltheweb','q(|uery)=',
'altavista','q=',
'a9','a9\.com\/',
'dmoz','search=',
'google_products','(p|q|as_p|as_q)=',
'google_base','(p|q|as_p|as_q)=',
'google_froogle','(p|q|as_p|as_q)=',
'google_groups','group\/', # does not work
'google_image','(p|q|as_p|as_q)=',
'google_cache','(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:',
'google','(p|q|as_p|as_q)=',
'lycos','query=',
'msn','q=',
'live','q=',
'bing','q=',
'netscape','search=',
'tiscali','key=',
'aol','query=',
'terra','query=',
'voila','(kw|rdata)=', 'search.com','q=', 'yahoo_mindset','p=', 'yahoo','p=', 'sympatico', 'query=', 'excite','search=', # Minor international search engines 'google4counter','(p|q|as_p|as_q)=', 'att','qry=', 'bungeebonesdotcom','query=', 'go','qt=', 'askde','(ask|q)=', # break out Ask country specific engines. 'askes','(ask|q)=', 'askfr','(ask|q)=', 'askit','(ask|q)=', 'asknl','(ask|q)=', 'ask','(ask|q)=', 'atomz','sp-q=', 'euroseek','query=', 'findarticles','key=', 'go2net','general=', 'hotbot','mt=', 'infospace','qkw=', 'kvasir', 'q=', 'looksmart','key=', 'mamma','query=', 'metacrawler','general=', 'nbci','keyword=', 'northernlight','qr=', 'overture','keywords=', 'dogpile', 'q(|kw)=', 'spray','string=', 'teoma','q=', 'webcrawler','searchText=', 'wisenut','query=', 'ixquick', 'query=', 'earthlink', 'q=', 'iune','(keywords|q)=', 'blingo','q=', 'centraldatabase','query=', 'clusty','query=', 'mysearch','searchfor=', 'vivisimo','query=', # kartoo: No keywords passed in referring URL. 'kartoo','', 'icerocket','q=', 'sphere','q=', 'ledix','q=', 'shawca','q=', 'searchalot','q=', 'copernic','web\/', 'avantfind','keywords=', 'steadysearch','w=', 'clarosearch','q=', 'searchresults','q=', 'holasearch', 'q=', 'conduit', 'q=', 'flipora', 'q=', 'delta-search', 'q=', 'iminent', 'q=', 'searchmobileonline', 'q=', 'nortonsavesearch', 'q=', 'inbox', 'q(?:kw)?=', 'govome', 'q=', 'find1friend', 'q=', 'mysearchdial', 'q=', 'speedbit', 'q=', 'certifiedtoolbarsearch', 'q=', 'sweetim', 'q=', 'searchcompletion', 'q=', 'eazelsearch', 'q=', 'searchfunmoods', 'q=', 'googleByIP', 'q=', 'dalesearch', 'q=', 'sweetpacks', 'q=', 'searchgol', 'q=', 'duckduckgo', 'uddg=', 'facemoods', 'q=', 'shoppstop', 'keywords=', 'searchya', 'q=', 'picsearch', 'q=', 'webssearches', 'q=', 'zapmeta', 'query=', 'localmoxie', 'keyword=', # Chello Portals 'chelloat','q1=', 'chellobe','q1=', 'chellocz','q1=', 'chellofr','q1=', 'chellohu','q1=', 'chellonl','q1=', 'chellono','q1=', 'chellopl','q1=', 'chellose','q1=', 
'chellosk','q1=', 'chellocom','q1=', # Mirago 'miragobe','(txtsearch|qry)=', 'miragoch','(txtsearch|qry)=', 'miragode','(txtsearch|qry)=', 'miragodk','(txtsearch|qry)=', 'miragoes','(txtsearch|qry)=', 'miragofr','(txtsearch|qry)=', 'miragoit','(txtsearch|qry)=', 'miragonl','(txtsearch|qry)=', 'miragono','(txtsearch|qry)=', 'miragose','(txtsearch|qry)=', 'miragocouk','(txtsearch|qry)=', 'mirago','(txtsearch|qry)=', 'answerbus','', # Does not provide query parameters 'icq','q=', 'nusearch','nusearch_terms=', 'goodsearch','Keywords=', 'scroogle','Gw=', # Does not always provide query parameters 'questionanswering','', 'mywebsearch','searchfor=', 'comettoolbar','qry=', # Social Bookmarking Services 'delicious','all=', 'digg','s=', 'stumbleupon','', 'swik','swik\.net/', # does not work. Keywords follow domain, e.g. http://swik.net/awstats+analytics 'segnalo','', 'ineffabile','', # Minor Australian search engines 'anzwers','search=', # Minor brazilian search engines 'engine','p1=', 'miner','q=', # Minor chinese search engines 'baidu','(wd|word)=', 'iask','(w|k)=', 'accoona','qt=', '3721','(p|name)=', 'netease','q=', 'soso','q=', 'zhongsou','(word|w)=', 'sogou', 'query=', 'vnet','kw=', # Minor czech search engines 'atlas','(searchtext|q)=', 'seznam','(w|q)=', 'quick','query=', 'centrum','q=', 'jyxo','(s|q)=', 'najdi','dotaz=', 'redbox','srch=', 'avgsearch', 'q=', # Minor danish search engines 'opasia','q=', 'danielsen','q=', 'sol','q=', 'jubii','soegeord=', 'finddk','words=', 'edderkoppen','query=', 'orbis','search_field=', '1klik','query=', 'ofir','querytext=', # Minor dutch search engines 'ilse','search_for=', 'vindex','in=', # Minor english search engines 'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', 'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=', 'fbdownloader','q=', 'babylon','q=', 'allgameshome', 's=', # Minor finnish search engines 'haku','w=', # 
Minor french search engines 'francite','name=', 'clubinternet', 'q=', 'toile', 'q=', 'biglotron','question=', 'mozbot','q=', # Minor german search engines 'aolde','q=', 'o2aolde', 'q=', 'fireball','q=', 'infoseek','qt=', 'webde','su=', 'abacho','q=', 't-online','q=', 'metaspinner','qry=', 'metacrawler_de','qry=', 'wwweasel','q=', 'netluchs','query=', 'schoenerbrausen','q=', 'gmxsuche', 'q=', 'gmxsuche_at', 'q=', 'ecosiasearch', 'q=', 'aolsearch', 'q=', 'aolsuche', 'q=', 'startxxl', 'q=', 'benefind', 'q=', 'amazonsearch', 'query=', 'wowsearch', 'q=', 'vlips_de', 'q=', 'metager', 'eingabe=', 'search_1und1_de', 'q=', 'smde', 'q=', #'sumaja', 'no query string available', #There is no query string in the referrer url 'navigationshilfe', 'q=', 'umfis', 'suchbegriff=', 'fastbot_de', 'red=[0-9]*\+', 'tixuma_de', 'sc=', # Minor Hungarian search engines 'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=', 'keresolap_hu','q=', 'startlap_hu', 'q=', 'tango_hu','q=', 'polymeta_hu','', # Minor Indian search engines 'sify','keyword=', # Minor Italian search engines 'virgilio','qs=', 'arianna','query=', 'supereva','q=', 'kataweb','q=', 'aliceitmaster','qs=', 'aliceit','qs=', 'gotuneed','', # Not yet known 'godado','Keywords=', 'jumpy\.it','searchWord=', 'shinyseek\.it','KEY=', 'teecnoit','q=', # Minor Israeli search engines 'genieo','q=', # Minor Japanese search engines 'askjp','(ask|q)=', 'sagool','q=', # Minor Norwegian search engines 'start','q=', 'eniro','q=', # Minor Polish search engines 'wp','szukaj=', 'onetpl','qt=', 'dodajpl','keyword=', 'gazetapl','slowo=', 'gerypl','q=', 'hogapl','qt=', 'netsprintpl','q=', 'interiapl','q=', 'katalogonetpl','qt=', 'o2pl','qt=', 'polskapl','qt=', 'szukaczpl','q=', 'wowpl','q=', # Minor russian search engines 'yandex', 'text=', 'rambler','words=', 'aport', 'r=', 'metabot', 'st=', 'mailru', 'q=', # Minor swedish search engines 'passagen','q=', 'enirose', 'hitta:', #Not sure if this works, 
# as the keywords are part of the URL, and therefore the URL does not contain a question mark.
# Minor swiss search engines
'searchch', 'q=', 'bluewin', 'qry=',
# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
'pogodak', 'q='
);


# SearchEnginesKnownUrlNotFound
# Known rules to extract not found keywords from a referrer search engine URL.
# Each entry is a search engine identifier followed by the pattern of the
# parameter that carries those keywords.
#------------------------------------------------------------------------------
%SearchEnginesKnownUrlNotFound=(
# Most common search engines
'msn','origq='
);


# WordsToExtractSearchUrl
# If no rule is known for a search engine, the parameter names listed here
# are used to search for the keyword parameter.
#------------------------------------------------------------------------------
@WordsToExtractSearchUrl= ('tn=','ie=','ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w=');

# WordsToCleanSearchUrl
# If no rule is known and the search with WordsToExtractSearchUrl failed, the
# parameter names listed here are used to clean the URL of parameters that are
# not keywords.
#------------------------------------------------------------------------------
@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look=');


# SearchEnginesKnownUTFCoding
# Known parameter that proves a search engine has coded its parameters in UTF-8.
# Each entry is a search engine identifier followed by the parameter=value text.
#------------------------------------------------------------------------------
%SearchEnginesKnownUTFCoding=(
# Most common search engines
'google','ie=utf-8',
'alltheweb','cs=utf-8'
);


# SearchEnginesHashLib
# List of search engines names
# 'search_engine_id', 'search_engine_name',
#------------------------------------------------------------------------------
# Maps every search engine identifier to the label (an HTML fragment) that is
# shown for it in the reports. Labels are written in single quotes, so a
# backslash is only needed in front of an embedded single quote; characters
# outside ASCII are best written as HTML entities so that the label does not
# depend on the encoding of this file.
%SearchEnginesHashLib=(
    # Major international search engines
    'alexa'                  => 'Alexa',
    'alltheweb'              => 'AllTheWeb',
    'altavista'              => 'AltaVista',
    'a9'                     => 'A9',
    'dmoz'                   => 'DMOZ',
    'google_products'        => 'Google (Products)',
    'google_base'            => 'Google (Base)',
    'google_froogle'         => 'Froogle (Google)',
    'google_groups'          => 'Google (Groups)',
    'google_image'           => 'Google (Images)',
    'google_cache'           => 'Google (cache)',
    'google'                 => 'Google',
    'lycos'                  => 'Lycos',
    'msn'                    => 'Microsoft MSN Search',
    'live'                   => 'Microsoft Windows Live',
    'bing'                   => 'Microsoft Bing',
    'netscape'               => 'Netscape',
    'aol'                    => 'AOL',
    'terra'                  => 'Terra',
    'tiscali'                => 'Tiscali',
    'voila'                  => 'Voila',
    'search.com'             => 'Search.com',
    'yahoo_mindset'          => 'Yahoo! Mindset',
    'yahoo'                  => 'Yahoo!',
    'sympatico'              => 'Sympatico',
    'excite'                 => 'Excite',
    # Minor international search engines
    'google4counter'         => '4-counter (Google)',
    'att'                    => 'AT&T search (powered by Google)',
    'bungeebonesdotcom'      => 'BungeeBones',
    'go'                     => 'Go.com',
    'askde'                  => 'Ask Deutschland',
    'askes'                  => 'Ask España',     # break out Ask country specific engines.
    'askfr'                  => 'Ask France',
    'askit'                  => 'Ask Italia',
    'asknl'                  => 'Ask Nederland',
    'ask'                    => 'Ask',
    'atomz'                  => 'Atomz',
    'dejanews'               => 'DejaNews',
    'euroseek'               => 'Euroseek',
    'findarticles'           => 'Find Articles',
    'go2net'                 => 'Go2Net (Metamoteur)',
    'hotbot'                 => 'Hotbot',
    'infospace'              => 'InfoSpace',
    'kvasir'                 => 'Kvasir',
    'looksmart'              => 'Looksmart',
    'mamma'                  => 'Mamma',
    'metacrawler'            => 'MetaCrawler (Metamoteur)',
    'nbci'                   => 'NBCI',
    'northernlight'          => 'NorthernLight',
    'overture'               => 'Overture',       # Replace 'goto\.com','Goto.com',
    'dogpile'                => 'Dogpile',
    'spray'                  => 'Spray',
    'teoma'                  => 'Teoma',          # Replace 'directhit\.com','DirectHit',
    'webcrawler'             => 'WebCrawler',
    'wisenut'                => 'WISENut',
    'ixquick'                => 'ix quick',
    'earthlink'              => 'Earth Link',
    'iune'                   => 'i-une',
    'blingo'                 => 'Blingo',
    'centraldatabase'        => 'GPU p2p search',
    'clusty'                 => 'Clusty',
    'mysearch'               => 'My Search',
    'vivisimo'               => 'Vivisimo',
    'kartoo'                 => 'Kartoo',
    'icerocket'              => 'Icerocket (Blog)',
    'sphere'                 => 'Sphere (Blog)',
    'ledix'                  => 'Ledix',
    'shawca'                 => 'Shaw.ca',
    'searchalot'             => 'Searchalot',
    'copernic'               => 'Copernic',
    'avantfind'              => 'Avantfind',
    # Was labelled 'Avantfind' (copied from the entry above), which merged two
    # different engines under one name in the reports.
    'steadysearch'           => 'Steadysearch',
    'clarosearch'            => 'Claro Search',
    'searchresults'          => 'Search-results',
    'holasearch'             => 'Hola Search',
    'conduit'                => 'Conduit Search',
    'flipora'                => 'Flipora',
    'delta-search'           => 'Delta Search',
    'iminent'                => 'Iminent',
    'searchmobileonline'     => 'Search Mobile Online (StartApp)',
    'nortonsavesearch'       => 'Norton Safe Search',
    'inbox'                  => 'Inbox Search',
    'govome'                 => 'Govome',
    'find1friend'            => 'Find1Friend',
    'mysearchdial'           => 'My Search Dial',
    'speedbit'               => 'Speedbit',
    'certifiedtoolbarsearch' => 'Certified-Toolbar Search',
    'sweetim'                => 'SweetIM Search',
    'searchcompletion'       => 'SearchCompletion Search',
    'eazelsearch'            => 'Eazel Search',
    'searchfunmoods'         => 'Funmoods',
    'googleByIP'             => 'Google (Access by IP-Address)',
    'dalesearch'             => 'Dale Search',
    'sweetpacks'             => 'Sweetpacks',
    'searchgol'              => 'Search-Gol',
    'duckduckgo'             => 'DuckDuckGo (Does not provide search keyphrases, using found page instead)',
    'facemoods'              => 'Facemoods Search',
    'shoppstop'              => 'ShoppStop',
    'searchya'               => 'Searchya',
    'picsearch'              => 'picsearch',
    'webssearches'           => 'Web Searches',
    'zapmeta'                => 'ZapMeta',
    'localmoxie'             => 'Local Moxie',
    # Chello Portals
    'chelloat'               => 'Chello Austria',
    'chellobe'               => 'Chello Belgium',
    'chellocz'               => 'Chello Czech Republic',
    'chellofr'               => 'Chello France',
    'chellohu'               => 'Chello Hungary',
    'chellonl'               => 'Chello Netherlands',
    'chellono'               => 'Chello Norway',
    'chellopl'               => 'Chello Poland',
    'chellose'               => 'Chello Sweden',
    'chellosk'               => 'Chello Slovakia',
    'chellocom'              => 'Chello (Country not recognized)',
    # Mirago
    'miragobe'               => 'Mirago Belgium',
    'miragoch'               => 'Mirago Switzerland',
    'miragode'               => 'Mirago Germany',
    'miragodk'               => 'Mirago Denmark',
    'miragoes'               => 'Mirago Spain',
    'miragofr'               => 'Mirago France',
    'miragoit'               => 'Mirago Italy',
    'miragonl'               => 'Mirago Netherlands',
    'miragono'               => 'Mirago Norway',
    'miragose'               => 'Mirago Sweden',
    'miragocouk'             => 'Mirago UK',
    'mirago'                 => 'Mirago (country unknown)',
    'answerbus'              => 'Answerbus',
    'icq'                    => 'icq',
    'nusearch'               => 'Nusearch',
    'goodsearch'             => 'GoodSearch',
    'scroogle'               => 'Scroogle',
    'questionanswering'      => 'Questionanswering',
    'mywebsearch'            => 'MyWebSearch',
    'comettoolbar'           => 'Comet toolbar search',
    # Social Bookmarking Services
    'delicious'              => 'del.icio.us (Social Bookmark)',
    'digg'                   => 'Digg (Social Bookmark)',
    'stumbleupon'            => 'Stumbleupon (Social Bookmark)',
    'swik'                   => 'Swik (Social Bookmark)',
    'segnalo'                => 'Segnalo (Social Bookmark)',
    'ineffabile'             => 'Ineffabile.it (Social Bookmark)',
    # Minor Australian search engines
    'anzwers'                => 'anzwers.com.au',
    # Minor brazilian search engines
    'engine'                 => 'Cade',
    'miner'                  => 'Meta Miner',
    # Minor chinese search engines
    'baidu'                  => 'Baidu',
    'iask'                   => 'Iask',
    'accoona'                => 'Accoona',
    '3721'                   => '3721',
    'netease'                => 'NetEase',
    'soso'                   => 'SoSo',
    'zhongsou'               => 'ZhongSou',
    'sogou'                  => 'SoGou',
    'vnet'                   => 'VNet',
    # Minor czech search engines
    'atlas'                  => 'Atlas.cz',
    'seznam'                 => 'Seznam',
    'quick'                  => 'Quick.cz',
    'centrum'                => 'Centrum.cz',
    'jyxo'                   => 'Jyxo.cz',
    'najdi'                  => 'Najdi.to',
    'redbox'                 => 'RedBox.cz',
    'avgsearch'              => 'AVG Secure Search',
    # Minor danish search-engines
    'opasia'                 => 'Opasia',
    'danielsen'              => 'Thor (danielsen.com)',
    'sol'                    => 'SOL',
    'jubii'                  => 'Jubii',
    'finddk'                 => 'Find',
    'edderkoppen'            => 'Edderkoppen',
    'netstjernen'            => 'Netstjernen',
    'orbis'                  => 'Orbis',
    'tyfon'                  => 'Tyfon',
    '1klik'                  => '1Klik',
    'ofir'                   => 'Ofir',
    # Minor dutch search engines
    'ilse'                   => 'Ilse',
    # No backslash before the dot: inside single quotes it would be printed
    # literally in the report.
    'vindex'                 => 'Vindex.nl',
    # Minor english search engines
    'askuk'                  => 'Ask UK',
    'bbc'                    => 'BBC',
    'freeserve'              => 'Freeserve',
    'looksmartuk'            => 'Looksmart UK',
    'splut'                  => 'Splut',
    'spotjockey'             => 'Spotjockey',
    'ukdirectory'            => 'UK Directory',
    'ukindex'                => 'UKIndex',
    'ukplus'                 => 'UK Plus',
    'searchy'                => 'searchy.co.uk',
    'fbdownloader'           => 'FBDownloader',
    'babylon'                => 'Babylon',
    'allgameshome'           => 'AllGamesHome',
    # Minor finnish search engines
    'haku'                   => 'Ihmemaa',
    # Minor french search engines
    'aolfr'                  => 'AOL (fr)',
    'ctrouve'                => 'C\'est trouve',
    'francite'               => 'Francite',
    'lbb'                    => 'LBB',
    'libertysurf'            => 'Libertysurf',
    'free'                   => 'Free.fr',
    'clubinternet'           => 'Club-internet',
    'toile'                  => 'Toile du Quebec',
    'biglotron'              => 'Biglotron',
    'mozbot'                 => 'Mozbot',
    # Minor German search engines
    'aolde'                  => 'AOL (de)',
    'o2aolde'                => 'o2 Suche',
    'fireball'               => 'Fireball',
    'infoseek'               => 'Infoseek',
    'webde'                  => 'Web.de',
    'abacho'                 => 'Abacho',
    't-online'               => 'T-Online',
    'allesklar'              => 'allesklar.de',
    'meinestadt'             => 'meinestadt.de',
    'metaspinner'            => 'metaspinner',
    'metacrawler_de'         => 'metacrawler.de',
    'wwweasel'               => 'WWWeasel',
    'netluchs'               => 'Netluchs',
    'schoenerbrausen'        => 'Schoenerbrausen',
    'gmxsuche'               => 'GMX Suche',
    # The O-umlaut is written as an HTML entity; the raw byte that used to be
    # here was not valid in the encoding of the rest of the file.
    'gmxsuche_at'            => 'GMX Suche &Ouml;sterreich',
    'ecosiasearch'           => 'Ecosia Search',
    'aolsearch'              => 'AOL Search',
    'aolsuche'               => 'AOL Suche',
    'startxxl'               => 'StartXXL',
    'benefind'               => 'benefind',
    'amazonsearch'           => 'Amazon Web Search',
    'wowsearch'              => 'Wow Search',
    'vlips_de'               => 'vlips.de',
    'metager'                => 'MetaGer',
    'search_1und1_de'        => '1&1 Suche',
    'smde'                   => 'SM.de - Die SuchMaschine',
    'sumaja'                 => 'Sumaja',
    'navigationshilfe'       => 'T-Online Navigationshilfe',
    'umfis'                  => 'UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland',
    'fastbot_de'             => 'Fastbot.de (Does not provide search keyphrases, using found page instead)',
    'tixuma_de'              => 'Tixuma Deutschland',
    # Minor hungarian search engines
    'heureka'                => 'Heureka',
    'origo'                  => 'Origo-Vizsla',
    'lapkereso'              => 'Startlapkereso',
    'goliat'                 => 'Goliat',
    'indexhu'                => 'Index',
    'wahoo'                  => 'Wahoo',
    'webmania'               => 'webmania.hu',
    'internetto'             => 'Internetto Kereso',
    'tango_hu'               => 'Tango',
    'keresolap_hu'           => 'Tango keresolap',
    'startlap_hu'            => 'Startlap Kereso',
    'polymeta_hu'            => 'Polymeta',
    # Minor Indian search engines
    'sify'                   => 'Sify',
    # Minor Italian search engines
    'virgilio'               => 'Virgilio',
    'arianna'                => 'Arianna',
    'supereva'               => 'Supereva',
    'kataweb'                => 'Kataweb',
    'aliceitmaster'          => 'search.alice.it.master',
    'aliceit'                => 'alice.it',
    'gotuneed'               => 'got u need',
    'godado'                 => 'Godado.it',
    'jumpy\.it'              => 'Jumpy.it',
    'shinyseek\.it'          => 'Shinyseek.it',
    'teecnoit'               => 'Teecno',
    # Minor Israeli search engines
    'genieo'                 => 'Genieo',
    # Minor Japanese search engines
    'askjp'                  => 'Ask Japan',
    'sagool'                 => 'Sagool',
    # Minor Norwegian search engines
    'start'                  => 'start.no',
    'eniro'                  => 'Eniro',
    # Minor polish search engines
    'wp'                     => 'Wirtualna Polska',
    'onetpl'                 => 'Onet.pl',
    'dodajpl'                => 'Dodaj.pl',
    'gazetapl'               => 'Gazeta.pl',
    'gerypl'                 => 'Gery.pl',
    'hogapl'                 => 'Hoga.pl',
    'netsprintpl'            => 'NetSprint.pl',
    'interiapl'              => 'Interia.pl',
    'katalogonetpl'          => 'Katalog.Onet.pl',
    'o2pl'                   => 'o2.pl',
    'polskapl'               => 'Polska',
    'szukaczpl'              => 'Szukacz',
    'wowpl'                  => 'Wow.pl',
    # Minor russian search engines
    'yandex'                 => 'Yandex',
    'aport'                  => 'Aport',
    'rambler'                => 'Rambler',
    'turtle'                 => 'Turtle',
    'metabot'                => 'MetaBot',
    'mailru'                 => 'Mail.Ru',
    # Minor Swedish search engines
    'passagen'               => 'Evreka',
    'enirose'                => 'Eniro Sverige',
    # Minor Slovak search engines
    'zoznam'                 => 'Zoznam',
    # Minor Portuguese search engines
    'sapo'                   => 'Sapo',
    # Minor Swiss search engines
    'searchch'               => 'search.ch',
    'bluewin'                => 'search.bluewin.ch',
    # Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
    'pogodak'                => 'Pogodak.com',
    # Generic search engines
    'search'                 => 'Unknown search engines'
);


# Sanity check.
# Enable this code and run perl search_engines.pm to check file entries are ok
# NOTE(review): in the first two loops the cross-list duplicate checks are
# nested inside the "if (! $SearchEnginesHashID{$key})" block, so they only run
# for keys that are missing from SearchEnginesHashID - confirm this is intended
# before enabling the code.
#-----------------------------------------------------------------------------
#foreach my $key (@SearchEnginesSearchIDOrder_list1) {
#    if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID");
#    foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } }
#    foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } }
#} }
#foreach my $key (@SearchEnginesSearchIDOrder_list2) {
#    if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list2 with no value in SearchEnginesHashID");
#    foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } }
#    foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } }
#} }
#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } }
#foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } }
#foreach my $key (keys %SearchEnginesKnownUrl) {
#    my $found=0;
#    foreach my $key2 (values %SearchEnginesHashID) {
#        if ($key eq $key2) { $found=1; last; }
#    }
#    if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; }
#}
#foreach my $key (keys %SearchEnginesHashLib) {
#    my $found=0;
#    foreach my $key2 (values %SearchEnginesHashID) {
#        if ($key eq $key2) { $found=1; last; }
#    }
#    if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; }
#}
#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;

1;