#!/bin/sed -f
# Version 0.6
# Description:
#   Prints queries from different search engines
#   out of the referrer from an apache weblog
# Changelog:
#   0.1:
#     initial release
#   0.2:
#     added lycos, fireball, suchen, abacho, other
#
# Tue Aug 6 09:09:41 UTC 2002
#
# Flags (no particular order):
#   g ... Google
#   v ... altaVista
#   a ... Abacho
#   l ... Lycos
#   f ... Fireball
#   s ... Suchen.com
#   t ... allTheweb
#   r ... webferret.search.com
#   i ... Iol.it
#   w ... wisenut
#   o ... other
#
# NOTE: all patterns are basic regular expressions, so a bare "?" is a
# literal question mark.  Do not run this script with -E / -r.

# Emit the column header once, before the first input line.
1i\
Flag | IP | Query | Result

# not a query
/?/!d
/Googlebot/d
/WebWasher/d
/ezboard/d
/reeler\.org/d
/? /d

# referrer is empty
# XXX is this correct?  (This also drops lines whose other quoted
# fields are "-", not only the referrer.)
/"-"/d

# /google\.\|altavista\.\|lycos\.\|fireball\.\|suchen\.com\|abacho.de\|alltheweb\.\|iol\.it/!d

# delete time: keep the first word of the line and drop everything
# through "GET ", so the request path follows it directly
s/\([^ ]*\).*GET /\1/

# delete everything up to the referrer
s/ [^"]*"//
s/ [^"]*"\([^ ]\)/ | "\1/

# Prefix the line with a one-letter engine flag followed by "|||".
# Every rule after the first is guarded by /^.|||/! so that a line
# mentioning two engine names (e.g. a search for "google.de" on
# altavista) gets exactly one flag; the first matching rule wins.
/suchen\.com/s/^/s|||/
/^.|||/!{/altavista\./s/^/v|||/;}
/^.|||/!{/google\./s/^/g|||/;}
/^.|||/!{/lycos\./s/^/l|||/;}
/^.|||/!{/abacho\./s/^/a|||/;}
/^.|||/!{/fireball\./s/^/f|||/;}
/^.|||/!{/alltheweb\./s/^/t|||/;}
/^.|||/!{/iol\.it/s/^/i|||/;}
/^.|||/!{/wisenut\.com/s/^/w|||/;}
/^.|||/!s/^/o|||/

s/keyword=//
s/ask=//

# XXX hack
/netscape\.com/s/search=//

# Strip the referrer URL up to and including the "?" that starts the
# query string, keeping the " | " field separator.
s, | .*res_text?, | ,
s, | .*search?, | ,
s, | .*query?, | ,
s, | .*query_..?, | ,
s, | .*/linux?, | ,
s, | .*click?, | ,
s, | .*\.jhtml?, | ,
s, | .*\.html?, | ,
s, | .*\.htm?, | ,
s, | .*\.asp?, | ,
s, | .*\.lor?, | ,
s, | .*\.gw?, | ,
s, | .*\.pl?, | ,
s, | .*\.dll?, | ,
s, | .*translate_c?, | ,
# lycos
s, | .*/pursuit?, | ,
# fireball
s, | .*/query\.fcg?, | ,
s, | .*/..query\.fcg?, | ,
# suchen.com
s, | .*/?, | ,
# abacho.de
s, | .*\.cfm?, | ,
s, | .*/metasuche_suchen.phtml?, | ,
# google images (keeps the separator like every other rule here;
# without it the query is glued onto the request path)
s, | .*images?, | ,
# Altavista
s, | .*/q?, | ,
# lines whose referrer contains "/r?" are dropped entirely
\, | .*/r?,d
s, | .*/web?, | ,
s, | .*/iepane?, | ,
s, | .*Query?XX, | ,
# netscape
s, | .*\.psp?, | ,
s, | .*\.jsp?, | ,
# Google Advanced Search?
# Advanced-search form: as_q carries the query itself, so only its
# name is removed; the remaining as_* parameters are removed together
# with their values.
s/as_q=//
s/as_sitesearch=[^&"]*//g
s/as_occt=[^&"]*//g
s/as_epq=[^&"]*//g
s/as_qdr=[^&"]*//g
s/as_eqt=[^&"]*//g
s/as_dt=[^&"]*//g
s/as_eq=[^&"]*//g
s/as_oq=[^&"]*//g
s/as_qt=[^&"]*//g

# lycos
s,"http://www.google.com/[^?]*?,,
s/&NL//
s/Keywords=//

# delete Search Flags
# Order matters!
# Each rule removes "name=value" up to the next "&" or closing quote.
# Longer names come before shorter names that are their suffix or
# prefix (e.g. sourceid= before source=), hence the length groups.
s/searchselector=[^&"]*//g
s/de-blitzsuche=[^&"]*//g
s/de-allesklar=[^&"]*//g
s/de-interfux=[^&"]*//g
s/ItemOrdinal=[^&"]*//g
s/search_type=[^&"]*//g
s/de-crawler=[^&"]*//g
s/de-tonline=[^&"]*//g
s/de-qualigo=[^&"]*//g
s/metaEngine=[^&"]*//g
s/w_wynikach=[^&"]*//g

# NINE
s/de-abacho=[^&"]*//g
s/de-aladin=[^&"]*//g
s/de-excite=[^&"]*//g
s/de-nathan=[^&"]*//g
s/de-spider=[^&"]*//g
s/imgrefurl=[^&"]*//g
s/matchmode=[^&"]*//g
s/metaTopic=[^&"]*//g
s/newwindow=[^&"]*//g
s/qcategory=[^&"]*//g
s/querytime=[^&"]*//g
s/source-id=[^&"]*//g
s/startfrom=[^&"]*//g
s/qCategory=[^&"]*//g

# EIGHT
s/de-lycos=[^&"]*//g
s/de-witch=[^&"]*//g
s/de-yahoo=[^&"]*//g
s/language=[^&"]*//g
s/per_page=[^&"]*//g
s/restrict=[^&"]*//g
s/search.x=[^&"]*//g
s/search.y=[^&"]*//g
s/smartref=[^&"]*//g
s/sourceid=[^&"]*//g
s/Szukaj.x=[^&"]*//g
s/Szukaj.y=[^&"]*//g
s/szukaj.x=[^&"]*//g
s/szukaj.y=[^&"]*//g

# SEVEN
s/cat\.118=[^&"]*//g
s/channel=[^&"]*//g
s/charset=[^&"]*//g
s/country=[^&"]*//g
s/de-dino=[^&"]*//g
s/de-eule=[^&"]*//g
s/domains=[^&"]*//g
s/engines=[^&"]*//g
s/maxhits=[^&"]*//g
s/MetaURL=[^&"]*//g
s/qsource=[^&"]*//g
s/startat=[^&"]*//g
s/submit2=[^&"]*//g
s/timeout=[^&"]*//g
s/engine[0-9]=[^&"]*//g
s/engine1[0-9]=[^&"]*//g

# SIX
s/action=[^&"]*//g
s/amount=[^&"]*//g
s/aveenc=[^&"]*//g
s/client=[^&"]*//g
s/de-aol=[^&"]*//g
s/design=[^&"]*//g
s/domain=[^&"]*//g
s/family=[^&"]*//g
s/filter=[^&"]*//g
s/format=[^&"]*//g
s/imgurl=[^&"]*//g
s/inlang=[^&"]*//g
s/logQID=[^&"]*//g
s/method=[^&"]*//g
s/offset=[^&"]*//g
s/origin=[^&"]*//g
s/search=[^&"]*//g
s/source=[^&"]*//g
s/status=[^&"]*//g
s/submit=[^&"]*//g
s/target=[^&"]*//g

# FIVE
s/brand=[^&"]*//g
s/delay=[^&"]*//g
s/dexcl=[^&"]*//g
s/dincl=[^&"]*//g
# Removal of search-flag parameters ("name=value" up to the next "&"
# or closing quote).  Order matters: longer names are handled before
# shorter ones that are a suffix of them (e.g. qid= before id=).
# The group labels give the length of the parameter names.

# (FIVE)
s/etemp=[^&"]*//g
s/first=[^&"]*//g
s/frame=[^&"]*//g
s/Input=[^&"]*//g
s/logid=[^&"]*//g
s/mtemp=[^&"]*//g
s/origq=[^&"]*//g
s/scope=[^&"]*//g
s/smode=[^&"]*//g
s/spell=[^&"]*//g
s/start=[^&"]*//g
s/terms=[^&"]*//g
s/where=[^&"]*//g

# FOUR
s/aloc=[^&"]*//g
s/back=[^&"]*//g
s/base=[^&"]*//g
s/btnG=[^&"]*//g
s/enco=[^&"]*//g
s/exec=[^&"]*//g
s/FORM=[^&"]*//g
s/hits=[^&"]*//g
s/lang=[^&"]*//g
s/look=[^&"]*//g
s/meta=[^&"]*//g
s/nooc=[^&"]*//g
s/nosp=[^&"]*//g
s/npl1=[^&"]*//g
s/page=[^&"]*//g
s/prev=[^&"]*//g
s/rurl=[^&"]*//g
s/safe=[^&"]*//g
s/seed=[^&"]*//g
s/site=[^&"]*//g
s/tmpl=[^&"]*//g
s/type=[^&"]*//g
s/view=[^&"]*//g
s/what=[^&"]*//g

# THREE
s/qid=[^&"]*//g
s/cmd=[^&"]*//g
s/UDo=[^&"]*//g
s/adv=[^&"]*//g
s/AM1=[^&"]*//g
s/amb=[^&"]*//g
s/cat=[^&"]*//g
s/cfg=[^&"]*//g
# The value must stop at the closing quote like every other rule;
# otherwise a trailing cof= swallows the quote and the rest of the line.
s/cof=[^&"]*//g
s/enc=[^&"]*//g
s/fmt=[^&"]*//g
s/lan=[^&"]*//g
s/loc=[^&"]*//g
s/lpv=[^&"]*//g
s/nbq=[^&"]*//g
s/num=[^&"]*//g
s/pag=[^&"]*//g
s/ref=[^&"]*//g
s/stq=[^&"]*//g
s/SWR=[^&"]*//g
s/tag=[^&"]*//g
s/idx=[^&"]*//g
s/uri=[^&"]*//g

# TWO
s/LA=[^&"]*//g
s/CO=[^&"]*//g
s/UN=[^&"]*//g
s/un=[^&"]*//g
s/c1=[^&"]*//g
s/ca=[^&"]*//g
s/cb=[^&"]*//g
s/cl=[^&"]*//g
s/cn=[^&"]*//g
s/cr=[^&"]*//g
s/cp=[^&"]*//g
s/d0=[^&"]*//g
s/d1=[^&"]*//g
s/d2=[^&"]*//g
s/do=[^&"]*//g
s/dt=[^&"]*//g
s/fr=[^&"]*//g
s/fs=[^&"]*//g
s/gi=[^&"]*//g
s/hc=[^&"]*//g
s/hl=[^&"]*//g
s/hq=[^&"]*//g
s/hs=[^&"]*//g
s/id=[^&"]*//g
s/ie=[^&"]*//g
s/kl=[^&"]*//g
s/la=[^&"]*//g
s/lr=[^&"]*//g
s/oe=[^&"]*//g
s/p1=[^&"]*//g
s/pg=[^&"]*//g
s/pm=[^&"]*//g
s/pn=[^&"]*//g
s/pp=[^&"]*//g
s/ps=[^&"]*//g
s/qb=[^&"]*//g
s/qe=[^&"]*//g
s/rd=[^&"]*//g
s/RS=[^&"]*//g
s/rs=[^&"]*//g
s/s1=[^&"]*//g
s/sa=[^&"]*//g
s/si=[^&"]*//g
s/sc=[^&"]*//g
s/sn=[^&"]*//g
s/SQ=[^&"]*//g
s/ss=[^&"]*//g
s/sv=[^&"]*//g
s/sw=[^&"]*//g
s/to=[^&"]*//g
s/tt=[^&"]*//g
s/rl=[^&"]*//g
s/wh=[^&"]*//g
s/TR=[^&"]*//g
s/A[0-9]=[^&"]*//g
s/B[0-9]=[^&"]*//g
s/C[0-9]=[^&"]*//g
s/u[0-9]=[^&"]*//g
# yes, some really use this
s/&=[^&"]*//g

# Parameters that carry the query: drop only the name (first
# occurrence), keep the value.
s/szukaj=//
s/query=//
s/qry=//
s/key=//
# Parameters that carry the query: drop only the name (first
# occurrence), keep the value.
s/qr=//
s/MT=//
s/su=//
s/q=//
s/p=//

# ONE
# has to be last
s/[a-z]=[^&"]*//g

s/&hc//g
# Separate the referrer remainder from the next quoted field.
s/" "/ | "/
# Drop the "&" separators left over from removed parameters.
s/&&*//g
s/+/ /g
# The first "/" on the line starts the request path; put a field
# separator in front of it.
s,/, | /,
s,cache:[^:]:*[^/]*/,cache:/,
# Temporarily turn the flag marker "|||" into "///" so that it does
# not interfere with the "|"-based field matching below.
s,|||,///,

# delete Browsertype
s,\(.*\)|[^|]*$,\1 |,

# swap Query and Result
s,| \([^|]*\)| \([^|]*\)|, | \2 | \1,
s,///, | ,

# Percent-decoding, ASCII punctuation.
# "%25" (the percent sign itself) is decoded at the very end so that
# its output is never decoded a second time (e.g. "%2526" -> "%26").
s,%20, ,g
s,%21,!,g
s,%22,",g
s,%23,#,g
s,%24,$,g
s,%26,\&,g
s,%27,',g
s,%28,(,g
s,%29,),g
s,%2[Aa],*,g
s,%2[Bb],+,g
s,%2[Cc],\,,g
s,%2[Dd],-,g
s,%2[Ee],.,g
s,%2[Ff],/,g
s,%3[Aa],:,g
s,%3[Bb],;,g
s,%3[Cc],<,g
s,%3[Dd],=,g
s,%3[Ee],>,g
s,%3[Ff],?,g
s,%40,@,g
s,%5[Bb],[,g
s,%5[Cc],\\,g
s,%5[Dd],],g
s,%5[Ee],^,g
s,%5[Ff],_,g
s,%60,`,g
s,%7[Bb],{,g
s,%7[Cc],|,g
s,%7[Dd],},g
s,%7[Ee],~,g

# Percent-decoding, German special characters: two-byte UTF-8 forms
# first, then the single-byte ISO-8859-1 forms.
s,%[Cc]2%[Aa]7,§,g
s,%[Cc]3%9[Ff],ß,g
s,%[Cc]3%[Bb][Cc],ü,g
s,%[Cc]3%9[Cc],Ü,g
s,%[Cc]3%[Bb]6,ö,g
s,%[Cc]3%96,Ö,g
s,%[Cc]3%[Aa]4,ä,g
s,%[Cc]3%84,Ä,g
s,%[Aa]7,§,g
s,%[Dd][Ff],ß,g
s,%[Ff][Cc],ü,g
s,%[Dd][Cc],Ü,g
s,%[Ff]6,ö,g
s,%[Dd]6,Ö,g
s,%[Ee]4,ä,g
s,%[Cc]4,Ä,g

# Percent sign last (see above).
s,%25,%,g

# Squeeze runs of blanks into one.  The pattern needs TWO leading
# spaces: " *" alone matches the empty string and would insert a
# space between every character.
s,  *, ,g