##############################################
# These bots must not index anything at all.
# One directive per line: robots.txt is parsed line by line, and anything
# following a '#' on the same line is treated as a comment.
User-agent: Googlebot-Image
User-agent: msnbot-media
User-agent: Twiceler
User-agent: Accoona
User-agent: Seekbot
User-agent: Firefox
User-agent: turnitinbot
User-agent: Exabot-Images
Disallow: /

##############################################
# Exabot, see: http://www.exalead.com/about/document/53
# Throttled, and kept away from every URL that carries a query string.
User-agent: Exabot
Crawl-delay: 30
Disallow: /*?

##############################################
# Selected User Agents
# Bots that parse the Allow directive; Allow rules may override the
# Disallowed URLs below.
# Path patterns start with '/' as the Robots Exclusion Protocol requires;
# '/*.ext$' matches the same URLs as the former '*.ext$' form.
User-agent: Googlebot
User-agent: Teoma
User-agent: Yahoo
User-agent: search.ch
User-agent: msnbot
User-agent: spidersearch
User-agent: VoilaBot
Disallow: /musik
Disallow: /musik/
Disallow: /rsc
Disallow: /rsc/
Disallow: /download.htm
Disallow: /bdeonline/museum
Disallow: /BDE_DB_export.html
Disallow: /KJVTenachBDE.html
Disallow: /LuTeLXXBDE.html
Disallow: /mail.html
Disallow: /cgi/mail
Disallow: /*.jpg$
Disallow: /*.jpeg$
Disallow: /*.gif$
Disallow: /*.png$
Disallow: /*.doc$
Disallow: /*.odt$
Disallow: /*.zip$
Disallow: /ehome-factory/__
Disallow: /ehome-factory/__*
Disallow: /ehome-factory/admin
Allow: /ehome-factory/
Allow: /kaffeesatz
Allow: /kaffeesatz/
Allow: /cgi/tok.pl?extern=*
Allow: /cgi/tok.pl?lang=0$
Allow: /cgi/tok.pl?lang=1$
Allow: /cgi/tok.pl$

##############################################
# All Other User Agents
# I suppose they don't understand an Allow directive.
# Catch-all group: applies to every crawler that is not matched by a more
# specific User-agent group. Only Disallow rules are used here.
# Path patterns start with '/' as the Robots Exclusion Protocol requires;
# '/*.ext$' matches the same URLs as the former '*.ext$' form.
User-agent: *
Disallow: /musik
Disallow: /musik/
Disallow: /rsc
Disallow: /rsc/
Disallow: /cgi/mail
Disallow: /cgi/mail/
Disallow: /*.jpg$
Disallow: /*.jpeg$
Disallow: /*.gif$
Disallow: /*.png$
Disallow: /*.doc$
Disallow: /*.odt$
Disallow: /*.zip$
Disallow: /download.htm
Disallow: /bdeonline/museum
Disallow: /BDE_DB_export.html
Disallow: /KJVTenachBDE.html
Disallow: /LuTeLXXBDE.html
Disallow: /mail.html
Disallow: /ehome-factory/admin
Disallow: /ehome-factory/__

###############################
# Notes
#
# Google
#   see http://www.google.com/support/webmasters/bin/answer.py?answer=40367&topic=8846
#
# Turnitin: a bot for a non-public database (plagiarism prevention service)
#   see http://www.turnitin.com/robot/crawlerinfo.html
#   I suspect it reads the directives but does not respect them.
#   status: blocked by htaccess
#
# END
#