##########################################
# Robots.txt #
# Generated at 13.03.2010 20:46:32 GMT+1 #
# 2450 User Agents blocked so far #
# Copyright 2010 by DT.N #
##########################################
# Wildcard
User-Agent: *
Disallow: /
# Sync2It bookmark management & clustering engine
User-Agent: !Susie (http://www.sync2it.com/susie)
Disallow: /
# UnCHAOS search robot
User-Agent: UnChaos From Chaos To Order Hybrid Web Search Engine.(vadim_gonchar@unchaos.com)
Disallow: /
# UnCHAOS search robot
User-Agent: UnChaos Bot Hybrid Web Search Engine. (vadim_gonchar@unchaos.com)
Disallow: /
# UnCHAOS search robot
User-Agent: UnChaosBot From Chaos To Order UnChaos Hybrid Web Search Engine at www.unchaos.com (info@unchaos.com)
Disallow: /
# Sygol Search (Italy) robot
User-Agent: http://www.sygol.com
Disallow: /
# SearchEngineWorld's robots.txt validator
User-Agent: ( Robots.txt Validator http://www.searchengineworld.com/cgi-bin/robotcheck.cgi )
Disallow: /
# DreamCast DreamPassport browser
User-Agent: (DreamPassport/3.0; isao/MyDiGiRabi)
Disallow: /
# Privoxy web proxy
User-Agent: (Privoxy/1.0)
Disallow: /
# Unknown Yahoo robot
User-Agent: */Nutch-0.9-dev
Disallow: /
# SitiDi.net search (Germany) robot
User-Agent: +SitiDi.net/SitiDiBot/1.0 (+Have Good Day)
Disallow: /
# Die Kraehe Meta-Search-Engine (Germany) link checking
User-Agent: -DIE-KRAEHE- META-SEARCH-ENGINE/1.1 http://www.die-kraehe.de
Disallow: /
# 123spider.de (Germany) web directory link checking
User-Agent: 123spider-Bot (Version: 1.02, powered by www.123spider.de
Disallow: /
# 192.com - UK web directory
User-Agent: 192.comAgent
Disallow: /
# 1st ZipCommander Net - IE based browser
User-Agent: 1st ZipCommander (Net) - http://www.zipcommander.com/
Disallow: /
# 2Bone online link checker
User-Agent: 2Bone_LinkChecker/1.0 libwww-perl/5.64
Disallow: /
# 4Anything robot
User-Agent: 4anything.com LinkChecker v2.0
Disallow: /
# Unknown guestbook spamming or harvesting tool from diff. IPs
User-Agent: 8484 Boston Project v 1.0
Disallow: /
# neofonie search robot Germany
User-Agent: :robot/1.0 (linux) ( admin e-mail: undefined http://www.neofonie.de/loesungen/search/robot.html )
Disallow: /
# A-Online.at robot - now Jet2Web Search
User-Agent: A-Online Search
Disallow: /
# A1 Keyword Research - search engine and keyword optimization software
User-Agent: A1 Keyword Research/1.0.2 (+http://www.micro-sys.dk/products/keyword-research/) miggibot/2007.03.27
Disallow: /
# MiggiBot website crawler engine - A1 Sitemap Generator
User-Agent: A1 Sitemap Generator/1.0 (+http://www.micro-sys.dk/products/sitemap-generator/) miggibot/2006.01.24
Disallow: /
# Aardvark web crawler for Sun's Blog recommendations
User-Agent: aardvark-crawler
Disallow: /
# Abacho / Crawler.de robot
User-Agent: AbachoBOT
Disallow: /
# Abacho / Crawler.de robot
User-Agent: AbachoBOT (Mozilla compatible)
Disallow: /
# ABCdatos - Castilian program & tutorial directory
User-Agent: ABCdatos BotLink/5.xx.xxx#BBL
Disallow: /
# Aberja Hybridsuchmaschine (Germany) link checking
User-Agent: Aberja Checkomat
Disallow: /
# Nameprotect copyright search robot (24.177.134.x)
User-Agent: abot/0.1 (abot; http://www.abot.com; abot@abot.com)
Disallow: /
# About robot
User-Agent: About/0.1libwww-perl/5.47
Disallow: /
# Accela Technology RSS feed crawler
User-Agent: Accelatech RSSCrawler/0.4
Disallow: /
# Accoona Search robot
User-Agent: accoona
Disallow: /
# Accoona Search robot
User-Agent: Accoona-AI-Agent/1.1.1 (crawler at accoona dot com)
Disallow: /
# Accoona Search robot
User-Agent: Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)
Disallow: /
# Ace Explorer - IE based browser
User-Agent: Ace Explorer
Disallow: /
# Ackerm search robot
User-Agent: Ack (http://www.ackerm.com/)
Disallow: /
# Acoi picture finder robot
User-Agent: AcoiRobot
Disallow: /
# Acoon.de search (Germany) robot
User-Agent: Acoon Robot v1.50.001
Disallow: /
# Acoon.de search (Germany) robot
User-Agent: Acoon Robot v1.52 (http://www.acoon.de)
Disallow: /
# Acoon.de search (Germany) robot
User-Agent: Acoon-Robot 4.0.x.[xx] (http://www.acoon.de)
Disallow: /
# Acoon.de search (Germany) robot
User-Agent: Acoon-Robot v3.xx (http://www.acoon.de and http://www.acoon.com)
Disallow: /
# Acorn Search Project
User-Agent: Acorn/Nutch-0.9 (Non-Profit Search Engine; acorn.isara.org; acorn at isara dot org)
Disallow: /
# LibMaster.com Active Bookmark HTML page creator
User-Agent: ActiveBookmark 1.x
Disallow: /
# Activeworlds 3D homepage browser
User-Agent: Activeworlds
Disallow: /
# Activeworlds 3D homepage browser
User-Agent: ActiveWorlds/3.xx (xxx)
Disallow: /
# Ad Muncher - banner killer
User-Agent: Ad Muncher v4.xx.x
Disallow: /
# Ad Muncher - banner killer
User-Agent: Ad Muncher v4x Build xxxxx
Disallow: /
# website directory adaxas link checking
User-Agent: Adaxas Spider (http://www.adaxas.net/)
Disallow: /
# Avant Browser - IE based browser
User-Agent: Advanced Browser (http://www.avantbrowser.com)
Disallow: /
# Aesop robot
User-Agent: AESOP_com_SpiderMan
Disallow: /
# Agada search (Germany) robot
User-Agent: agadine/1.x.x (+http://www.agada.de)
Disallow: /
# SharewarePlaza link checking
User-Agent: Agent-SharewarePlazaFileCheckBot/2.0+(+http://www.SharewarePlaza.com)
Disallow: /
# Linkomatic submission verifier
User-Agent: AgentName/0.1 libwww-perl/5.48
Disallow: /
# 21seek.com (China) robot (218.17.90.xxx)
User-Agent: AIBOT/2.1 By +(www.21seek.com A Real artificial intelligence search engine China)
Disallow: /
# AideRss - Postrank RSS and Blog filtering
User-Agent: AideRSS/1.0 (aiderss.com)
Disallow: /
# Nameprotect copyright search robot (24.177.134.x)
User-Agent: aipbot/1.0 (aipbot; http://www.aipbot.com; aipbot@aipbot.com)
Disallow: /
# Nameprotect copyright search robot (24.177.134.x)
User-Agent: aipbot/2-beta (aipbot dev; http://aipbot.com; aipbot@aipbot.com)
Disallow: /
# Akregator news feed reader for KDE
User-Agent: Akregator/1.2.9; librss/remnants
Disallow: /
# Aladin robot
User-Agent: Aladin/3.324
Disallow: /
# Phone.com UP.Browser for mobiles on Alcatel cellphone
User-Agent: Alcatel-BG3/1.0 UP.Browser/5.0.3.1.2
Disallow: /
# Aleksika Danmark - Search engine optimization spider
User-Agent: Aleksika Spider/1.0 (+http://www.aleksika.com/)
Disallow: /
# Alertinfo - French version of Feedreader 3.xx
User-Agent: AlertInfo 2.0 (Powered by Newsbrain)
Disallow: /
# Vestris robot
User-Agent: AlkalineBOT/1.3
Disallow: /
# Vestris robot
User-Agent: AlkalineBOT/1.4 (1.4.0326.0 RTM)
Disallow: /
# Allesklar.de robot
User-Agent: Allesklar/0.1 libwww-perl/5.46
Disallow: /
# Alligator download manager
User-Agent: Alligator 1.31 (www.nearsoftware.com)
Disallow: /
# Unknown robot from Allrati.com
User-Agent: Allrati/1.1 (+)
Disallow: /
# Altavista robot
User-Agent: AltaVista Intranet V2.0 AVS EVAL search@freeit.com
Disallow: /
# Altavista robot
User-Agent: AltaVista Intranet V2.0 Compaq Altavista Eval sveand@altavista.net
Disallow: /
# Altavista robot
User-Agent: AltaVista Intranet V2.0 evreka.com crawler@evreka.com
Disallow: /
# Altavista robot
User-Agent: AltaVista V2.0B crawler@evreka.com
Disallow: /
# Amfibi Search robot
User-Agent: AmfibiBOT
Disallow: /
# Amfibi Search robot
User-Agent: Amfibibot/0.06 (Amfibi Web Search; http://www.amfibi.com; agent@amfibi.com)
Disallow: /
# Amfibi Search robot
User-Agent: Amfibibot/0.07 (Amfibi Robot; http://www.amfibi.com; agent@amfibi.com)
Disallow: /
# amibot - Amidalla search engine robot (62.241.33.xx)
User-Agent: amibot
Disallow: /
# AWeb Amiga browser
User-Agent: Amiga-AWeb/3.4.167SE
Disallow: /
# Voyager - Amiga browser
User-Agent: AmigaVoyager/3.4.4 (MorphOS/PPC native)
Disallow: /
# Amiga Miami TCP Stack
User-Agent: AmiTCP Miami (AmigaOS 2.04)
Disallow: /
# NF embedded browser on Amoi's Skypephone
User-Agent: Amoi 8512/R21.0 NF-Browser/3.3
Disallow: /
# Amazon.com robot for checking their affiliate sites
User-Agent: amzn_assoc
Disallow: /
# Annomille Italian historical oriented robot
User-Agent: AnnoMille spider 0.1 alpha - http://www.annomille.it
Disallow: /
# annotate Google - Firefox extension for annotating Google search results
User-Agent: annotate_google; http://ponderer.org/download/annotate_google.user.js
Disallow: /
# Megaproxy user
User-Agent: Anonymized by ProxyOS: http://www.megaproxy.com
Disallow: /
# faked user agent
User-Agent: Anonymizer/1.1
Disallow: /
# AnswerBus natural language search using COLLATE technology
User-Agent: AnswerBus (http://www.answerbus.com/)
Disallow: /
# AnswerChase search tool
User-Agent: AnswerChase PROve x.0
Disallow: /
# AnswerChase search tool
User-Agent: AnswerChase x.0
Disallow: /
# ANT Fresco Browser
User-Agent: ANTFresco/x.xx
Disallow: /
# Antibot (discontinued) robot
User-Agent: antibot-V1.1.5/i586-linux-2.2
Disallow: /
# Anzwers (Yahoo) Australia robot
User-Agent: AnzwersCrawl/2.0 (anzwerscrawl@anzwers.com.au;Engine)
Disallow: /
# Apexoo Search spider
User-Agent: Apexoo Spider 1.x
Disallow: /
# JavaOS app. for SEGA Saturn Internet and Sanyo Internet-TV
User-Agent: Aplix HTTP/1.0.1
Disallow: /
# JavaOS app. for Sanyo Internet-TV
User-Agent: Aplix_SANYO_browser/1.x (Japanese)
Disallow: /
# JavaOS app. for SEGA Saturn Internet
User-Agent: Aplix_SEGASATURN_browser/1.x (Japanese)
Disallow: /
# Aport robot
User-Agent: Aport
Disallow: /
# Walhello Internet Search robot
User-Agent: appie 1.1 (www.walhello.com)
Disallow: /
# CoreMedia player on Apple iPhone
User-Agent: Apple iPhone v1.1.4 CoreMedia v1.0.0.4A102
Disallow: /
# PubSub - Mac OS X utility for managing RSS/Atom subscriptions via the PubSub framework
User-Agent: Apple-PubSub/65.1.1
Disallow: /
# Araby search - Arabia
User-Agent: ArabyBot (compatible; Mozilla/5.0; GoogleBot; FAST Crawler 6.4; http://www.araby.com;)
Disallow: /
# Covac Arachnid Web Crawler
User-Agent: ArachBot
Disallow: /
# Euroseek spider
User-Agent: Arachnoidea (arachnoidea@euroseek.com)
Disallow: /
# Amazon.com robot for checking their affiliate sites
User-Agent: aranhabot
Disallow: /
# Excite spider
User-Agent: ArchitextSpider
Disallow: /
# Heritrix - The Internet Archive's open-source crawler (207.241.225.2xx)
User-Agent: archive.org_bot
Disallow: /
# Simpy Bookmarklet crawler (69.55.233.xx)
User-Agent: Argus/1.1 (Nutch; http://www.simpy.com/bot.html; feedback at simpy dot com)
Disallow: /
# Arikus inContext search engine software
User-Agent: Arikus_Spider
Disallow: /
# Tomba project: the Portuguese web archive
User-Agent: Arquivo-web-crawler (compatible; heritrix/1.12.1 +http://arquivo-web.fccn.pt)
Disallow: /
# Asaha search robot (Turkey)
User-Agent: ASAHA Search Engine Turkey V.001 (http://www.asaha.com/)
Disallow: /
# ASAHINA Antenna information detecting agent
User-Agent: Asahina-Antenna/1.x
Disallow: /
# ASAHINA Antenna information detecting agent
User-Agent: Asahina-Antenna/1.x (libhina.pl/x.x ; libtime.pl/x.x)
Disallow: /
# Ask 24x Info robot
User-Agent: ask.24x.info
Disallow: /
# Ask About Oil - Petroleum related search (24.227.212.xxx) using Nutch
User-Agent: AskAboutOil/0.06-rcp (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@askaboutoil.com)
Disallow: /
# askEd! / Inferret search (Japan) robot using Nutch
User-Agent: asked/Nutch-0.8 (web crawler; http://asked.jp; epicurus at gmail dot com)
Disallow: /
# ASPSeek search engine software -Yahoo-Inc. / Telecom Canada robot
User-Agent: ASPSeek/1.2.5
Disallow: /
# Swsoft.net robot using Aspseek
User-Agent: ASPseek/1.2.9d
Disallow: /
# ASPSeek search engine software
User-Agent: ASPSeek/1.2.x
Disallow: /
# ASPSeek search engine software
User-Agent: ASPSeek/1.2.xa
Disallow: /
# ASPSeek search engine software
User-Agent: ASPseek/1.2.xx
Disallow: /
# ASPSeek search engine software
User-Agent: ASPSeek/1.2.xxpre
Disallow: /
# Associative Sort robot
User-Agent: ASSORT/0.10
Disallow: /
# Singingfish media spider (64.12.186.2xx) via AOL search
User-Agent: asterias/2.0
Disallow: /
# Atlocal local business search robot
User-Agent: AtlocalBot/1.1 +(http://www.atlocal.com/local-web-site-owner.html)
Disallow: /
# Atomic Email Hunter email extracting and harvesting
User-Agent: Atomic_Email_Hunter/4.0
Disallow: /
# Atomz robot
User-Agent: Atomz/1.0
Disallow: /
# atSpider (ceased) email harvester / spambot
User-Agent: atSpider/1.0
Disallow: /
# Attentio social media monitoring and analysing
User-Agent: Attentio/Nutch-0.9-dev (Attentio's beta blog crawler; www.attentio.com; info@attentio.com)
Disallow: /
# Samsung SPH-A660 phone with Sprint software
User-Agent: AU-MIC/2.0 MMP/2.0
Disallow: /
# Audiovox SMT5600 (AT&T) Smartphone mobile phone browser
User-Agent: AUDIOVOX-SMT5600
Disallow: /
# Augurnet Swiss (was www.augurnet.ch) search robot
User-Agent: augurfind
Disallow: /
# Augurnet Swiss (was www.augurnet.ch) search robot
User-Agent: augurnfind V-1.x
Disallow: /
# Auto Email Pro Email harvester
User-Agent: autoemailspider
Disallow: /
# Linkscan tool from Elsop
User-Agent: autohttp
Disallow: /
# Autowebdir - The Automatically Generated Web Directory
User-Agent: autowebdir 1.1 (www.autowebdir.com)
Disallow: /
# Altavista robot ??
User-Agent: AV Fetch 1.0
Disallow: /
# Avant Browser - IE based browser
User-Agent: Avant Browser (http://www.avantbrowser.com)
Disallow: /
# National Research Council Canada robot
User-Agent: AVSearch-1.0(peter.turney@nrc.ca)
Disallow: /
# Unknown robot from 205.203.108.xx (telerate.com)
User-Agent: AVSearch-2.0-fusionIdx-14-CompetitorWebSites
Disallow: /
# Altavista robot
User-Agent: AVSearch-3.0(AltaVista/AVC)
Disallow: /
# AWeb Amiga browser
User-Agent: AWeb
Disallow: /
# Axada search Germany robot
User-Agent: axadine/ (Axadine Crawler; http://www.axada.de/; )
Disallow: /
# Axmo search robot
User-Agent: AxmoRobot - Crawling your site for better indexing on www.axmo.com search engine.
Disallow: /
# Azureus Java BitTorrent Client
User-Agent: Azureus 2.x.x.x
Disallow: /
# Babaloo search robot (Slovenia)
User-Agent: BabalooSpider/1.3 (BabalooSpider; http://www.babaloo.si; spider@babaloo.si)
Disallow: /
# BaBoom Web Portal (ODP) robot (66.98.254.xx)
User-Agent: BaboomBot/1.x.x (+http://www.baboom.us)
Disallow: /
# BackStreet Browser - Offline browser / website downloader
User-Agent: BackStreet Browser 3.x
Disallow: /
# Baidu search (Japan) image crawler
User-Agent: BaiduImagespider+(+http://www.baidu.jp/search/s308.html)
Disallow: /
# Baidu spidering engine - used by diff. IPs
User-Agent: BaiDuSpider
Disallow: /
# Baidu spidering engine - used by diff. IPs
User-Agent: Baiduspider+(+http://help.baidu.jp/system/05.html)
Disallow: /
# Baidu spidering engine - used by diff. IPs
User-Agent: Baiduspider+(+http://www.baidu.com/search/spider.htm)
Disallow: /
# Baidu search (Japan) crawler
User-Agent: Baiduspider+(+http://www.baidu.com/search/spider_jp.html)
Disallow: /
# Balihoo - Vertical search engine crawler (beta)
User-Agent: Balihoo/Nutch-1.0-dev (Crawler for Balihoo.com search engine - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)
Disallow: /
# Project BanBots Perl script robot
User-Agent: BanBots/1.2 (spider@banbots.com)
Disallow: /
# Barca Pro email & PIM software
User-Agent: Barca/2.0.xxxx
Disallow: /
# Barca Pro email & PIM software
User-Agent: BarcaPro/1.4.xxxx
Disallow: /
# Barrahome crawler
User-Agent: BarraHomeCrawler (albertof@barrahome.org)
Disallow: /
# Unknown user agent from Microsoft
User-Agent: bCentral Billing Post-Process
Disallow: /
# Business.com robot
User-Agent: bdcindexer_2.6.2 (research@bdc)
Disallow: /
# Brandimensions Brand Protection robot
User-Agent: BDFetch
Disallow: /
# Bdncentral Sitesearch robot
User-Agent: BDNcentral Crawler v2.3 [en] (http://www.bdncentral.com/robot.html) (X11; I; Linux 2.0.44 i686)
Disallow: /
# beammachine web directory (Germany) link checking
User-Agent: BeamMachine/0.5 (dead link remover of www.beammachine.net)
Disallow: /
# Beauty robot for Cosmoty - German beauty and wellness search
User-Agent: beautybot/1.0 (+http://www.uchoose.de/crawler/beautybot/)
Disallow: /
# A Passion for Jazz music related search robot
User-Agent: BebopBot/2.5.1 ( crawler http://www.apassion4jazz.net/bebopbot.html )
Disallow: /
# LinkcheckerBeepware (site is down) web directory link checking
User-Agent: BeebwareDirectory/v0.01
Disallow: /
# Big Brother link checking tool
User-Agent: Big Brother (http://pauillac.inria.fr/~fpottier/)
Disallow: /
# GoonGee.com link popularity checking
User-Agent: Big Fish v1.0
Disallow: /
# BB4 network monitoring
User-Agent: BigBrother/1.6e
Disallow: /
# BigClique Search robot
User-Agent: BigCliqueBOT/1.03-dev (bigclicbot; http://www.bigclique.com; bot@bigclique.com)
Disallow: /
# Biglotron search (France) robot
User-Agent: BIGLOTRON (Beta 2;GNU/Linux)
Disallow: /
# Bigsearch.ca search robot
User-Agent: Bigsearch.ca/Nutch-x.x-dev (Bigsearch.ca Internet Spider; http://www.bigsearch.ca/; info@enhancededge.com)
Disallow: /
# Bilbo - web frontend for the Nessus Security Scanner
User-Agent: Bilbo/2.3b-UNIX
Disallow: /
# Bilgi.com (Beta) search robot - Turkey
User-Agent: BilgiBetaBot/0.8-dev (bilgi.com (Beta) ; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
Disallow: /
# Bilgi.com (Beta) search robot - Turkey
User-Agent: BilgiBot/1.0(beta) (http://www.bilgi.com/; bilgi at bilgi dot com)
Disallow: /
# Carnegie Mellon School robot/link checking ?
User-Agent: billbot wjj@cs.cmu.edu
Disallow: /
# Bitacle Blog Search Archive robot
User-Agent: Bitacle bot/1.1
Disallow: /
# Bitacle Blog Search Archive robot
User-Agent: Bitacle Robot (V:1.0;) (http://www.bitacle.com)
Disallow: /
# Biyubi Navigator - Mexican browser for Fenix OS
User-Agent: Biyubi/x.x (Sistema Fenix; G11; Familia Toledo; es-mx)
Disallow: /
# Blackberry Wireless Internet browser via Google WAP Proxy
User-Agent: BlackBerry7520/4.0.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/5.0.3.3 UP.Link/5.1.2.12 (Google WAP Proxy/1.0)
Disallow: /
# FS Consulting (was www.fsconsult.net) Black Widow web crawler
User-Agent: BlackWidow
Disallow: /
# BlackWidow web site scanner / downloading tool
User-Agent: BlackWidow
Disallow: /
# Blaiz Enterprises RawGrunt search
User-Agent: Blaiz-Bee/1.0 (+http://www.blaiz.net)
Disallow: /
# Blaiz Enterprises RawGrunt search
User-Agent: Blaiz-Bee/2.00.8222 (BE Internet Search Engine http://www.rawgrunt.com)
Disallow: /
# Blaiz Enterprises RawGrunt search
User-Agent: Blaiz-Bee/2.00.xxxx (+http://www.blaiz.net)
Disallow: /
# Blitzsuche Germany robot
User-Agent: BlitzBOT@tricus.net
Disallow: /
# Blitzsuche Germany robot
User-Agent: BlitzBOT@tricus.net (Mozilla compatible)
Disallow: /
# BlockNote web page editor
User-Agent: BlockNote.Net
Disallow: /
# blogdex robot from MIT.edu
User-Agent: BlogBot/1.x
Disallow: /
# BlogBridge RSS reader
User-Agent: BlogBridge 2.13 (http://www.blogbridge.com/)
Disallow: /
# Bloglines article search
User-Agent: Bloglines Title Fetch/1.0 (http://www.bloglines.com)
Disallow: /
# Bloglines graphics crawler
User-Agent: Bloglines-Images/0.1 (http://www.bloglines.com)
Disallow: /
# Bloglines news crawler
User-Agent: Bloglines/3.1 (http://www.bloglines.com)
Disallow: /
# FeedMap / BlogMap geo coding service
User-Agent: BlogMap (http://www.feedmap.net)
Disallow: /
# Intelliseek's BlogPulse blog search
User-Agent: Blogpulse (info@blogpulse.com)
Disallow: /
# Intelliseek's BlogPulse blog search
User-Agent: BlogPulseLive (support@blogpulse.com)
Disallow: /
# IceRocket Web search robot
User-Agent: BlogSearch/1.x +http://www.icerocket.com/
Disallow: /
# Art of Computing blog search project
User-Agent: blogsearchbot-pumpkin-3
Disallow: /
# BlogsNow realtime link tracker robot
User-Agent: BlogsNowBot, V 2.01 (+http://www.blogsnow.com/)
Disallow: /
# BlogVibe news and Blog crawler (Netherlands)
User-Agent: BlogVibeBot-v1.1 (spider@blogvibe.nl)
Disallow: /
# blogWatcher robot from Okumura Group Tokyo (131.112.182.xxx)
User-Agent: blogWatcher_Spider/0.1 (http://www.lr.pi.titech.ac.jp/blogWatcher/)
Disallow: /
# IceRocket Web search robot
User-Agent: BlogzIce/1.0 (+http://icerocket.com; rhodes@icerocket.com)
Disallow: /
# IceRocket Web search robot
User-Agent: BlogzIce/1.0 +http://www.icerocket.com/
Disallow: /
# Blooby search (beta) robot
User-Agent: BloobyBot
Disallow: /
# Balihoo - Search Engine for Advertising Media
User-Agent: Bloodhound/Nutch-0.9 (Testing Crawler for Research - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)
Disallow: /
# Bluefish HTML-editor for Linux
User-Agent: bluefish 0.6 HTML editor
Disallow: /
# Part of ButtMan remote access tool
User-Agent: BMCLIENT
Disallow: /
# Bookmark Express bookmark manager
User-Agent: BMLAUNCHER
Disallow: /
# Bobby web accessibility desktop testing tool
User-Agent: Bobby/4.0.x RPT-HTTPClient/0.3-3E
Disallow: /
# Boitho search (Norway) robot via 80.202.212.xx / 80.80.111.xx
User-Agent: boitho.com-dc/0.xx (http://www.boitho.com/dcbot.html)
Disallow: /
# Boitho search (Norway) robot via 80.202.212.xx / 80.80.111.xx
User-Agent: boitho.com-robot/1.x
Disallow: /
# Boitho search (Norway) robot via 80.202.212.xx / 80.80.111.xx
User-Agent: boitho.com-robot/1.x (http://www.boitho.com/bot.html)
Disallow: /
# Bookdog - Mac bookmark manager
User-Agent: Bookdog/x.x
Disallow: /
# Bookmark Buddy - favorite bookmark manager
User-Agent: Bookmark Buddy bookmark checker (http://www.bookmarkbuddy.net/)
Disallow: /
# Favourites managing program
User-Agent: Bookmark Renewal Check Agent [http://www.bookmark.ne.jp/]
Disallow: /
# Favourites managing program
User-Agent: Bookmark Renewal Check Agent [http://www.bookmark.ne.jp/] (Version 2.0beta)
Disallow: /
# Bookmark Base bookmark manager
User-Agent: BookmarkBase(2/;http://bookmarkbase.com)
Disallow: /
# cybercity.fr user robot / faked user agent ?
User-Agent: Bot mailto:craftbot@yahoo.com
Disallow: /
# BD-Brandprotect copyright infringement crawler
User-Agent: BPImageWalker/2.0 (www.bdbrandprotect.com)
Disallow: /
# BravoBrian bSTOP parental control
User-Agent: BravoBrian bstop.bravobrian.it
Disallow: /
# Robot for BravoBrian bSTOP
User-Agent: BravoBrian SpiderEngine MarcoPolo
Disallow: /
# BrightCloud web filtering for classifying websites
User-Agent: BrightCrawler (http://www.brightcloud.com/brightcrawler.asp)
Disallow: /
# Webarchive Project Bruinbot crawler
User-Agent: BruinBot (+http://webarchive.cs.ucla.edu/bruinbot.html)
Disallow: /
# Inktomi (Hotbot-Lycos NBCi) robot
User-Agent: BSDSeek/1.0
Disallow: /
# BravoBrian bSTOP parental control
User-Agent: BStop.BravoBrian.it Agent Detector
Disallow: /
# BitTorrent Search Engine btbot robot
User-Agent: BTbot/0.x (+http://www.btbot.com/btbot.html)
Disallow: /
# µTorrent BitTorrent Client
User-Agent: BTWebClient/180B(9704)
Disallow: /
# BuildCMS crawler - market monitoring project of BuildCMS
User-Agent: BuildCMS crawler (http://www.buildcms.com/crawler)
Disallow: /
# Bulkfeeds: RSS directory link checking
User-Agent: Bulkfeeds/r1752 (http://bulkfeeds.net/)
Disallow: /
# BullsEye/Intelliseek robot
User-Agent: BullsEye
Disallow: /
# Relevare Portal software robot
User-Agent: bumblebee@relevare.com
Disallow: /
# Microsoft server information robot (see link)
User-Agent: BunnySlippers
Disallow: /
# BurstFind search crawler (64.34.172.xx)
User-Agent: BurstFindCrawler/1.1 (crawler.burstfind.com; http://crawler.burstfind.com; crawler@burstfind.com)
Disallow: /
# Buscaplus (Spain) robot
User-Agent: Buscaplus Robi/1.0 (http://www.buscaplus.com/robi/)
Disallow: /
# Logitech Desktop Managers (LDM) Backweb (BW) update check
User-Agent: BW-C-2.0
Disallow: /
# Basic Web Hacking 3 fake user-agent from Hellbound Hackers challenges
User-Agent: bwh3_user_agent
Disallow: /
# Amfibi Search robot
User-Agent: Cabot/Nutch-0.9 (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)
Disallow: /
# Amfibi Search robot
User-Agent: Cabot/Nutch-1.0-dev (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)
Disallow: /
# Evolution integrated mail solution Camel TCP stream class
User-Agent: CamelHttpStream/1.0
Disallow: /
# Some user agent
User-Agent: Cancer Information and Support International;
Disallow: /
# Cosmix project crawler (204.14.48.x / 38.113.234.xxx)
User-Agent: carleson/1.0
Disallow: /
# Carnegie Mellon University WebBOT
User-Agent: Carnegie_Mellon_University_Research_WebBOT-->PLEASE READ-->http://www.andrew.cmu.edu/~brgordon/webbot/index.html http://www.andrew.cmu.edu/~brgordon/webbot/index.html
Disallow: /
# Carnegie Mellon University WebBOT
User-Agent: Carnegie_Mellon_University_WebCrawler http://www.andrew.cmu.edu/~brgordon/webbot/index.html
Disallow: /
# Catall.de search & web directory (Germany)
User-Agent: Catall Spider
Disallow: /
# UIUCs Cazoodle search based on MetaQuerier
User-Agent: CazoodleBot/CazoodleBot-0.1 (CazoodleBot Crawler; http://www.cazoodle.com/cazoodlebot; cazoodlebot@cazoodle.com)
Disallow: /
# CommonCrawl Foundation search crawler
User-Agent: CCBot/1.0 (+http://www.commoncrawl.org/bot.html)
Disallow: /
# Empyreum Ccubee (Czech) search engine solution
User-Agent: ccubee/x.x
Disallow: /
# Timewe mobile browser (WAP) simulator (Japan)
User-Agent: CDR/1.7.1 Simulator/0.7(+http://timewe.net) Profile/MIDP-1.0 Configuration/CLDC-1.0
Disallow: /
# Cisco Content Engine
User-Agent: CE-Preload
Disallow: /
# Nordic semantic search engine
User-Agent: CentiverseBot
Disallow: /
# Nordic semantic search engine
User-Agent: CentiverseBot - investigator
Disallow: /
# Nordic semantic search engine
User-Agent: CentiverseBot/3.0 (http://www.centiverse-project.net)
Disallow: /
# Floorstransformed.com robot (link checking ??)
User-Agent: Ceramic Tile Installation Guide (http://www.floorstransformed.com)
Disallow: /
# CERN Line Mode Browser
User-Agent: CERN-LineMode/2.15
Disallow: /
# Cosmix project crawler (204.14.48.x / 38.113.234.xxx)
User-Agent: cfetch/1.0
Disallow: /
# Mac OS X CoreFoundation CFNetwork API
User-Agent: CFNetwork/x.x
Disallow: /
# cg-eye CGI checker
User-Agent: cg-eye interactive
Disallow: /
# Charon Amiga download manager
User-Agent: Charon/1.x (Amiga)
Disallow: /
# Chat Catcher blog monitoring robot
User-Agent: Chat Catcher/1.0
Disallow: /
# Checkbot link validation
User-Agent: Checkbot/1.xx LWP/5.xx
Disallow: /
# Checklinks - Perl link checker
User-Agent: CheckLinks/1.x.x
Disallow: /
# NTL user agent
User-Agent: CheckUrl
Disallow: /
# CheckWeb link validation
User-Agent: CheckWeb
Disallow: /
# Chilkat HTTP component user-agent
User-Agent: Chilkat/1.0.0 (+http://www.chilkatsoft.com/ChilkatHttpUA.asp)
Disallow: /
# Unknown spam bot from telekom.com.my (218.111.83.xxx)
User-Agent: China Local Browse 2.6
Disallow: /
# Chitika Inc. Blog advertising
User-Agent: Chitika ContentHit 1.0
Disallow: /
# Christcentral.com Christcrawler (was www.christcrawler.com)
User-Agent: ChristCRAWLER 2.0
Disallow: /
# CHttpClient - C++ class using WinInet
User-Agent: CHttpClient by Open Text Corporation
Disallow: /
# CipinetBot - Cipinet Search Engine Web Crawler
User-Agent: CipinetBot (http://www.cipinet.com/bot.html)
Disallow: /
# Cityreview regional search (Germany) link checking
User-Agent: Cityreview Robot (+http://www.cityreview.org/crawler/)
Disallow: /
# Commission Junction link checking spider
User-Agent: CJ Spider/
Disallow: /
# CJB Net anonymous socks proxy service (216.194.70.x)
User-Agent: CJB.NET Proxy
Disallow: /
# Claria (ex Gator) SearchScout robot (64.152.73.xx)
User-Agent: ClariaBot/1.0
Disallow: /
# Claymont Search robot
User-Agent: Claymont.com
Disallow: /
# Seznam Search (Czech Republic) robot
User-Agent: CloakDetect/0.9 (+http://fulltext.seznam.cz/)
Disallow: /
# Clush clustered search robot
User-Agent: Clushbot/2.x (+http://www.clush.com/bot.html)
Disallow: /
# Clush clustered search robot
User-Agent: Clushbot/3.x-BinaryFury (+http://www.clush.com/bot.html)
Disallow: /
# Clush clustered search robot
User-Agent: Clushbot/3.xx-Ajax (+http://www.clush.com/bot.html)
Disallow: /
# Clush clustered search robot
User-Agent: Clushbot/3.xx-Hector (+http://www.clush.com/bot.html)
Disallow: /
# Clush clustered search robot
User-Agent: Clushbot/3.xx-Peleus (+http://www.clush.com/bot.html)
Disallow: /
# COAST Webmaster - Web management and maintenance software
User-Agent: COAST WebMaster Pro/4.x.x.xx (Windows NT)
Disallow: /
# Proposed Content-Based Image Tracking System (CoBITS) P2P crawler
User-Agent: CoBITSProbe
Disallow: /
# Cocoa del.icio.us (social bookmarks manager) client for Mac OS X
User-Agent: Cocoal.icio.us/1.0 (v36) (Mac OS X; http://www.scifihifi.com/cocoalicious)
Disallow: /
# Cogent Search Bot from Cogent Software Solutions for unknown purposes
User-Agent: Cogentbot/1.X (+http://www.cogentsoftwaresolutions.com/bot.html)
Disallow: /
# Cold Fusion server used by various IPs i.e.: - NetWORLD web catalogue link checking
User-Agent: ColdFusion
Disallow: /
# Cold Fusion server used by Bookmark Tracker - online favourites management
User-Agent: ColdFusion (BookmarkTracker.com)
Disallow: /
# WebCollage Syndicator graphics crawler/collector
User-Agent: collage.cgi/1.xx
Disallow: /
# Combine harvesting & indexing robot
User-Agent: combine/0.0
Disallow: /
# Combine harvesting & indexing robot
User-Agent: Combine/2.0 http://combine.it.lth.se/
Disallow: /
# Combine harvesting & indexing robot
User-Agent: Combine/3 http://combine.it.lth.se/
Disallow: /
# Combine harvesting & indexing robot
User-Agent: Combine/x.0
Disallow: /
# cometrics Web Content Mining solution - Germany
User-Agent: cometrics-bot, http://www.cometrics.de
Disallow: /
# Wildsoft Germany (closed) Internet client system user-agent (was www.oskarweb.de)
User-Agent: Commerce Browser Center
Disallow: /
# UCLA Complex Networks Groups Complex Network Analysis
User-Agent: complex_network_group/Nutch-0.9-dev (discovering the structure of the world-wide-web; http://cantor.ee.ucla.edu/~networks/crawl; nimakhaj@gmail.com)
Disallow: /
# Hungarian Academy of Sciences data mining search
User-Agent: Computer_and_Automation_Research_Institute_Crawler crawler@ilab.sztaki.hu
Disallow: /
# ComRite Chinese Search Engine for Oversea Web Sites (69.248.26.xx)
User-Agent: Comrite/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
Disallow: /
# unknown
User-Agent: Contact
Disallow: /
# Probably E-Mail harvesting robot - same as LMQueueBot
User-Agent: ContactBot/0.2
Disallow: /
# ContentSmartz e-mail harvesting tools
User-Agent: ContentSmartz
Disallow: /
# Internet Explorer versions 4.x and 5 plugin content
User-Agent: contype
Disallow: /
# Converas RetrievalWare Internet Spider (63.241.61.x)
User-Agent: Convera Internet Spider V6.x
Disallow: /
# Converas RetrievalWare Internet Spider (63.241.61.x)
User-Agent: ConveraCrawler/0.2
Disallow: /
# Converas RetrievalWare Internet Spider (63.241.61.x)
User-Agent: ConveraCrawler/0.9d (+http://www.authoritativeweb.com/crawl)
Disallow: /
# Converas RetrievalWare Internet Spider (63.241.61.x)
User-Agent: ConveraMultiMediaCrawler/0.1 (+http://www.authoritativeweb.com/crawl)
Disallow: /
# Suchmaschine21 (Germany) robot
User-Agent: CoolBot
Disallow: /
# Coral Content Distribution Network
User-Agent: CoralWebPrx/0.1.1x (See http://coralcdn.org/)
Disallow: /
# Xyleme SA France robot
User-Agent: cosmos/0.8_(robot@xyleme.com)
Disallow: /
# Xyleme SA France robot
User-Agent: cosmos/0.9_(robot@xyleme.com)
Disallow: /
# Coteo.com - local French directory link checking
User-Agent: CoteoNutchCrawler/Nutch-0.9 (info [at] coteo [dot] com)
Disallow: /
# Cougarsearch.com robot
User-Agent: CougarSearch/0.x (+http://www.cougarsearch.com/faq.shtml)
Disallow: /
# Covac Arachnid Web Crawler
User-Agent: Covac TexAs Arachbot
Disallow: /
# CoverScout for iTunes - CD cover search tool
User-Agent: CoverScout%203/3.0.1 CFNetwork/339.5 Darwin/9.5.0 (i386) (iMac5,1)
Disallow: /
# Naver Japan / Korea robot
User-Agent: Cowbot-0.1 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)
Disallow: /
# Naver Japan / Korea robot
User-Agent: Cowbot-0.1.x (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)
Disallow: /
# Converas RetrievalWare Internet Spider
User-Agent: CrawlConvera0.1 (CrawlConvera@yahoo.com)
Disallow: /
# unknown robot via Level3.net
User-Agent: Crawler
Disallow: /
# Cometsystems Comet Search robot via Findwhat (now Miva)
User-Agent: Crawler (cometsearch@cometsystems.com)
Disallow: /
# Crawler.de / Abacho robot
User-Agent: Crawler admin@crawler.de
Disallow: /
# Crawler.de / Abacho robot
User-Agent: Crawler V 0.2.x admin@crawler.de
Disallow: /
# Alexa crawler
User-Agent: crawler@alexa.com
Disallow: /
# Pinpoint WAP search robot
User-Agent: CrawlerBoy Pinpoint.com
Disallow: /
# Crawlly Beta search - Germany (72.232.194.2xx)
User-Agent: Crawllybot/0.1 (Crawllybot; +http://www.crawlly.com; crawler@crawlly.com)
Disallow: /
# Creative Commons using Nutch open source robot
User-Agent: CreativeCommons/0.06-dev (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@lists.sourceforge.net)
Disallow: /
# Croccrawler robot
User-Agent: CrocCrawler vx.3 [en] (http://www.croccrawler.com) (X11; I; Linux 2.0.44 i686)
Disallow: /
# Web mining project from CSCI 659 (computer science course) at Indiana University
User-Agent: csci_b659/0.13
Disallow: /
# CSE HTML Validator for Windows
User-Agent: CSE HTML Validator Professional (http://www.htmlvalidator.com/)
Disallow: /
# Cuam - IE based browser
User-Agent: Cuam Ver0.050bx
Disallow: /
# Cuasar (Spain) music / ringtone search spider
User-Agent: Cuasarbot/0.9b http://www.cuasar.com/spider_beta/
Disallow: /
# Curl file transferring tool
User-Agent: curl/7.10.x (i386-redhat-linux-gnu) libcurl/7.10.x OpenSSL/0.9.7a ipv6 zlib/1.1.4
Disallow: /
# Curl file transferring tool
User-Agent: curl/7.7.x (i386--freebsd4.3) libcurl 7.7.x (SSL 0.9.6) (ipv6 enabled)
Disallow: /
# Curl file transferring tool
User-Agent: curl/7.8 (i686-pc-linux-gnu) libcurl 7.8 (OpenSSL 0.9.6)
Disallow: /
# Curl file transferring tool
User-Agent: curl/7.9.x (win32) libcurl 7.9.x
Disallow: /
# CurryGuide UK link check robot
User-Agent: CurryGuide SiteScan 1.1
Disallow: /
# Custo web site spidering tool (link checking)
User-Agent: Custo x.x (www.netwu.com)
Disallow: /
# Bisnisseek (was www.bisnisseek.com) robot
User-Agent: Custom Spider www.bisnisseek.com /1.0
Disallow: /
# Cyberdog Mac Browser (was www.cyberdog.org)
User-Agent: Cyberdog/2.0 (Macintosh; 68k)
Disallow: /
# CyberPatrol LLC robot for web filtering software
User-Agent: CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)
Disallow: /
# CyberSpyder Link Test software
User-Agent: CyberSpyder Link Test/2.1.12 (admin@mspennyworth.com)
Disallow: /
# Cydral image & site search spider
User-Agent: CydralSpider/1.x (Cydral Web Image Search; http://www.cydral.com)
Disallow: /
# Cydral image & site search spider
User-Agent: CydralSpider/3.0 (Cydral Image Search; http://www.cydral.com)
Disallow: /
# Downloadaccelerator download manager
User-Agent: DA 3.5 (www.lidan.com)
Disallow: /
# Downloadaccelerator download manager
User-Agent: DA 4.0
Disallow: /
# Downloadaccelerator download manager
User-Agent: DA 4.0 (www.downloadaccelerator.com)
Disallow: /
# Downloadaccelerator download manager
User-Agent: DA 5.0
Disallow: /
# Downloadaccelerator download manager
User-Agent: DA 7.0
Disallow: /
# Download Accelerator Plus download manager
User-Agent: DAP x.x
Disallow: /
# PowerTCP ActiveX control tool
User-Agent: Dart Communications PowerTCP
Disallow: /
# Unknown bot from Kornet Korea (218.149.129.xxx) scans for Perl Awstats
User-Agent: DataCha0s/2.0
Disallow: /
# Unknown UA looking for Awstats Perl components
User-Agent: DataCha0s/2.0
Disallow: /
# INFOMINE/iVia Scholarly Internet Resource Collections robot
User-Agent: DataFountains/DMOZ Downloader
Disallow: /
# INFOMINE/iVia Scholarly Internet Resource Collections robot
User-Agent: DataFountains/Dmoz Downloader (http://ivia.ucr.edu/useragents.shtml)
Disallow: /
# INFOMINE/iVia Scholarly Internet Resource Collections robot
User-Agent: DataFountains/DMOZ Feature Vector Corpus Creator (http://ivia.ucr.edu/useragents.shtml)
Disallow: /
# DataparkSearch open source search engine
User-Agent: DataparkSearch/4.47 (+http://dataparksearch.org/bot)
Disallow: /
# DataparkSearch open source search engine
User-Agent: DataparkSearch/4.xx (http://www.dataparksearch.org/)
Disallow: /
# DataSpear Directory robot (24.109.29.xx)
User-Agent: DataSpear/1.0 (Spider; http://www.dataspear.com/spider.html; spider@dataspear.com)
Disallow: /
# DataSpear Directory robot (24.109.29.xx)
User-Agent: DataSpearSpiderBot/0.2 (DataSpear Spider Bot; http://dssb.dataspear.com/bot.html; dssb@dataspear.com)
Disallow: /
# Sicher-durchs-Netz German security related search (62.75.220.xxx)
User-Agent: DatenBot( http://www.sicher-durchs-netz.de/bot.html)
Disallow: /
# Wholeweb robot
User-Agent: DaviesBot/1.7 (www.wholeweb.net)
Disallow: /
# Daypop blog - weblog - online mag search spider
User-Agent: daypopbot/0.x
Disallow: /
# dbDig search engine
User-Agent: dbDig(http://www.prairielandconsulting.com)
Disallow: /
# Some site scanning tool via diff. IPs i.e.: - wanweb.net (208.6.163.xxx) - cox.net (68.4.xxx.xxx)
User-Agent: DBrowse 1.4b
Disallow: /
# Some site scanning tool via diff. IPs i.e.: - pacbell.net (67.112.xxx.xxx)
User-Agent: DBrowse 1.4d
Disallow: /
# DC-Sakura download manager
User-Agent: DC-Sakura/x.xx
Disallow: /
# unknown divine/Openmarket.com robot
User-Agent: dCSbot/1.1
Disallow: /
# some (website) downloading tool
User-Agent: DDD
Disallow: /
# Unknown user agent
User-Agent: dds explorer v1.0 beta
Disallow: /
# Searchengine.com (Germany) submission checking / robot (84.73.57.xx)
User-Agent: de.searchengine.comBot 1.2 (http://de.searchengine.com/spider)
Disallow: /
# DLC Perl HTTP link checking
User-Agent: DeadLinkCheck/0.4.0 libwww-perl/5.xx
Disallow: /
# Sootle Web Directory deep link checker
User-Agent: Deep Link Calculator v1.0
Disallow: /
# deepak-USC/ISI robot from USC/Information Science Institute
User-Agent: deepak-USC/ISI
Disallow: /
# Deepindex robot
User-Agent: DeepIndex
Disallow: /
# Zetbot search Belgium (213.41.128.xx) using Deepindex robot
User-Agent: DeepIndex ( http://www.zetbot.com )
Disallow: /
# Deepindex robot
User-Agent: DeepIndex (www.en.deepindex.com)
Disallow: /
# Deepindex robot (via Paragon.net Canada)
User-Agent: DeepIndexer.ca
Disallow: /
# del.icio.us picture robot for thumbnail preview via Yahoo
User-Agent: del.icio.us-thumbnails/1.0 Mozilla/5.0 (compatible; Konqueror/3.4; FreeBSD) KHTML/3.4.2 (like Gecko)
Disallow: /
# DeleGate application level gateway / proxy server
User-Agent: DeleGate/9.0.5-fix1
Disallow: /
# Some site scanning tool from 217.34.59.xxx (btopenworld.com)
User-Agent: Demo Bot DOT 16b
Disallow: /
# Some site scanning tool from 68.154.96.xx (bellsouth.net)
User-Agent: Demo Bot Z 16b
Disallow: /
# Denmex Websearch robot/link checking
User-Agent: Denmex websearch (http://search.denmex.com)
Disallow: /
# DataBecker Bilder Sauger (discontinued) web graphics downloader
User-Agent: Der große BilderSauger 2.00u
Disallow: /
# Searchspider.com robot (72.245.225.xxx)
User-Agent: dev-spider2.searchpsider.com/1.3b
Disallow: /
# DevComponents HTMLDocument Class Library for Visual Studio.net
User-Agent: DevComponents.com HtmlDocument Object
Disallow: /
# DiaGem Japan web crawler
User-Agent: DiaGem/1.1 (http://www.skyrocket.gr.jp/diagem.html)
Disallow: /
# Claria (ex Gator) SearchScout robot (64.152.73.xx)
User-Agent: Diamond/x.0
Disallow: /
# Claria (ex Gator) SearchScout robot (64.152.73.xx)
User-Agent: DiamondBot
Disallow: /
# Diggit! robot
User-Agent: Digger/1.0 JDK/1.3.0rc3
Disallow: /
# OpenPortal4U robot
User-Agent: DigOut4U
Disallow: /
# Findsame.com (site is offline) / Digital-Integrity robot
User-Agent: DIIbot/1.2
Disallow: /
# Dillo Web Browser
User-Agent: Dillo/0.8.5-i18n-misc
Disallow: /
# Dillo Web Browser
User-Agent: Dillo/0.x.x
Disallow: /
# disastrous - a del.icio.us link checker based on Python
User-Agent: disastrous/1.0.5 (running with Python 2.5.1; http://www.bortzmeyer.org/disastrous.html; archangel77@del.icio.us)
Disallow: /
# DISCo Pump offline browser / website ripper
User-Agent: DISCo Pump x.x
Disallow: /
# Unknown robot from Discovery Engine Corp.
User-Agent: disco/Nutch-0.9 (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)
Disallow: /
# Unknown robot from Discovery Engine Corp.
User-Agent: disco/Nutch-1.0-dev (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)
Disallow: /
# Ditto picture search robot
User-Agent: DittoSpyder
Disallow: /
# some download agent
User-Agent: dlman
Disallow: /
# Naver Japan / Korea robot
User-Agent: dloader(NaverRobot)/1.0
Disallow: /
# DNS Right - Online DNS tools
User-Agent: DNSRight.com WebBot Link Ckeck Tool. Report abuse to: dnsr@dnsright.com
Disallow: /
# NTT DoCoMo (Japan) robot
User-Agent: DoCoMo/1.0/Nxxxi/c10
Disallow: /
# NTT DoCoMo (Japan) robot
User-Agent: DoCoMo/1.0/Nxxxi/c10/TB
Disallow: /
# Google (216.239.39.x) proxy server
User-Agent: DoCoMo/1.0/P502i/c10 (Google CHTML Proxy/1.0)
Disallow: /
# NTT DoCoMo (Japan) robot
User-Agent: DoCoMo/2.0 P900iV(c100;TB;W24H11)
Disallow: /
# NTT DoCoMo (Japan) proxy server (210.136.161.1xx)
User-Agent: DoCoMo/2.0 SH901iS(c100;TB;W24H12),gzip(gfe) (via translate.google.com)
Disallow: /
# Yahoo Search Japan robot (203.216.197.xxx)
User-Agent: DoCoMo/2.0 SH902i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)
Disallow: /
# Yahoo Search Japan robot (203.216.197.xxx)
User-Agent: DoCoMo/2.0/SO502i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)
Disallow: /
# DocZilla - Mozilla-based SGML/XML/HTML- browser
User-Agent: DocZilla/1.0 (Windows; U; WinNT4.0; en-US; rv:1.0.0) Gecko/20020804
Disallow: /
# unknown robot from AGMLAB Information Technologies (Information retrieval system ?)
User-Agent: dodgebot/experimental
Disallow: /
# Donut P - Japanese IE based browser
User-Agent: DonutP; Windows98SE
Disallow: /
# Unknown robot from douban search (China) - maybe image crawling
User-Agent: Doubanbot/1.0 (bot@douban.com http://www.douban.com)
Disallow: /
# Download Demon/Netzip download manager
User-Agent: Download Demon/3.x.x.x
Disallow: /
# Download Druid IE plugin download manager
User-Agent: Download Druid 2.x
Disallow: /
# Download Express download manager
User-Agent: Download Express 1.0
Disallow: /
# Download Master download manager
User-Agent: Download Master
Disallow: /
# Download Ninja download manager (Japan)
User-Agent: Download Ninja 3.0
Disallow: /
# Download Wonder download manager
User-Agent: Download Wonder
Disallow: /
# Download-Tipp Germany link checking
User-Agent: Download-Tipp Linkcheck (http://download-tipp.de/)
Disallow: /
# download.exe .NET based downloading tool
User-Agent: Download.exe(1.1) (+http://www.sql-und-xml.de/freeware-tools/)
Disallow: /
# Download Direct download manager
User-Agent: DownloadDirect.1.0
Disallow: /
# Dr.WEB online virus scanner
User-Agent: Dr.Web (R) online scanner: http://online.drweb.com/
Disallow: /
# Dragonfly CMS - Open Source content management system
User-Agent: Dragonfly File Reader
Disallow: /
# Drecom Japan (210.233.67.xxx) - Blog search ??
User-Agent: Drecombot/1.0 (http://career.drecom.jp/bot.html)
Disallow: /
# Drupal - open source content management platform
User-Agent: Drupal (+http://drupal.org/)
Disallow: /
# Some site scanning tool via diff. IPs i.e.: - cox.net (68.5.xxx.xxx) - pacbell.net (64.16x.xxx.xxx)
User-Agent: DSurf15a 01
Disallow: /
# Some site scanning tool via diff. IPs i.e.: - cox.net (68.4.xxx.xxx)
User-Agent: DSurf15a 71
Disallow: /
# Some site scanning tool via diff. IPs i.e.: - verizon.net (4.47.xxx.xxx)
User-Agent: DSurf15a 81
Disallow: /
# Some site scanning tool via diff. IPs i.e.: - eastlink.ca (24.222.xxx.xxx) - cogeco.net (216.221.8x.xxx)
User-Agent: DSurf15a VA
Disallow: /
# DTAAgent Java object for data collecting
User-Agent: DTAAgent
Disallow: /
# dt Search Spider software
User-Agent: dtSearchSpider
Disallow: /
# Fourelle Venturi proxy server
User-Agent: Dual Proxy
Disallow: /
# Duck Duck Go search crawler
User-Agent: DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)
Disallow: /
# DumbFind.com robot
User-Agent: Dumbot(version 0.1 beta - dumbfind.com)
Disallow: /
# DumbFind.com robot
User-Agent: Dumbot(version 0.1 beta - http://www.dumbfind.com/dumbot.html)
Disallow: /
# DumbFind.com robot
User-Agent: Dumbot(version 0.1 beta)
Disallow: /
# Vigiltech e-Sense user research robot (website is offline)
User-Agent: e-sense 1.0 ea(www.vigiltech.com/esensedisclaim.html)
Disallow: /
# e-Society Project (Japan) crawler (133.9.238.xx)
User-Agent: e-SocietyRobot(http://www.yama.info.waseda.ac.jp/~yamana/es/)
Disallow: /
# Global Opto's eApollo Flash based search engine - Taiwan
User-Agent: eApolloBot/2.0 (compatible; heritrix/2.0.0-SNAPSHOT-20071024.170148 +http://www.eapollo-opto.com)
Disallow: /
# Earthcom (Czech Republic) search robot (194.108.39.xx)
User-Agent: EARTHCOM.info/1.x [www.earthcom.info]
Disallow: /
# Earthcom (Czech Republic) search robot (194.108.39.xx)
User-Agent: EARTHCOM.info/1.xbeta [www.earthcom.info]
Disallow: /
# Keywen Encyclopedia Bot
User-Agent: EasyDL/3.xx
Disallow: /
# Keywen Encyclopedia Bot
User-Agent: EasyDL/3.xx http://keywen.com/Encyclopedia/Bot
Disallow: /
# Some site scanning tool via diff. IPs i.e.: - swbell.net (65.66.xxx.xxx)
User-Agent: EBrowse 1.4b
Disallow: /
# eCatch (now Wysigot) offline browser
User-Agent: eCatch/3.0
Disallow: /
# Echo.fr robot
User-Agent: EchO!/2.0
Disallow: /
# Some site scanning tool via diff. IPs i.e.: - cox.net (68.4.xxx.xxx)
User-Agent: Educate Search VxB
Disallow: /
# Xdefine text search engine robot - based on Egothor open source crawler
User-Agent: egothor/3.0a (+http://www.xdefine.org/robot.html)
Disallow: /
# Egoto Search robot
User-Agent: EgotoBot/4.8 (+http://www.egoto.com/about.htm)
Disallow: /
# eJupiter search robot (206.191.49.xx)
User-Agent: ejupiter.com
Disallow: /
# TimelyWeb web page monitoring tool
User-Agent: EldoS TimelyWeb/3.x
Disallow: /
# Elftales crawler for uChoose theme based search (Germany)
User-Agent: elfbot/1.0 (+http://www.uchoose.de/crawler/elfbot/)
Disallow: /
# DAUMOA - RSS search robot of Daum
User-Agent: ELI/20070402:2.0 (DAUM RSS Robot, Daum Communications Corp.; +http://ws.daum.net/aboutkr.html)
Disallow: /
# ELinks text mode browser
User-Agent: ELinks (0.x.x; Linux 2.4.20 i586; 132x60)
Disallow: /
# ELinks text mode browser
User-Agent: ELinks/0.x.x (textmode; NetBSD 1.6.2 sparc; 132x43)
Disallow: /
# Sonic E-mail collector
User-Agent: EmailSiphon
Disallow: /
# EmailSpider E-mail harvesting software
User-Agent: EmailSpider
Disallow: /
# Trellian EMailWolf E-mail collector
User-Agent: EmailWolf 1.00
Disallow: /
# EmeraldShield spam and web filtration services
User-Agent: EmeraldShield.com WebBot
Disallow: /
# EmeraldShield spam and web filtration services
User-Agent: EmeraldShield.com WebBot (http://www.emeraldshield.com/webbot.aspx)
Disallow: /
# Empas search Korea robot (220.95.22x.xxx)
User-Agent: EMPAS_ROBOT
Disallow: /
# Enabot - Enaball semantic search project crawler
User-Agent: EnaBot/1.x (http://www.enaball.com/crawler.html)
Disallow: /
# endo - Mac news site and blog aggregator
User-Agent: endo/1.0 (Mac OS X; ppc i386; http://kula.jp/endo)
Disallow: /
# Enfish Personal search tool
User-Agent: Enfish Tracker
Disallow: /
# Enterprise Search engine software (64.202.165.xxx)
User-Agent: Enterprise_Search/1.0
Disallow: /
# Enterprise Search engine software (64.202.165.xxx)
User-Agent: Enterprise_Search/1.0.xxx
Disallow: /
# Enterprise Search engine software (64.202.165.xxx)
User-Agent: Enterprise_Search/1.00.xxx;MSSQL (http://www.innerprise.net/es-spider.asp)
Disallow: /
# Envolk Web Search robot
User-Agent: envolk/1.7 (+http://www.envolk.com/envolkspiderinfo.php)
Disallow: /
# Envolk Web Search robot
User-Agent: envolk[ITS]spider/1.6(+http://www.envolk.com/envolkspider.html)
Disallow: /
# EroCrawler adult search robot
User-Agent: EroCrawler
Disallow: /
# Enterprise Search engine software (64.202.165.xxx)
User-Agent: ES.NET_Crawler/2.0 (http://search.innerprise.net/)
Disallow: /
# ExactSEEK (Jayde Online) robot
User-Agent: eseek-larbin_2.6.2 (crawler@exactseek.com)
Disallow: /
# ESI Smart-Spider toolkit
User-Agent: ESISmartSpider
Disallow: /
# e-Style ISP search (Russia) robot (217.174.103.xxx)
User-Agent: eStyleSearch 4 (compatible; MSIE 6.0; Windows NT 5.0)
Disallow: /
# Some site scanning tool via diff. IPs
User-Agent: ESurf15a 15
Disallow: /
# Eurip.com - European Internet Portal robot
User-Agent: EuripBot/0.x (+http://www.eurip.com) GetFile
Disallow: /
# Eurip.com - European Internet Portal robot
User-Agent: EuripBot/0.x (+http://www.eurip.com) GetRobots
Disallow: /
# Eurip.com - European Internet Portal robot
User-Agent: EuripBot/0.x (+http://www.eurip.com) PreCheck
Disallow: /
# Ayell Euronet business directory robot
User-Agent: Eurobot/1.0 (http://www.ayell.eu)
Disallow: /
# Evaal Search Engine robot
User-Agent: EvaalSE - bot@evaal.com
Disallow: /
# Eventax event search (Germany)
User-Agent: eventax/1.3 (eventax; http://www.eventax.de/; info@eventax.de)
Disallow: /
# Vulcan Inc. Everest crawler (in development)
User-Agent: Everest-Vulcan Inc./0.1 (R&D project; host=e-1-24; http://everest.vulcan.com/crawlerhelp)
Disallow: /
# Vulcan Inc. Everest crawler (in development)
User-Agent: Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)
Disallow: /
# Exalead Websearch image crawler (193.47.80.xx)
User-Agent: Exabot-Images/1.0
Disallow: /
# Exalead (France) search robot (193.47.80.xx)
User-Agent: Exabot-Test/1.0
Disallow: /
# Exalead (France) search robot (193.47.80.xx)
User-Agent: Exabot/2.0
Disallow: /
# Exalead (France) search robot (193.47.80.xx)
User-Agent: Exabot/3.0
Disallow: /
# eXact Search Bar for IE
User-Agent: ExactSearch
Disallow: /
# ExactSEEK (Jayde Online) robot
User-Agent: ExactSeek Crawler/0.1
Disallow: /
# ExactSEEK (Jayde Online) robot
User-Agent: exactseek-crawler-2.63 (crawler@exactseek.com)
Disallow: /
# ExactSEEK (Jayde Online) robot
User-Agent: exactseek-pagereaper-2.63 (crawler@exactseek.com)
Disallow: /
# ExactSEEK (Jayde Online) robot (69.9.181.1xx)
User-Agent: exactseek.com
Disallow: /
# Exalead (France) search robot (193.47.80.xx)
User-Agent: Exalead NG/MimeLive Client (convert/http/0.120)
Disallow: /
# Excalibur (now Convera) spider software
User-Agent: Excalibur Internet Spider V6.5.4
Disallow: /
# Execrawl software search using nutch
User-Agent: Execrawl/1.0 (Execrawl; http://www.execrawl.com/; bot@execrawl.com)
Disallow: /
# exooba crawler for exooba search pre-alpha development
User-Agent: exooba crawler/exooba crawler (crawler for exooba.com; http://www.exooba.com/; info at exooba dot com)
Disallow: /
# exooba crawler for exooba search pre-alpha development
User-Agent: exooba/exooba crawler (exooba; exooba)
Disallow: /
# Mirago UK Robot
User-Agent: ExperimentalHenrytheMiragoRobot
Disallow: /
# Expired Domain Sleuth domain name tool
User-Agent: Expired Domain Sleuth
Disallow: /
# Express Web Pictures image browser
User-Agent: Express WebPictures (www.express-soft.com)
Disallow: /
# Extractor Pro e-mail collector
User-Agent: ExtractorPro
Disallow: /
# Exisoftware image grabber and downloading tool
User-Agent: Extreme Picture Finder
Disallow: /
# Download-Tipp Germany robot
User-Agent: EyeCatcher (Download-tipp.de)/1.0
Disallow: /
# Factbites search robot
User-Agent: Factbot 1.09 (see http://www.factbites.com/webmasters.php)
Disallow: /
# Factbites search robot
User-Agent: factbot : http://www.factbites.com/robots
Disallow: /
# FaEdit Professional - Japanese bookmark manager
User-Agent: FaEdit/2.0.x
Disallow: /
# FairAd user
User-Agent: FairAd Client
Disallow: /
# Safe-t.net web filtering service
User-Agent: FANGCrawl/0.01
Disallow: /
# Drew Curtis' FARK.com link checking
User-Agent: FARK.com link verifier
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: Fast Crawler Gold Edition
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: FAST Enterprise Crawler 6 (Experimental)
Disallow: /
# Fast Enterprise Crawler (66.151.181.xx) for Scirus scientific information search
User-Agent: FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: FAST Enterprise Crawler 6 used by Cobra Development (admin@fastsearch.com)
Disallow: /
# Comperio Web Miner based on Fast ESP
User-Agent: FAST Enterprise Crawler 6 used by Comperio AS (sts@comperio.no)
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: FAST Enterprise Crawler 6 used by FAST (FAST)
Disallow: /
# Pages Jaunes business search (France) robot using Fast Enterprise Crawler
User-Agent: FAST Enterprise Crawler 6 used by Pages Jaunes (pvincent@pagesjaunes.fr)
Disallow: /
# Fast/Alltheweb crawler for Sensis.com.au Australian search (66.151.181.xx)
User-Agent: FAST Enterprise Crawler 6 used by Sensis.com.au Web Crawler (search_comments\at\sensis\dot\com\dot\au)
Disallow: /
# SPH Search - Singapore related search using Fast crawler
User-Agent: FAST Enterprise Crawler 6 used by Singapore Press Holdings (crawler@sphsearch.sg)
Disallow: /
# FAST Enterprise Crawler used by WWU (University of Muenster - Germany)
User-Agent: FAST Enterprise Crawler 6 used by WWU (wardi@uni-muenster.de)
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: FAST Enterprise Crawler/6 (www.fastsearch.com)
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: FAST Enterprise Crawler/6.4 (helpdesk at fast.no)
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: FAST FirstPage retriever (compatible; MSIE 5.5; Mozilla/4.0)
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: FAST MetaWeb Crawler (helpdesk at fastsearch dot com)
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: Fast PartnerSite Crawler
Disallow: /
# Fast/Alltheweb multimedia crawler
User-Agent: FAST-WebCrawler/2.2.10 (Multimedia Search) (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/2.2.6 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/2.2.7 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/2.2.8 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.2 test
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.3 (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.4/Nirvana (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.4/PartnerSite (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.5 (atw-crawler at fast dot no; http://fast.no/support.php?c=faqs/crawler)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.6 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.6/FirstPage (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.8 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: FAST-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)
Disallow: /
# Fast/Alltheweb multimedia crawler
User-Agent: FAST-WebCrawler/3.x Multimedia
Disallow: /
# Fast/Alltheweb multimedia crawler
User-Agent: FAST-WebCrawler/3.x Multimedia (mm dash crawler at fast dot no)
Disallow: /
# Fastbot search Germany crawler (80.252.104.1xx)
User-Agent: fastbot crawler beta 2.0 (+http://www.fastbot.de)
Disallow: /
# Ay-Up geo sync search robot
User-Agent: FastBug http://www.ay-up.com
Disallow: /
# Fast/Alltheweb crawler (66.151.181.xx)
User-Agent: FastCrawler 3.0.1 (crawler@1klik.dk)
Disallow: /
# Fast/Alltheweb crawler used by SuperPages.com
User-Agent: FastSearch Web Crawler for Verizon SuperPages (kevin.watters@fastsearch.com)
Disallow: /
# Favcollector Favicon collecting robot
User-Agent: Favcollector/2.0 (info@favcollector.com http://www.favcollector.com/)
Disallow: /
# FavIconizer - IE favorites icons refreshing tool
User-Agent: FavIconizer
Disallow: /
# favo.eu (Germany) search robot
User-Agent: favo.eu crawler/0.6 (http://www.favo.eu)
Disallow: /
# ZD's FavOrg favourites managing program
User-Agent: FavOrg
Disallow: /
# Campulka.net Favorites checking tool
User-Agent: Favorites Checking (http://campulka.net)
Disallow: /
# Favorites Sweeper bookmark checker
User-Agent: Favorites Sweeper v.2.03
Disallow: /
# FaXo Search robot (69.152.89.xx)
User-Agent: Faxobot/1.0
Disallow: /
# Free Download Manager (FDM) download accelerator
User-Agent: FDM 1.x
Disallow: /
# Free Download Manager (FDM) download accelerator
User-Agent: FDM 2.x
Disallow: /
# RSS Feed Seeker bot (68.225.95.2xx)
User-Agent: Feed Seeker Bot (RSS Feed Seeker http://www.MyNewFavoriteThing.com/fsb.php)
Disallow: /
# Feed24 news feed and blog search
User-Agent: Feed24.com
Disallow: /
# Feed::Find - Syndication feed (RSS/Atom) auto-discovery
User-Agent: Feed::Find/0.0x
Disallow: /
# Feedable beta web based RSS service
User-Agent: Feedable/0.1 (compatible; MSIE 6.0; Windows NT 5.1)
Disallow: /
# Unknown robot from the University of Tokyo (157.82.157.xx)
User-Agent: FeedChecker/0.01
Disallow: /
# FeedDemon RSS reader
User-Agent: FeedDemon/2.7 (http://www.newsgator.com/; Microsoft Windows XP)
Disallow: /
# Google news feed fetcher for iGoogle gadgets
User-Agent: Feedfetcher-Google-iGoogleGadgets; (+http://www.google.com/feedfetcher.html)
Disallow: /
# Google Feedfetcher - RSS and Atom feed crawler
User-Agent: Feedfetcher-Google; (+http://www.google.com/feedfetcher.html)
Disallow: /
# FeedForAll RSS feed robot
User-Agent: FeedForAll rss2html.php v2
Disallow: /
# FeedHub news feed personalization engine powered by mSpoke
User-Agent: FeedHub FeedDiscovery/1.0 (http://www.feedhub.com)
Disallow: /
# FeedHub news feed personalization engine powered by mSpoke
User-Agent: FeedHub MetaDataFetcher/1.0 (http://www.feedhub.com)
Disallow: /
# Feedjit news feed service favicon crawler
User-Agent: Feedjit Favicon Crawler 1.0
Disallow: /
# Newsbrain Feedreader3
User-Agent: Feedreader 3.xx (Powered by Newsbrain)
Disallow: /
# FeedShow online RSS feed reader
User-Agent: Feedshow/x.0 (http://www.feedshow.com; 1 subscriber)
Disallow: /
# FeedShow online RSS feed reader
User-Agent: FeedshowOnline (http://www.feedshow.com)
Disallow: /
# Feedster RSS feed search
User-Agent: Feedster Crawler/3.0; Feedster, Inc.
Disallow: /
# FeedZcollector - Feed (RSS, ATOM and RDF) capturing software
User-Agent: FeedZcollector v1.x (Platinum) http://www.feeds4all.com/feedzcollector
Disallow: /
# MixCat robot
User-Agent: Felix - Mixcat Crawler (+http://mixcat.com)
Disallow: /
# FreeBSD download tool
User-Agent: fetch libfetch/2.0
Disallow: /
# Frequent Finders spider via Sitefusion.com
User-Agent: FFC Trap Door Spider
Disallow: /
# Filangy search and bookmark service
User-Agent: Filangy/0.01-beta (Filangy; http://www.nutch.org/docs/en/bot.html; filangy-agent@filangy.com)
Disallow: /
# Filangy search and bookmark service
User-Agent: Filangy/1.0x (Filangy; http://www.filangy.com/filangyinfo.jsp?inc=robots.jsp; filangy-agent@filangy.com)
Disallow: /
# Filangy search and bookmark service
User-Agent: Filangy/1.0x (Filangy; http://www.nutch.org/docs/en/bot.html; filangy-agent@filangy.com)
Disallow: /
# File Boost Network link checking
User-Agent: fileboost.net/1.0 (+http://www.fileboost.net)
Disallow: /
# FileHound download manager
User-Agent: FileHound x.x
Disallow: /
# filtrbox media content (news) monitoring
User-Agent: Filtrbox/1.0
Disallow: /
# Find An ISP robot
User-Agent: FindAnISP.com_ISP_Finder_v99a
Disallow: /
# Yelo.no business search (Norway) via Findexa
User-Agent: Findexa Crawler (http://www.findexa.no/gulesider/article26548.ece)
Disallow: /
# NextLinks - German vocabulary and hyperlink search
User-Agent: findlinks/x.xxx (+http://wortschatz.uni-leipzig.de/findlinks/)
Disallow: /
# Finesearch robot
User-Agent: FineBot
Disallow: /
# Finjan Vital Security Web Appliance security solution
User-Agent: Finjan-prefetch
Disallow: /
# Fireball.de robot
User-Agent: Firefly/1.0
Disallow: /
# Fireball.de robot
User-Agent: Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5)
Disallow: /
# Unknown robot from Czech Technical University Prague (147.32.141.xx)
User-Agent: Firefox (kastaneta03@hotmail.com)
Disallow: /
# Unknown robot from Czech Technical University Prague (147.32.141.xx)
User-Agent: Firefox_1.0.6 (kasparek@naparek.cz)
Disallow: /
# AT&T/Fast Search robot for FirstGov (U.S.Government) portal
User-Agent: FirstGov.gov Search - POC:firstgov.webmasters@gsa.gov
Disallow: /
# Firstsfind Germany robot / link checking
User-Agent: firstsbot
Disallow: /
# Flaptor information retrieval solutions robot
User-Agent: Flapbot/0.7.2 (Flaptor Crawler; http://www.flaptor.com; crawler at flaptor period com)
Disallow: /
# JetCar/FlashGet download manager
User-Agent: FlashGet
Disallow: /
# FlatArts Favorites Icon Tool
User-Agent: FLATARTS_FAVICO
Disallow: /
# Flexum.ru search service
User-Agent: Flexum spider
Disallow: /
# Flexum.ru search service
User-Agent: Flexum/2.0
Disallow: /
# DivX.com Movie Find robot
User-Agent: FlickBot 2.0 RPT-HTTPClient/0.3-3
Disallow: /
# Metacarta / Cogent robot
User-Agent: flunky
Disallow: /
# unknown
User-Agent: fly/6.01 libwww/4.0D
Disallow: /
# FLY Index Metasearch link checking
User-Agent: flyindex.net 1.0/http://www.flyindex.net
Disallow: /
# Fnoole news crawler
User-Agent: FnooleBot/2.5.2 (+http://www.fnoole.com/addurl.html)
Disallow: /
# IBM's Almaden Research robot
User-Agent: FocusedSampler/1.0
Disallow: /
# folkd.com social search robot
User-Agent: Folkd.com Spider/0.1 beta 1 (www.folkd.com)
Disallow: /
# FollowSite robot - website monitoring
User-Agent: FollowSite Bot ( http://www.followsite.com/bot.html )
Disallow: /
# FollowSite robot - website monitoring
User-Agent: FollowSite.com ( http://www.followsite.com/b.html )
Disallow: /
# Fooky search Scorpionbots robot (65.12.170.xxx)
User-Agent: Fooky.com/ScorpionBot/ScoutOut; http://www.fooky.com/scorpionbots
Disallow: /
# Neomo Search (Germany) robot (85.10.197.1xx)
User-Agent: Francis/1.0 (francis@neomo.de http://www.neomo.de/)
Disallow: /
# Some spam bot
User-Agent: Franklin Locator 1.8
Disallow: /
# Free Downloads shareware directory link checking
User-Agent: free-downloads.net download-link validator /0.1
Disallow: /
# FreeFind.com robot
User-Agent: FreeFind.com-SiteSearchEngine/1.0 (http://freefind.com; spiderinfo@freefind.com)
Disallow: /
# Frelics backlink checking bot (beta)
User-Agent: Frelicbot/1.0 +http://www.frelic.com/
Disallow: /
# Fresh Download download manager
User-Agent: FreshDownload/x.xx
Disallow: /
# FreshNotes - music related artist search (72.3.225.xx)
User-Agent: FreshNotes crawler< report problems to crawler-at-freshnotes-dot-com
Disallow: /
# Some site scanning tool via diff. IPs
User-Agent: FSurf15a 01
Disallow: /
# Find the Best search robot
User-Agent: FTB-Bot http://www.findthebest.co.uk/
Disallow: /
# Some site scanning tool from diff. IPs i.e.: - 66.28.240.xx (cogentco.com) - 68.5.174.xx (cox.net)
User-Agent: Full Web Bot 0416B
Disallow: /
# Some site scanning tool i.e. from - 68.154.96.xx (bellsouth.net)
User-Agent: Full Web Bot 0516B
Disallow: /
# Some site scanning tool from 66.255.6.xxx (uslec.com)
User-Agent: Full Web Bot 2816B
Disallow: /
# FuseBulb search
User-Agent: FuseBulb.Com
Disallow: /
# FyberSearch FyberSpider robot
User-Agent: FyberSpider (+http://www.fybersearch.com/fyberspider.php)
Disallow: /
# DNS Right - Online DNS tools
User-Agent: unknownght.com Web Server IIS vs Apache Survey. See Results at www.DNSRight.com
Disallow: /
# Some user from bbnplanet.net (4.63.218.2xx) using an Innerprise robot tool
User-Agent: Gagglebot
Disallow: /
# Seed Search robot
User-Agent: GAIS Robot/1.0B2
Disallow: /
# Gaislab Taiwan robot
User-Agent: Gaisbot/3.0 (indexer@gais.cs.ccu.edu.tw; http://gais.cs.ccu.edu.tw/robot.php)
Disallow: /
# Gaislab Taiwan robot
User-Agent: Gaisbot/3.0+(robot06@gais.cs.ccu.edu.tw;+http://gais.cs.ccu.edu.tw/robot.php)
Disallow: /
# Galaxy robot (63.121.41.xxx)
User-Agent: GalaxyBot/1.0 (http://www.galaxy.com/galaxybot.html)
Disallow: /
# Gallent Search directory (UK)
User-Agent: Gallent Search Spider v1.4 Robot 2 (http://robot.GallentSearch.com)
Disallow: /
# Gamekit game search engine - Germany
User-Agent: gamekitbot/1.0 (+http://www.uchoose.de/crawler/gamekitbot/)
Disallow: /
# GameSpyHTTP/1.0
User-Agent: Gamespy_Arcade
Disallow: /
# GammaWare GammaSpider
User-Agent: GammaSpider/1.0
Disallow: /
# nttrd.com / Infobee.ne.jp robot
User-Agent: gazz/x.x (gazz@nttrd.com)
Disallow: /
# Geckobot user robot
User-Agent: geckobot
Disallow: /
# Google Mobile Search crawler
User-Agent: Generic Mobile Phone (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)
Disallow: /
# Unknown robot from Carnegie Mellon University (128.2.211.xxx)
User-Agent: generic_crawler/01.0217/
Disallow: /
# Lunascape Genesis browser
User-Agent: GenesisBrowser (HTTP 1.1; 0.9; XP SP2; .NET CLR 2.0.50727)
Disallow: /
# GenieKnows.com search
User-Agent: genieBot (http://64.5.245.11/faq/faq.html)
Disallow: /
# GenieKnows.com search
User-Agent: geniebot wgao@genieknows.com
Disallow: /
# Unknown robot from wavepath.com (65.254.33.1xx)
User-Agent: GeoBot/1.0
Disallow: /
# Geona Search robot / link checking
User-Agent: GeonaBot 1.x; http://www.geona.com/
Disallow: /
# GeoURL ICBM Address Server - a location-to-URL reverse directory
User-Agent: geourl/2.0b2
Disallow: /
# GeoURL ICBM Address Server - a location-to-URL reverse directory
User-Agent: GeoURLBot 1.0 (http://geourl.org)
Disallow: /
# Getbot web downloading tool / site grabber
User-Agent: GetBot
Disallow: /
# GetRight download manager
User-Agent: GetRight/3.x.x
Disallow: /
# GetRight download manager
User-Agent: GetRight/4.5xx
Disallow: /
# GetRight download manager
User-Agent: GetRight/4.x
Disallow: /
# GetRight download manager
User-Agent: GetRight/4.x[a-e]
Disallow: /
# GetRight download manager
User-Agent: GetRight/6.1 (Pro)
Disallow: /
# GetRight download manager
User-Agent: GetRightPro/6.0beta2
Disallow: /
# GetWeb - web page to email service
User-Agent: GetWeb/0.1 libwww-perl/5.16
Disallow: /
# Sixxs Ghost Route Hunter
User-Agent: GhostRouteHunter/20021130 (https://www.sixxs.net/tools/grh/; info@sixxs.net)
Disallow: /
# GigaBaz Brainbot (Germany) robot
User-Agent: gigabaz/3.1x (baz@gigabaz.com; http://gigabaz.com/gigabaz/)
Disallow: /
# Gigablast robot (64.62.168.xx)
User-Agent: Gigabot/2.0 (gigablast.com)
Disallow: /
# Gigablast robot
User-Agent: Gigabot/2.0/gigablast.com/spider.html
Disallow: /
# Gigablast robot
User-Agent: Gigabot/2.0; http://www.gigablast.com/spider.html
Disallow: /
# Gigablast robot
User-Agent: Gigabot/2.0att
Disallow: /
# Gigablast robot
User-Agent: Gigabot/3.0 (http://www.gigablast.com/spider.html)
Disallow: /
# Gigablast robot (64.62.168.xx)
User-Agent: Gigabot/x.0
Disallow: /
# Gigablast robot (64.62.168.xx)
User-Agent: GigabotSiteSearch/2.0 (sitesearch.gigablast.com)
Disallow: /
# www.gnod.net spider
User-Agent: GNODSPIDER (www.gnod.net)
Disallow: /
# Go!Zilla download manager
User-Agent: Go!Zilla 3.x (www.gozilla.com)
Disallow: /
# Go!Zilla download manager
User-Agent: Go!Zilla/4.x.x.xx
Disallow: /
# GotIt web accelerator (discontinued)
User-Agent: Go-Ahead-Got-It/1.1
Disallow: /
# GoGuides.Org (195.226.137.xx) robot
User-Agent: Goblin/0.9 (http://www.goguides.org/)
Disallow: /
# GoGuides.Org (195.226.137.xx) robot
User-Agent: Goblin/0.9.x (http://www.goguides.org/goblin-info.html)
Disallow: /
# GoForIt Search robot
User-Agent: GoForIt.com
Disallow: /
# GoForIt Search robot
User-Agent: GOFORITBOT ( http://www.goforit.com/about/ )
Disallow: /
# GoGuides.org directory & search link checking
User-Agent: GoGuides.Org Link Check
Disallow: /
# GoldenFeed.com - RSS search engine
User-Agent: GoldenFeed Spider 1.0 (http://www.goldenfeed.com)
Disallow: /
# Invention Machines Goldfire Server
User-Agent: Goldfire Server
Disallow: /
# suchen.de German local search robot
User-Agent: gonzo1[P] +http://www.suchen.de/popups/faq.jsp
Disallow: /
# suchen.de German local search robot
User-Agent: gonzo2[P] +http://www.suchen.de/faq.html
Disallow: /
# Some private robot (Wanadoo.fr client)
User-Agent: Goofer/0.2
Disallow: /
# Google instant messenger
User-Agent: Google Talk
Disallow: /
# Packard Bell Net user robot (*not* Google)
User-Agent: googlebot (larbin2.6.0@unspecified.mail)
Disallow: /
# Google image crawler (66.249.72.xxx)
User-Agent: Googlebot-Image/1.0
Disallow: /
# Google image crawler (66.249.72.xxx)
User-Agent: Googlebot-Image/1.0 ( http://www.googlebot.com/bot.html)
Disallow: /
# Google robot 66.249.64.XXX
User-Agent: Googlebot/2.1 ( http://www.google.com/bot.html)
Disallow: /
# Google robot 66.249.64.XXX
User-Agent: Googlebot/2.1 ( http://www.googlebot.com/bot.html)
Disallow: /
# Google robot 66.249.64.XXX
User-Agent: Googlebot/Test ( http://www.googlebot.com/bot.html)
Disallow: /
# Grapeshot web search system API
User-Agent: GrapeFX/0.3 libwww/5.4.0
Disallow: /
# Flatland Industries vertical search solution
User-Agent: great-plains-web-spider/flatlandbot (Flatland Industries Web Spider; http://www.flatlandindustries.com/flatlandbot.php; jason@flatlandindustries.com)
Disallow: /
# GreatNews 1.0 Beta RSS reader
User-Agent: GreatNews/1.0
Disallow: /
# GreenBrowser - IE based browser (China)
User-Agent: GreenBrowser
Disallow: /
# search gridwell favicon display
User-Agent: gridwell (http://search.gridwell.com)
Disallow: /
# Grigor Search bot
User-Agent: GrigorBot 0.8 (http://www.grigor.biz/bot.html)
Disallow: /
# Australasian Legal Information Institute (AustLII) robot
User-Agent: Gromit/1.0
Disallow: /
# Grub open source crawler
User-Agent: grub crawler(http://www.grub.org)
Disallow: /
# Grub open source crawler
User-Agent: grub-client
Disallow: /
# Google Search Appliance robot (216.239.xx.xx)
User-Agent: gsa-crawler (Enterprise; GID-01422; jplastiras@google.com)
Disallow: /
# Google Search Appliance robot (216.239.xx.xx)
User-Agent: gsa-crawler (Enterprise; GID-01742;gsatesting@rediffmail.com)
Disallow: /
# Google Enterprise Search Appliance used by Enhesa (212.35.100.1xx)
User-Agent: gsa-crawler (Enterprise; GIX-02057; dm@enhesa.com)
Disallow: /
# Google Enterprise Search Appliance used by IBM (129.41.20.1xx)
User-Agent: gsa-crawler (Enterprise; GIX-03519; cknuetter@stubhub.com)
Disallow: /
# Google Search Appliance robot (216.239.xx.xx)
User-Agent: gsa-crawler (Enterprise; GIX-0xxxx; enterprise-training@google.com)
Disallow: /
# GSiteCrawler - Google sitemap generator for Windows
User-Agent: GSiteCrawler/v1.xx rev. xxx (http://gsitecrawler.com/)
Disallow: /
# Guestbook spamming tool
User-Agent: Guestbook Auto Submitter
Disallow: /
# Northernlight robot
User-Agent: Gulliver/1.3
Disallow: /
# Yuntis Collaborative Web Resource Categorization and Ranking Project robot
User-Agent: Gulper Web Bot 0.2.4 (www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)
Disallow: /
# Gungho - Extensible web crawler written in Perl by Google Code
User-Agent: Gungho/0.08004 (http://code.google.com/p/gungho-crawler/wiki/Index)
Disallow: /
# guruji : the Indian search engine robot
User-Agent: GurujiBot/1.0 (+http://www.guruji.com/WebmasterFAQ.html)
Disallow: /
# guruji : the Indian search engine picture crawler
User-Agent: GurujiImageBot/1.0 (+http://www.guruji.com/en/WebmasterFAQ.html)
Disallow: /
# Openwave Mobile Browser on Haier T10C mobile
User-Agent: Haier-T10C/1.0 iPanel/2.0 WAP2.0 (compatible; UP.Browser/6.2.2.4; UPG1; UP/4.0; Embedded)
Disallow: /
# Happy Fun Search robot
User-Agent: HappyFunBot/1.1
Disallow: /
# Harvest-NG web crawler used by search.yahoo.com
User-Agent: Harvest-NG/1.0.2
Disallow: /
# Haste - web mapping and monitoring system
User-Agent: Haste/0.12 (HOME: http://haste.kytoon.com/)
Disallow: /
# Hatena::Antenna Japan robot
User-Agent: Hatena Antenna/0.4 (http://a.hatena.ne.jp/help#robot)
Disallow: /
# Hatena Japan proxy for handheld/mobile clients
User-Agent: Hatena Mobile Gateway/1.0
Disallow: /
# Hatena Japan robot
User-Agent: Hatena Pagetitle Agent/1.0
Disallow: /
# Hatena Japan RSS feed robot
User-Agent: Hatena RSS/0.3 (http://r.hatena.ne.jp)
Disallow: /
# Hatena::Diary (Japan) web page screenshot robot
User-Agent: HatenaScreenshot/1.0 (checker)
Disallow: /
# hbtronix.spider - Domain name spider (Germany)
User-Agent: hbtronix.spider.2 -- http://hbtronix.de/spider.php
Disallow: /
# Mirago Germany robot
User-Agent: HeinrichderMiragoRobot
Disallow: /
# Mirago Germany robot
User-Agent: HeinrichderMiragoRobot (http://www.miragorobot.com/scripts/deinfo.asp)
Disallow: /
# Helix - The SiteSearch (Canada) web crawler
User-Agent: Helix/1.x ( http://www.sitesearch.ca/helix/)
Disallow: /
# Mirago France robot
User-Agent: HenriLeRobotMirago (http://www.miragorobot.com/scripts/frinfo.asp)
Disallow: /
# Mirago search (UK) robot
User-Agent: HenrytheMiragoRobot
Disallow: /
# Mirago search (UK) robot
User-Agent: HenryTheMiragoRobot (http://www.miragorobot.com/scripts/mrinfo.asp)
Disallow: /
# O'Reilly's Perl LWP example client program from Web Client Programming with Perl
User-Agent: hgrepurl/1.0
Disallow: /
# University of Kassel Germany CsCrawler using the HTTPClient library
User-Agent: Hi! I'm CsCrawler my homepage: http://www.kde.cs.uni-kassel.de/lehre/ss2005/googlespam/crawler.html RPT-HTTPClient/0.3-3
Disallow: /
# HiDownload download manager
User-Agent: HiDownload
Disallow: /
# Hippias robot
User-Agent: Hippias/0.9 Beta
Disallow: /
# Pilot Hitlist web analytics solution
User-Agent: HitList
Disallow: /
# Hitwise spider
User-Agent: Hitwise Spider v1.0 http://www.hitwise.com
Disallow: /
# diff. IPs / unknown services
User-Agent: HLoader
Disallow: /
# Morfeo / Centrum Search (Czech Republic) robot from 65.102.46.xxx
User-Agent: holmes/3.11 (http://morfeo.centrum.cz/bot)
Disallow: /
# Onet.pl (Poland) search robot
User-Agent: holmes/3.9 (onet.pl)
Disallow: /
# Onet.pl (Poland) search robot
User-Agent: holmes/3.xx (OnetSzukaj/5.0; +http://szukaj.onet.pl)
Disallow: /
# Morfeo / Centrum Search (Czech Republic) robot from 65.102.46.xxx
User-Agent: holmes/x.x
Disallow: /
# Holmes search robot (Georgia)
User-Agent: HolmesBot (http://holmes.ge)
Disallow: /
# HomePageSearch robot
User-Agent: HomePageSearch(hpsearch.uni-trier.de)
Disallow: /
# Homerweb search robot
User-Agent: Homerbot: www.homerweb.com
Disallow: /
# Honda-Search.com - Honda cars related search robot
User-Agent: Honda-Search/0.7.2 (Nutch; http://lucene.apache.org/nutch/bot.html; search@honda-search.com)
Disallow: /
# HooWWWer - Next Generation Information Retrieval robot
User-Agent: HooWWWer/2.1.3 (debugging run) (+http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-infohiit.fi)
Disallow: /
# HooWWWer - Next Generation Information Retrieval robot (128.214.112.xx)
User-Agent: HooWWWer/2.1.x ( http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-infohiit.fi)
Disallow: /
# HotJava browser plus HTML Component 1.1.x
User-Agent: HotJava/1.0.1/JRE1.1.x
Disallow: /
# Hotzonu - Japanese BBS reader client
User-Agent: Hotzonu/x.0
Disallow: /
# Unknown robot from HP Labs
User-Agent: HPL/Nutch-0.9 -
Disallow: /
# COMPUTERorgs.com robot (205.134.190.xxx) using htdig
User-Agent: htdig/3.1.6 (http://computerorgs.com)
Disallow: /
# htdig used by the Academie de Toulouse
User-Agent: htdig/3.1.6 (unconfigured@htdig.searchengine.maintainer)
Disallow: /
# htdig search tool
User-Agent: htdig/3.1.x (root@localhost)
Disallow: /
# Lithops Software link validation tool
User-Agent: Html Link Validator (www.lithopssoft.com)
Disallow: /
# HTML2JPG webpage to image converter
User-Agent: HTML2JPG Blackbox, http://www.html2jpg.com
Disallow: /
# HTML2JPG webpage to image converter
User-Agent: HTML2JPG Enterprise
Disallow: /
# HTML Parser Java library to parse HTML
User-Agent: HTMLParser/1.x
Disallow: /
# PHP HTTP client to access Web servers
User-Agent: HTTP Retriever
Disallow: /
# Anonymous web proxy service
User-Agent: http://Anonymouse.org/ (Unix)
Disallow: /
# Ask 24x Info (Germany) DMOZ related robot
User-Agent: http://Ask.24x.Info/ (http://narres.it/)
Disallow: /
# ACONTBOT - Acont search Germany robot
User-Agent: http://hilfe.acont.de/bot.html ACONTBOT
Disallow: /
# OzySoftware.com software directory link checking
User-Agent: http://OzySoftware.com/Index.html
Disallow: /
# IBM's Almaden Research robot
User-Agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /
# IBM's Almaden Research robot
User-Agent: http://www.almaden.ibm.com/cs/crawler [rc1.wf.ibm.com]
Disallow: /
# IBM's Almaden Research robot
User-Agent: http://www.almaden.ibm.com/cs/crawler [wf216]
Disallow: /
# Istarthere.com search robot
User-Agent: http://www.istarthere.com_spider@istarthere.com
Disallow: /
# Monogol - German open source search engine project (195.226.167.1xx)
User-Agent: http://www.monogol.de
Disallow: /
# TrendTech Search Engine (Denmark) robot
User-Agent: http://www.trendtech.dk/spider.asp)
Disallow: /
# HTTP::Lite - Standalone Perl module for retrieving HTTP documents
User-Agent: HTTP::Lite/2.x.x
Disallow: /
# HTTPEyes - Web proxy cache
User-Agent: HTTPEyes
Disallow: /
# HTTPResume Amiga download manager
User-Agent: HTTPResume v. 1.x
Disallow: /
# HttpUnit - Java test code for emulating browser behaviour
User-Agent: httpunit/1.5
Disallow: /
# HttpUnit - Java browser behavior simulation tool
User-Agent: httpunit/1.x
Disallow: /
# Hybrid Share mono C#/Gtk# application for file sharing
User-Agent: Hybrid/1.2 [en] (OS Independent)
Disallow: /
# Hyper Estraier full-text search system
User-Agent: HyperEstraier/1.x.xx
Disallow: /
# i1search robot
User-Agent: i1searchbot/2.0 (i1search web crawler; http://www.i1search.com; crawler@i1search.com)
Disallow: /
# Alexa / The Internet Archive (209.237.238.1xx)
User-Agent: IAArchiver-1.0
Disallow: /
# Unknown robot (reads robots.txt) from chinatelecom (219.142.78.xx)
User-Agent: iaskspider
Disallow: /
# Iask search / Sina portal robot (China)
User-Agent: iaskspider2 (iask@staff.sina.com.cn)
Disallow: /
# Alexa / The Internet Archive (209.237.238.1xx)
User-Agent: ia_archiver
Disallow: /
# Alexa / The Internet Archive (209.237.238.1xx)
User-Agent: ia_archiver-web.archive.org
Disallow: /
# Alexa / The Internet Archive (209.237.238.1xx)
User-Agent: ia_archiver/1.6
Disallow: /
# IOSpirit iBrowse Amiga Browser
User-Agent: IBrowse/2.2 (AmigaOS 3.5)
Disallow: /
# IOSpirit iBrowse Amiga Browser
User-Agent: IBrowse/2.2 (Windows 3.1)
Disallow: /
# iCab MAC Web browser
User-Agent: iCab/2.5.2 (Macintosh; I; PPC)
Disallow: /
# Knowledge Clustered Group ICC-Crawler (University of Tokyo - Japan)
User-Agent: ICC-Crawler(Mozilla-compatible; http://kc.nict.go.jp/icc/crawl.html; icc-crawl(at)ml(dot)nict(dot)go(dot)jp)
Disallow: /
# Knowledge Clustered Group ICC-Crawler (University of Tokyo - Japan)
User-Agent: ICC-Crawler(Mozilla-compatible;http://kc.nict.go.jp/icc/crawl.html;icc-crawl-contact(at)ml(dot)nict(dot)go(dot)jp)
Disallow: /
# ICJobs - Intelligence Competence Center (Germany) robot
User-Agent: iCCrawler (http://www.iccenter.net)
Disallow: /
# ICJobs - Intelligence Competence Center (Germany) robot
User-Agent: ICCrawler - ICjobs (http://www.icjobs.de/bot.htm)
Disallow: /
# ICE Java browser
User-Agent: ICE Browser/5.05 (Java 1.4.0; Windows 2000 5.0 x86)
Disallow: /
# Goo Japan / Inktomi robot (210.173.179.xx)
User-Agent: ichiro/x.0 (http://help.goo.ne.jp/door/crawler.html)
Disallow: /
# Goo Japan / Inktomi robot (210.173.179.xx)
User-Agent: ichiro/x.0 (ichiro@nttr.co.jp)
Disallow: /
# Iconsurf.com - Visual Surf Engine / favicon finder
User-Agent: IconSurf/2.0 favicon finder (see http://iconsurf.com/robot.html)
Disallow: /
# Iconsurf.com - Visual Surf Engine / favicon finder
User-Agent: IconSurf/2.0 favicon monitor (see http://iconsurf.com/robot.html)
Disallow: /
# icooLoader download manager
User-Agent: ICOO Loader v.x.x.x
Disallow: /
# ICRA (Internet Content Rating Association) label spider
User-Agent: ICRA_label_spider/x.0
Disallow: /
# ICS Robot Search Engine (International Christian school of Seoul)
User-Agent: icsbot-0.1
Disallow: /
# Internet Download Accelerator
User-Agent: IDA
Disallow: /
# Janas (Ideare.com / Tiscali.it) robot
User-Agent: ideare - SignSite/1.x
Disallow: /
# Unknown UA from Yahoo China
User-Agent: iearthworm/1.0, iearthworm@yahoo.com.cn
Disallow: /
# Some bookmark manager
User-Agent: IEFav172Free
Disallow: /
# iFeed.jp - online rss aggregator (in development)
User-Agent: iFeed.jp/2.0 (www.psychedelix.com/agents/agents.rss; 0 subscribers)
Disallow: /
# Igde search (Russia) robot
User-Agent: igdeSpyder (compatible; igde.ru; +http://igde.ru/doc/tech.html)
Disallow: /
# iGetter download manager
User-Agent: iGetter/1.x (Macintosh;G;PPC)
Disallow: /
# iGetter download manager
User-Agent: iGetter/2 (Macintosh; U; PPC Mac OS X; en)
Disallow: /
# Webkhoj - Indian language search engine
User-Agent: IIITBOT/1.1 (Indian Language Web Search Engine; http://webkhoj.iiit.net; pvvpr at iiit dot ac dot in)
Disallow: /
# Ilial Knowledge Search robot
User-Agent: ilial/Nutch-0.9 (Ilial, Inc. is a Los Angeles based Internet startup company. For more information please visit http://www.ilial.com/crawler; http://www.ilial.com/crawler; crawl@ilial.com)
Disallow: /
# Unknown robot from UCLA using Nutch
User-Agent: ilial/Nutch-0.9-dev
Disallow: /
# Ilse Netherlands robot (62.69.178.xx)
User-Agent: IlseBot/1.x
Disallow: /
# Il Trovatore - Italian search engine robot
User-Agent: IlTrovatore-Setaccio ( http://www.iltrovatore.it)
Disallow: /
# Il Trovatore - Italian search engine robot
User-Agent: Iltrovatore-Setaccio/0.3-dev (Indexing; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)
Disallow: /
# Il Trovatore - Italian search engine robot
User-Agent: IlTrovatore-Setaccio/1.2 ( http://www.iltrovatore.it/aiuto/faq.html)
Disallow: /
# Il Trovatore - Italian search engine robot
User-Agent: Iltrovatore-Setaccio/1.2 (It-bot; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)
Disallow: /
# Il Trovatore - Italian search engine robot
User-Agent: iltrovatore-setaccio/1.2-dev (spidering; http://www.iltrovatore.it/aiuto/.....)
Disallow: /
# Il Trovatore - Italian search engine robot
User-Agent: IlTrovatore/1.2 (IlTrovatore; http://www.iltrovatore.it/bot.html; bot@iltrovatore.it)
Disallow: /
# ImageVisu image and graphics viewer - display files from the Web (HTTP and ECWP)
User-Agent: ImageVisu/v4.x.x
Disallow: /
# BD-Brandprotect copyright infringement crawler
User-Agent: ImageWalker/2.0 (www.bdbrandprotect.com)
Disallow: /
# HttpClient - a PHP Web Client Class
User-Agent: Incutio HttpClient v0.x
Disallow: /
# IncyWincy search engine using DMOZ Open Directory database
User-Agent: IncyWincy data gatherer(webmaster@loopimprovements.com
Disallow: /
# IncyWincy search engine using DMOZ Open Directory database
User-Agent: IncyWincy page crawler(webmaster@loopimprovements.com
Disallow: /
# Look.com robot using IncyWincy search engine
User-Agent: IncyWincy(http://www.look.com)
Disallow: /
# IncyWincy search engine using DMOZ Open Directory database
User-Agent: IncyWincy(http://www.loopimprovements.com/robot.html)
Disallow: /
# IncyWincy search engine using DMOZ Open Directory database
User-Agent: IncyWincy/2.1(loopimprovements.com/robot.html)
Disallow: /
# Index the Web (69.57.134.xx) crawler
User-Agent: IndexTheWeb.com Crawler7
Disallow: /
# Spam bot from diff. IPs
User-Agent: Industry Program 1.0.x
Disallow: /
# Inet Library Resource Center robot
User-Agent: Inet library
Disallow: /
# InetURL IVM (phone software) plugin for web server access ?
User-Agent: InetURL/1.0
Disallow: /
# Pubblisito.com search - Italia
User-Agent: info@pubblisito.com- (http://www.pubblisito.com) il Sud dei Motori di Ricerca
Disallow: /
# Infoaxe - search history and bookmark service
User-Agent: Infoaxe./Nutch-0.9
Disallow: /
# Converas RetrievalWare Internet Spider (63.241.61.x)
User-Agent: infoConveraCrawler/0.8 ( http://www.authoritativeweb.com/crawl)
Disallow: /
# Versions-project.org Ingelin spider
User-Agent: InfoFly/1.0 (http://www.versions-project.org/)
Disallow: /
# InfoLink link checking tool
User-Agent: InfoLink/1.x
Disallow: /
# INFOMINE Scholarly Internet Resource Collection crawler
User-Agent: INFOMINE/8.0 Adders
Disallow: /
# INFOMINE Scholarly Internet Resource Collection crawler
User-Agent: INFOMINE/8.0 RemoteServices
Disallow: /
# INFOMINE Scholarly Internet Resource Collection crawler
User-Agent: INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)
Disallow: /
# 164.71.1.1xx jp.co.fujitsu.t2 Robot
User-Agent: InfoNaviRobot(F107)
Disallow: /
# Infoseek robot
User-Agent: InfoSeek Sidewinder/0.9
Disallow: /
# Infoseek robot
User-Agent: InfoSeek Sidewinder/1.0A
Disallow: /
# Infoseek robot
User-Agent: InfoSeek Sidewinder/1.1A
Disallow: /
# Infoseek robot
User-Agent: Infoseek SideWinder/1.45 (Compatible; MSIE 10.0; UNIX)
Disallow: /
# Infoseek Japan robot
User-Agent: Infoseek SideWinder/2.0B (Linux 2.4 i686)
Disallow: /
# Ilse Netherlands robot (62.69.178.xx)
User-Agent: INGRID/3.0 MT (webcrawler@NOSPAMexperimental.net; http://webmaster.ilse.nl/jsp/webmaster.jsp)
Disallow: /
# Inktomi (Hotbot-Lycos NBCi etc.) robot
User-Agent: Inktomi Search
Disallow: /
# Enterprise Search engine software (64.202.165.xxx)
User-Agent: InnerpriseBot/1.0 (http://www.innerprise.com/)
Disallow: /
# Insitor Search robot (80.67.20.1xx)
User-Agent: Insitor.com search and find world wide!
Disallow: /
# Insitor Search robot (80.67.20.1xx)
User-Agent: Insitornaut
Disallow: /
# download manager
User-Agent: InstallShield DigitalWizard
Disallow: /
# Integrity - website broken link checker for Mac OS X
User-Agent: integrity/1.6
Disallow: /
# Microton Intelix robot for Eurotran translation software ?
User-Agent: Intelix/0.x (cs; http://www.microton.cz/intelix/; microton@@microton.cz)
Disallow: /
# Interarchy file transfer software - SFTP/FTP client for Mac OS X
User-Agent: Interarchy/x.x.x (InterarchyCrawler)
Disallow: /
# Dream Train (Japan) Internet search robot
User-Agent: Internet Ninja x.0
Disallow: /
# Heritrix - The Internet Archive's open-source crawler based on Nutch (207.241.225.2xx)
User-Agent: InternetArchive/0.8-dev(Nutch;http://lucene.apache.org/nutch/bot.html;nutch-agent@lucene.apache
Disallow: /
# Internet Link Agent - link checking tool
User-Agent: InternetLinkAgent/3.1
Disallow: /
# Internetseer Web site monitoring / Claymont robot
User-Agent: InternetSeer.com
Disallow: /
# intraVNews - Feed reader & RSS aggregator for Outlook
User-Agent: intraVnews/1.x
Disallow: /
# Internet Open Index crawler using Nutch
User-Agent: IOI/2.0 (ISC Open Index crawler; http://index.isc.org/; bot@index.isc.org)
Disallow: /
# IP*Works! HTTP Component
User-Agent: IP*Works! V5 HTTP/S Component - by /n software - www.nsoftware.com
Disallow: /
# IP2Location - Reverse lookup geographical data and ISP by IP
User-Agent: IP2LocationBot/1.0 http://www.ip2location.com
Disallow: /
# IP2Map - geographical IP mapping
User-Agent: IP2MapBot/1.1 http://www.ip2map.com
Disallow: /
# Laurions Ipium robot
User-Agent: IPiumBot laurion(dot)com
Disallow: /
# Ipselon Web Search robot
User-Agent: IpselonBot/0.xx-beta (Ipselon; http://www.ipselon.com; ipselonbot@ipselon.com)
Disallow: /
# Iria download manager
User-Agent: Iria/1.xxa
Disallow: /
# IRL-crawler - Texas A&M University research project crawler
User-Agent: IRLbot/1.0 ( http://irl.cs.tamu.edu/crawler)
Disallow: /
# IRL-crawler - Texas A&M University research project crawler
User-Agent: IRLbot/3.0 (compatible; MSIE 6.0; http://irl.cs.tamu.edu/crawler/)
Disallow: /
# url_log - Irssi Perl url grabber
User-Agent: IrssiUrlLog/0.2
Disallow: /
# Irvine downloading tool
User-Agent: Irvine/1.x.x
Disallow: /
# Unknown spambot / harvester from diff. IPs
User-Agent: ISC Systems iRc Search 2.1
Disallow: /
# iSiloX document converter for iSilo reader
User-Agent: iSiloX/4.xx Windows/32
Disallow: /
# Unknown University of Alberta link-checking ?
User-Agent: isurf (tszhu@canada.com)
Disallow: /
# iTunes UA name used to access and decrypt the iTunes music store pages
User-Agent: iTunes/x.x.x
Disallow: /
# Some spam bot from 66.139.78.xx(x)
User-Agent: IUPUI Research Bot v 1.9a
Disallow: /
# iVia robot - Open source Internet portal & virtual library system software
User-Agent: iVia Page Fetcher (http://ivia.ucr.edu/useragents.shtml)
Disallow: /
# iVia robot - Open source Internet portal & virtual library system software
User-Agent: iVia/4.0 CanonizeUrl (http://infomine.ucr.edu/iVia/useragents.shtml
Disallow: /
# BD BrandProtect - brand, company or trademarks online monitoring
User-Agent: IWAgent/ 1.0 - www.brandprotect.com
Disallow: /
# Proxy message from jp-q.ne.jp
User-Agent: J-PHONE/3.0/J-SH07
Disallow: /
# ODIN Directory Japan robot (163.138.95.xx)
User-Agent: Jabot/6.x (http://odin.ingrid.org/)
Disallow: /
# ODIN Directory Japan robot (163.138.95.xx)
User-Agent: Jabot/7.x.x (http://odin.ingrid.org/)
Disallow: /
# German Domanova (offline since Feb.02) robot
User-Agent: Jack
Disallow: /
# Jakarta Commons (Java based) HTTP client
User-Agent: Jakarta Commons-HttpClient/2.0xxx
Disallow: /
# Jakarta Commons (Java based) HTTP client
User-Agent: Jakarta Commons-HttpClient/3.0-rcx
Disallow: /
# JamBot search robot (70.146.82.xx)
User-Agent: Jambot/0.1.x (Jambot; http://www.jambot.com/blog; crawler@jambot.com)
Disallow: /
# JamBot search robot
User-Agent: Jambot/0.2.1 (Jambot; http://www.jambot.com/blog/static.php?page=webmaster-robot; crawler@jambot.com)
Disallow: /
# Java VM
User-Agent: Java 1.1
Disallow: /
# Java VM
User-Agent: Java/1.4.1_01
Disallow: /
# Java VM
User-Agent: Java1.0.21.0
Disallow: /
# Java VM
User-Agent: Java1.1.xx.x
Disallow: /
# Java VM
User-Agent: Java1.3.0rc1
Disallow: /
# Java VM
User-Agent: Java1.3.x
Disallow: /
# Java VM
User-Agent: Java1.4.0
Disallow: /
# Jayde B2B Search robot (66.28.139.xx)
User-Agent: Jayde Crawler. http://www.jayde.com
Disallow: /
# some site downloading tool ? via 61.77.51.xxx
User-Agent: JBH Agent 2.0
Disallow: /
# WAP 2.0 / jBrowser for handhelds
User-Agent: jBrowser/J2ME Profile/MIDP-1.0 Configuration/CLDC-1.0 (Google WAP Proxy/1.0)
Disallow: /
# JCheckLinks Java hyperlink validator
User-Agent: JCheckLinks/0.1 RPT-HTTPClient/0.3-1
Disallow: /
# Java Development Kit
User-Agent: JDK/1.1
Disallow: /
# Insignias Jeode (PDA) Java platform
User-Agent: Jeode/1.x.x
Disallow: /
# JetEye Search robot (64.62.142.xxx / 64.71.144.xxx)
User-Agent: Jetbot/1.0
Disallow: /
# Omea RSS - Atom - newsgroups web page reader
User-Agent: JetBrains Omea Reader 1.0.x (http://www.jetbrains.com/omea_reader/)
Disallow: /
# Omea RSS - Atom - newsgroups web page reader
User-Agent: JetBrains Omea Reader 2.0 Release Candidate 1 (http://www.jetbrains.com/omea_reader/)
Disallow: /
# JetCar / Flashget download manager
User-Agent: JetCar
Disallow: /
# Jigsaw - W3C's CSS Validator Server
User-Agent: Jigsaw/2.2.x W3C_CSS_Validator_JFouffa/2.0
Disallow: /
# Jobo website downloading program
User-Agent: JoBo/1.x (http://www.matuschek.net/jobo.html)
Disallow: /
# Jobo website downloading program
User-Agent: JoBo/@JOBO_VERSION@(http://www.matuschek.net/jobo.html)
Disallow: /
# Financialbot.com - German (213.61.218.xx) job search JobRoboter
User-Agent: JobSpider_BA/1.1
Disallow: /
# Jocsoft Web Spider - website downloading tool
User-Agent: JOC Web Spider
Disallow: /
# Jordo Media RSS / Atom feed directory link checking
User-Agent: JordoMedia/1.0 RSS File Reader (http://www.jordomedia.com)
Disallow: /
# Journster.com RSS/Atom aggregator
User-Agent: Journster [alpha] (http://journster.com/)
Disallow: /
# Journster.com RSS/Atom aggregator
User-Agent: Journster.com RSS/Atom aggregator 0.5 (http://www.journster.com/bot.phtml)
Disallow: /
# Check Favorites bookmark checking
User-Agent: JRTS Check Favorites Utility
Disallow: /
# Check Favorites bookmark checking
User-Agent: JRTwine Software Check Favorites Utility
Disallow: /
# Jyxo search (Czech Republic) robot (212.71.128.xx)
User-Agent: Jyxobot/x
Disallow: /
# K-meleon browser - Windows 2000
User-Agent: K-Meleon/0.6 (Windows; U; Windows NT 5.1; en-US; rv:0.9.5) Gecko/20011011
Disallow: /
# Verity K2 Spider (Network search software)
User-Agent: k2spider
Disallow: /
# Unknown robot from AITrc (Advanced Information Technology Research Center) - Korea
User-Agent: KAIST AITrc Crawler
Disallow: /
# Kakle ranked metasearch robot
User-Agent: KakleBot - www.kakle.com/0.1 (KakleBot - www.kakle.com; http:// www.kakle.com/bot.html; support@kakle.com)
Disallow: /
# Kalooga image crawler
User-Agent: kalooga/kalooga-4.0-dev-datahouse (Kalooga; http://www.kalooga.com; info@kalooga.com)
Disallow: /
# Kalooga image crawler
User-Agent: kalooga/KaloogaBot (Kalooga; http://www.kalooga.com/info.html?page=crawler; crawler@kalooga.com)
Disallow: /
# Kapere site grabber / web downloader
User-Agent: Kapere (http://www.kapere.com)
Disallow: /
# Kazehakase - Gecko based browser (Japan)
User-Agent: Kazehakase/0.x.x.[x]
Disallow: /
# Openwave UP.Browser for mobiles via Google WAP Proxy (216.239.33.x)
User-Agent: KDDI-SN22 UP.Browser/6.0.7 (GUI) MMP/1.1 (Google WAP Proxy/1.0)
Disallow: /
# Kenjin Spider search agent
User-Agent: Kenjin Spider
Disallow: /
# Dznet.com Kevin crawler (link checking ?) via 68.39.148.xx (nj.comcast.net)
User-Agent: Kevin http://dznet.com/kevin/
Disallow: /
# Website Alert Kevin crawler (website monitoring) via 68.39.148.xx (nj.comcast.net)
User-Agent: Kevin http://websitealert.net/kevin/
Disallow: /
# Voila.fr robot
User-Agent: KE_1.0/2.0 libwww/5.2.8
Disallow: /
# Some Perl search script from KFSW (Germany)
User-Agent: KFSW-Bot (Version: 1.01 powered by KFSW www.kfsw.de)
Disallow: /
# kinja weblog search robot
User-Agent: kinja-imagebot (http://www.kinja.com/)
Disallow: /
# kinja weblog search robot
User-Agent: kinjabot (http://www.kinja.com)
Disallow: /
# Fireball search (Germany) robot
User-Agent: KIT-Fireball/2.0
Disallow: /
# Fireball search (Germany) robot
User-Agent: KIT-Fireball/2.0 (compatible; Mozilla 4.0; MSIE 5.5)
Disallow: /
# Klondike WAP Browser
User-Agent: Klondike/1.50 (WSP Win32) (Google WAP Proxy/1.0)
Disallow: /
# University of Washington KnowItAll - web information extraction
User-Agent: KnowItAll(knowitall@cs.washington.edu)
Disallow: /
# The knowledge.com (ODP) directory robot
User-Agent: Knowledge.com/0.x
Disallow: /
# Kontiki Client download manager
User-Agent: Kontiki Client x.xx
Disallow: /
# Krugle source code search engine for developers (64.71.164.1xx)
User-Agent: Krugle/Krugle,Nutch/0.8+ (Krugle web crawler; http://www.krugle.com/crawler/info.html; webcrawler@krugle.com)
Disallow: /
# KnowledgeStorm technology industry crawler for Findtech.com
User-Agent: KSbot/1.0 (KnowledgeStorm crawler; http://www.knowledgestorm.com/resources/content/crawler/index.html; crawleradmin@knowledgestorm.com)
Disallow: /
# Kuloko contextual search robot
User-Agent: kuloko-bot/0.x
Disallow: /
# Kuloko contextual search robot
User-Agent: kulokobot www.kuloko.com kuloko@backweave.com
Disallow: /
# National Library of Sweden Heritage Project robot
User-Agent: kulturarw3/0.1
Disallow: /
# Link or server checking from Sanoma Budapest (195.70.35.xxx)
User-Agent: KummHttp/1.1 (compatible; KummClient; Linux rulez)
Disallow: /
# TREC Blog Track - Blog and news feed crawler
User-Agent: Labrador/0.2; http://ir.dcs.gla.ac.uk/labrador; craigm@dcs.gla.ac.uk
Disallow: /
# Intel's Lachesis web site response time monitoring tool
User-Agent: Lachesis
Disallow: /
# Unknown robot from Easten Network China (202.96.51.1xx)
User-Agent: lanshanbot/1.0
Disallow: /
# Unknown robot from Easten Network China (202.96.51.1xx)
User-Agent: lanshanbot/1.0 (+http://search.msn.com/msnbot.htm)
Disallow: /
# Lapozz search (Hungary) robot (82.131.195.xx)
User-Agent: LapozzBot/1.4 ( http://robot.lapozz.com)
Disallow: /
# Lapozz search (Hungary) robot (82.131.195.xx)
User-Agent: LapozzBot/1.5 (+http://robot.lapozz.hu)
Disallow: /
# Larbin indexer used as Metacarta.com (66.28.xx.xxx) robot
User-Agent: larbin (samualt9@bigfoot.com)
Disallow: /
# Unknown robot from 66.230.140.xx (argon.oxeo.com)
User-Agent: LARBIN-EXPERIMENTAL (efp@gmx.net)
Disallow: /
# Larbin indexer used by Mitsubishi Electric Research Labs
User-Agent: larbin_2.1.1 larbin2.1.1@somewhere.com
Disallow: /
# Larbin indexer used as Compete.com crawler
User-Agent: larbin_2.2.0 (crawl@compete.com)
Disallow: /
# Larbin indexer used as Inria robot
User-Agent: larbin_2.2.1_de_Viennot (Laurent.Viennot@inria.fr)
Disallow: /
# Larbin indexer used as Kyoto University robot
User-Agent: larbin_2.2.2 (sugayama@lab7.kuis.kyoto-u.ac.jp)
Disallow: /
# Larbin indexer used as Inria robot
User-Agent: larbin_2.2.2_guillaume (guillaume@liafa.jussieu.fr)
Disallow: /
# Larbin indexer used by an unknown dsl.net client
User-Agent: larbin_2.6.0 (larbin2.6.0@unspecified.mail)
Disallow: /
# Larbin indexer used by diff. IPs / services
User-Agent: larbin_2.6.1 (larbin2.6.1@unspecified.mail)
Disallow: /
# Larbin indexer used by National Institute of Informatics (NII/Japan)
User-Agent: larbin_2.6.2 (hamasaki@grad.nii.ac.jp)
Disallow: /
# Larbin indexer used by diff. IPs
User-Agent: larbin_2.6.2 (larbin2.6.2@unspecified.mail)
Disallow: /
# Larbin indexer used as robot by Georgia Institute of Technology http://www.gatech.edu/
User-Agent: larbin_2.6.2 (listonATccDOTgatechDOTedu)
Disallow: /
# Larbin indexer used by Technical University of Crete
User-Agent: larbin_2.6.2 (pimenas@systems.tuc.gr)
Disallow: /
# Larbin indexer used as Lemur Consulting robot
User-Agent: larbin_2.6.2 (tom@lemurconsulting.com)
Disallow: /
# Larbin indexer used as robot via cloud9.net (168.100.192.xxx)
User-Agent: larbin_2.6.2 (vitalbox1@hotmail.com)
Disallow: /
# Unknown robot from EPFL Lausanne Switzerland (128.178.155.1xx)
User-Agent: larbin_2.6.3 (ltaa_web_crawler@groupes.epfl.ch)
Disallow: /
# Larbin indexer used by GenieKnows.com search
User-Agent: larbin_2.6.3 (wgao@genieknows.com)
Disallow: /
# Larbin indexer used by Next Generation Information Retrieval (NGIR)
User-Agent: larbin_2.6.3_for_(http://cosco.hiit.fi/search/) tsilande@hiit.fi
Disallow: /
# Larbin indexer used by CEA / DCom Rechercher
User-Agent: larbin_2.6_basileocaml (basile.starynkevitch@cea.fr)
Disallow: /
# Larbin indexer used as Inria robot
User-Agent: larbin_devel (http://pauillac.inria.fr/~ailleret/prog/larbin/)
Disallow: /
# LawInfo - Lawyer and attorney directory
User-Agent: lawinfo-crawler/Nutch-0.9-dev (Crawler for lawinfo.com pages; http://www.lawinfo.com; webmaster@lawinfo.com)
Disallow: /
# ROADS - Perl web based subject based gateway tool
User-Agent: lc/$ROADS::Version libwww-perl/5.00
Disallow: /
# unknown robot via MTT.ca / Aliant.ca
User-Agent: lcabot
Disallow: /
# LeapTag news reader and content discovery tool
User-Agent: LeapTag/0.8.1.beta081.r3750 (compatible; Mozilla 4.0; MSIE 5.5; robot@yoriwa.com)
Disallow: /
# Linkexchange crawler
User-Agent: LECodeChecker/3.0 libgetdoc/1.0
Disallow: /
# Leechget download manager
User-Agent: LeechGet 200x (www.leechget.de)
Disallow: /
# Gseek.com (site is offline) robot
User-Agent: LEIA/2.90
Disallow: /
# Gseek.com (site is offline) robot
User-Agent: LEIA/3.01pr (LEIAcrawler; [SNIP])
Disallow: /
# Maybe logfile spamming for Lets crawl! search (Germany)
User-Agent: LetsCrawl.com/1.0 +http://letscrawl.com/
Disallow: /
# Lexibot (exMataHari) search software
User-Agent: LexiBot/1.00
Disallow: /
# Polaris mobile browser on LG LX 260 Sprint Rumor phone
User-Agent: LG-LX260 POLARIS-LX260/2.0 MMP/2.0 Profile/MIDP-2.0 Configuration/CLDC-1.1
Disallow: /
# LG 8138 Mobile Phone browser
User-Agent: LG/U8138/v1.0
Disallow: /
# About.com robot
User-Agent: Libby_1.1/libwww-perl/5.47
Disallow: /
# libcurl's (multiprotocol file transfer library) standard user-agent name
User-Agent: libcurl-agent/1.0
Disallow: /
# LibertyW search for mobile (France)
User-Agent: LibertyW (+http://www.lw01.com)
Disallow: /
# Korea Telecom Search robot
User-Agent: libWeb/clsHTTP -- hiongun@kt.co.kr
Disallow: /
# CMP United Media robot
User-Agent: libwww-perl/5.41
Disallow: /
# SplatSearch robot (207.44.142.xx)
User-Agent: libwww-perl/5.45
Disallow: /
# Alexa robot
User-Agent: libwww-perl/5.48
Disallow: /
# diff. IPs / services
User-Agent: libwww-perl/5.50
Disallow: /
# Fast Search robot
User-Agent: libwww-perl/5.52 FP/2.1
Disallow: /
# Fast Search robot
User-Agent: libwww-perl/5.52 FP/4.0
Disallow: /
# diff. IPs / services
User-Agent: libwww-perl/5.53
Disallow: /
# Profile for You internet profiling (?)
User-Agent: libwww-perl/5.63
Disallow: /
# unknown link checking from Wanadoo.fr (193.253.33.xxx)
User-Agent: libwww-perl/5.64
Disallow: /
# Amidalla search engine robot (62.241.33.xx)
User-Agent: libwww-perl/5.65
Disallow: /
# SplatSearch robot (72.36.210.xx)
User-Agent: libwww-perl/5.800
Disallow: /
# Mediater Rechercher robot
User-Agent: libwww/5.3.2
Disallow: /
# Liferea - Linux feed reader
User-Agent: Liferea/0.x.x (Linux; en_US.UTF-8; http://liferea.sf.net/)
Disallow: /
# Liferea (Linux Feed Reader) news aggregator for Unix and Linux
User-Agent: Liferea/1.x.x (Linux; es_ES.UTF-8; http://liferea.sf.net/)
Disallow: /
# Lightning Download manager
User-Agent: LightningDownload/1.0beta2
Disallow: /
# Lightning Download manager
User-Agent: LightningDownload/1.x.x
Disallow: /
# Lightning Download manager
User-Agent: LightningDownload/1.x.x [Accelerated x]
Disallow: /
# Lijit blog search spider
User-Agent: LijitSpider/Nutch-0.9 (Reports crawler; http://www.lijit.com/; info(a)lijit(d)com)
Disallow: /
# Some spam bot
User-Agent: Lincoln State Web Browser
Disallow: /
# Link Valet online link checking
User-Agent: Link Valet Online 1.x
Disallow: /
# Linkalarm link validation
User-Agent: LinkAlarm/2.x
Disallow: /
# Linkbot Pro link checking software
User-Agent: Linkbot
Disallow: /
# Rpsoft 2000 Site-Crawler
User-Agent: linkbot
Disallow: /
# Linkbot Pro link checking software
User-Agent: Linkbot x.0
Disallow: /
# Linkcheck - linkchecking tool for Unix/Linux
User-Agent: LinkCheck (linkcheck@inter7.com http://www.inter7.com/linkcheck)
Disallow: /
# Linklint - Perl html link checker
User-Agent: LinkLint-checkonly/2.x.x
Disallow: /
# Linklint - Perl html link checker
User-Agent: LinkLint-spider/2.x.x
Disallow: /
# Linknz - The Kiwi Search Engine (New Zealand)
User-Agent: linknzbot
Disallow: /
# Link-Pimp web directory link checking
User-Agent: LinkPimpin v1.0
Disallow: /
# TafWeb link checking program
User-Agent: LinkProver 2.1
Disallow: /
# Links text browser for Unix & OS/2
User-Agent: Links (0.9x; Linux 2.4.7-10 i686)
Disallow: /
# Links text browser for Unix & OS/2
User-Agent: Links (0.9xpre12; Linux 2.2.14-5.0 i686; 80x24)
Disallow: /
# Links text browser for Unix & OS/2
User-Agent: Links (2.xpre7; Linux 2.4.18 i586; x)
Disallow: /
# Links SQL directory management program
User-Agent: Links - http://gossamer-threads.com/scripts/links/
Disallow: /
# Links SQL directory management program
User-Agent: Links 2.0 (http://gossamer-threads.com/scripts/links/)
Disallow: /
# Links SQL directory management program
User-Agent: Links SQL (http://gossamer-threads.com/scripts/links-sql/)
Disallow: /
# Links4us ODP based directory link checking
User-Agent: Links4US-Crawler, (+http://links4us.com/)
Disallow: /
# Shareware robot from Elsop.com (used by Indiafocus/Indiainfo and others)
User-Agent: LinkScan/11.0beta2 Unix
Disallow: /
# Shareware robot from Elsop.com (used by Indiafocus/Indiainfo and others)
User-Agent: LinkScan/9.0g Unix
Disallow: /
# Shareware robot from Elsop.com
User-Agent: LinkScan/x.x Unix
Disallow: /
# Linksmanager bookmark checking
User-Agent: LinksManager.com (http://linksmanager.com/linkchecker.html)
Disallow: /
# LinkSonar (Japan) link checking tool
User-Agent: LinkSonar/1.35
Disallow: /
# Left Side Software's LinkSweeper (ceased) bookmark utility
User-Agent: LinkSweeper/1.x
Disallow: /
# Seventwentyfour link checking robot
User-Agent: LinkWalker
Disallow: /
# link check 3 - Perl HTML link checker (from Perl for Web Site Management)
User-Agent: link_check3.plx libwww-perl/5.65
Disallow: /
# Listbid.com / Directnic.com link checking ?
User-Agent: ListBidBot (freelance job spider http://listbid.com)
Disallow: /
# WKD Lab: LiveTrans - Online query and terminology translation service
User-Agent: LiveTrans/Nutch-0.9 (maintainer: cobain at iis dot sinica dot edu dot tw; http://wkd.iis.sinica.edu.tw/LiveTrans/)
Disallow: /
# llaut robot - Universitat de les Illes Balears (Spain)
User-Agent: Llaut/1.0 (http://mnm.uib.es/~gallir/llaut/bot.html)
Disallow: /
# E-Mail harvesting robot - same as ContactBot
User-Agent: LMQueueBot/0.2
Disallow: /
# lmspider from Scansoft (192.133.61.xx) - Web text collector
User-Agent: lmspider (lmspider@scansoft.com)
Disallow: /
# Lexis-Nexis robot
User-Agent: LNSpiderguy
Disallow: /
# LocalBot company information collector
User-Agent: LocalBot/1.0 ( http://www.localbot.co.uk/)
Disallow: /
# Local.com local search robot (216.52.252.xxx)
User-Agent: LocalcomBot/1.2.x ( http://www.local.com/bot.htm)
Disallow: /
# Lockstep (website content protection tool) user agent
User-Agent: Lockstep Spider/1.0
Disallow: /
# GlobalQueue spider (64.40.105.xxx)
User-Agent: Look.com
Disallow: /
# Lotus Notes browser
User-Agent: Lotus-Notes/4.5 ( Windows-NT )
Disallow: /
# IBM Lotus Discovery Server
User-Agent: LotusDiscovery/x.0 (compatible; Mozilla 4.0; MSIE 4.01; Windows NT)
Disallow: /
# Everatom.com song lyrics search
User-Agent: Lovel as 1.0 ( +http://www.everatom.com)
Disallow: /
# LTI - The Lemur Toolkit for Language Modeling and Information Retrieval via Yahoo
User-Agent: LTI/LemurProject Nutch Spider/Nutch-1.0-dev (lti crawler for CMU; http://www.lti.cs.cmu.edu; changkuk at cmu dot edu)
Disallow: /
# LTI - The Lemur Toolkit for Language Modeling and Information Retrieval via Yahoo
User-Agent: LTI/LemurProject Nutch Spider/Nutch-1.0-dev (Research spider using Nutch; http://www.lemurproject.org; mhoy@cs.cmu.edu)
Disallow: /
# Luchs.at (Linux Wiki) link checking
User-Agent: luchs.at URL checker
Disallow: /
# Lunascape IE based browser (Japan)
User-Agent: Lunascape
Disallow: /
# Ultimate Search / Smartdesk (no website) robot
User-Agent: lwp-trivial/1.32
Disallow: /
# Search4free robot
User-Agent: lwp-trivial/1.34
Disallow: /
# Expert HTML online source viewer
User-Agent: lwp-trivial/1.35
Disallow: /
# Perl LWP:Collective module
User-Agent: LWP::Simple/5.22
Disallow: /
# Perl LWP:Collective module
User-Agent: LWP::Simple/5.36
Disallow: /
# Perl LWP:Collective module - Linkomatic robot
User-Agent: LWP::Simple/5.48
Disallow: /
# Perl LWP:Collective module - secure-netz.de link checking (in conjunction w. libwww-perl/5.50)
User-Agent: LWP::Simple/5.50
Disallow: /
# Perl LWP:Collective module - Inktomi (62.253.64.x) robot
User-Agent: LWP::Simple/5.51
Disallow: /
# Perl LWP:Collective module - Only.com
User-Agent: LWP::Simple/5.53
Disallow: /
# Perl LWP:Collective module
User-Agent: LWP::Simple/5.63
Disallow: /
# ThePlanet/jaja-jak-globusy.com Google Adsense referrer spam bot from 70.85.116.* / 70.84.128.xxx / 70.85.193.xxx
User-Agent: LWP::Simple/5.803
Disallow: /
# Lycos spider
User-Agent: Lycos_Spider_(modspider)
Disallow: /
# Lycos spider
User-Agent: Lycos_Spider_(T-Rex)
Disallow: /
# Bobcat - Text and Lynx based DOS browser
User-Agent: Lynx/2-4-2 (Bobcat/0.5 [DOS] Jp Beta04)
Disallow: /
# Lynx 2.x text mode browser
User-Agent: Lynx/2.6 libwww-FM/2.14
Disallow: /
# seebot.org online service - uses lynx browser for crawlers view of web pages
User-Agent: Lynx/2.8 (;http://seebot.org)
Disallow: /
# Lynx 2.x text mode browser
User-Agent: Lynx/2.8.3dev.9 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.6
Disallow: /
# Lynx 2.x text mode browser used as robot via cogentco.com
User-Agent: Lynx/2.8.4rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.6c (human-guided@lerly.net)
Disallow: /
# Some spam bot
User-Agent: Mac Finder 1.0.xx
Disallow: /
# UKWizz search robot
User-Agent: Mackster( http://www.ukwizz.com )
Disallow: /
# Wind.it client user-agent ?
User-Agent: Mag-Net
Disallow: /
# All Magic/Wap wml service for mobile devices
User-Agent: MagicWML/1.0 (forcewml)
Disallow: /
# Magpie RSS - PHP RSS Parser
User-Agent: MagpieRSS/0.7x (+http://magpierss.sf.net)
Disallow: /
# Mahiti.com India search crawler
User-Agent: Mahiti.Com/Mahiti Crawler-1.0 (Mahiti.Com; http://mahiti.com ; mahiti.com)
Disallow: /
# Mail.ru search
User-Agent: Mail.Ru/1.0
Disallow: /
# Unknown Bea robot
User-Agent: mailto:webcraft@bea.com
Disallow: /
# SLI Systems mammoth robot
User-Agent: mammoth/1.0 ( http://www.sli-systems.com/)
Disallow: /
# Looksmart robot
User-Agent: MantraAgent
Disallow: /
# Map of the Internet visual search engine index robot
User-Agent: MapoftheInternet.com ( http://MapoftheInternet.com)
Disallow: /
# Kolibri.de robot
User-Agent: Mariner/5.1b [de] (Win95; I ;Kolibri gncwebbot)
Disallow: /
# Pilot Hitlist web site analysis
User-Agent: Marketwave Hit List
Disallow: /
# Looksmart directory page analysis
User-Agent: Martini
Disallow: /
# Looksmart directory page analysis
User-Agent: MARTINI
Disallow: /
# Marvin Medhunt robot
User-Agent: Marvin v0.3
Disallow: /
# Sagool search Japan robot
User-Agent: MaSagool/1.0 (MaSagool; http://sagool.jp/; info@sagool.jp)
Disallow: /
# Mass Downloader download manager
User-Agent: Mass Downloader 2.x
Disallow: /
# Masterseek (Scandinavia) Beta business search ?
User-Agent: MasterSeek
Disallow: /
# Lexibot (exMataHari) search software
User-Agent: Mata Hari/2.00
Disallow: /
# Virgilio Italy robot (212.48.11.xxx) using Fast Enterprise Search
User-Agent: Matrix S.p.A. - FAST Enterprise Crawler 6 (Unknown admin e-mail address)
Disallow: /
# Maxomo multimedia search robot
User-Agent: maxomobot/dev-20051201 (maxomo; http://67.102.134.34:4047/MAXOMO/MAXOMObot.html; maxomobot@maxomo.com)
Disallow: /
# Unknown robot from McAfee Austria (80.123.144.xx)
User-Agent: McBot/5.001 (windows; U; NT4.0; en-us)
Disallow: /
# MegaDownload files search robot
User-Agent: MDbot/1.0 (+http://www.megadownload.net/bot.html)
Disallow: /
# Substitute for Windows Media Player
User-Agent: Media Player Classic
Disallow: /
# Media Find crawler
User-Agent: MediaCrawler-1.0 (Experimental)
Disallow: /
# Google AdSense robot
User-Agent: Mediapartners-Google/2.1 ( http://www.googlebot.com/bot.html)
Disallow: /
# WWW.fi Media Search
User-Agent: MediaSearch/0.1
Disallow: /
# Search UK robot
User-Agent: MegaSheep v1.0 (www.searchuk.com internet sheep)
Disallow: /
# Megite web2.0 RSS and news service software
User-Agent: Megite2.0 (http://www.megite.com)
Disallow: /
# Mercator crawler software (used by Altavista)
User-Agent: Mercator-1.x
Disallow: /
# Mercator crawler software (used by Altavista)
User-Agent: Mercator-2.0
Disallow: /
# Mercator crawler software (used by Altavista)
User-Agent: Mercator-Scrub-1.1
Disallow: /
# MetaEuro.com Clustering Web Search Engine crawler
User-Agent: Metaeuro Web Crawler/0.2 (MetaEuro Web Search Clustering Engine; http://www.metaeuro.com; crawler at metaeuro dot com)
Disallow: /
# MetaGer search robot (Germany)
User-Agent: MetaGer-LinkChecker
Disallow: /
# MetaGer search robot (Germany)
User-Agent: MetagerBot/0.8-dev (MetagerBot; http://metager.de; )
Disallow: /
# MetaGer search robot (Germany)
User-Agent: MetaGer_PreChecker0.1
Disallow: /
# Download Express download manager
User-Agent: MetaProducts Download Express/1.x
Disallow: /
# Metaspinner search robot - Germany
User-Agent: Metaspinner/0.01 (Metaspinner; http://www.meta-spinner.de/; support@meta-spinner.de/)
Disallow: /
# Metatagsdir.com directory index spider
User-Agent: metatagsdir/0.7 (+http://metatagsdir.com/directory/)
Disallow: /
# Microsoft Foundation Class Library - i.e. used for e-mail harvesting from 68.154.96.xx (bellsouth.net)
User-Agent: MFC Foundation Class Library 4.0
Disallow: /
# Microsoft.com user agent
User-Agent: MFC_Tear_Sample
Disallow: /
# Advanced Site Crawler web site ripper and extractor
User-Agent: MFHttpScan
Disallow: /
# GigaBaz Brainbot (Germany) robot (213.139.152.xx)
User-Agent: MicroBaz
Disallow: /
# MS Office 2000 acting as WebDAV client
User-Agent: Microsoft Data Access Internet Publishing Provider Cache Manager
Disallow: /
# MS Office 2000 acting as WebDAV client
User-Agent: Microsoft Data Access Internet Publishing Provider DAV
Disallow: /
# Server probe for data access operations using MS Frontpage with OPTION header;
# also sent by MS Office 2000 acting as WebDAV client
User-Agent: Microsoft Data Access Internet Publishing Provider Protocol Discovery
Disallow: /
# Microsoft Log Parser text query tool
User-Agent: Microsoft Log Parser 2.2
Disallow: /
# MS Small Business Server content indexer
User-Agent: Microsoft Small Business Indexer
Disallow: /
# user agent looks for form-mail components (spam-bot)
User-Agent: Microsoft URL Control - 6.00.8xxx
Disallow: /
# Unknown robot from Microsoft.com (131.107.163.xx)
User-Agent: MicrosoftPrototypeCrawler (How's my crawling? mailto:newbiecrawler@hotmail.com)
Disallow: /
# Secure Computing SmartFilterWhere / Bess web filter (192.55.214.xx)
User-Agent: Microsoft_Internet_Explorer_5.00.438 (fjones@isd.net)
Disallow: /
# xpc-mii.net HTTP server message
User-Agent: MIIxpc/4.2
Disallow: /
# The Mindjet blog MindManager category
User-Agent: Mindjet MindManager
Disallow: /
# unknown robot via Korea Telecom (211.218.xxx.xxx)
User-Agent: minibot
Disallow: /
# miniRank website popularity tool
User-Agent: miniRank/1.6 (Website ranking; www.minirank.com; robot)
Disallow: /
# unknown robot via nec.co.jp Telecom (210.143.35.xx)
User-Agent: MiracleAlphaTest
Disallow: /
# Some spam bot
User-Agent: Missauga Locate 1.0.0
Disallow: /
# Some spam bot
User-Agent: Missigua Locator 1.9
Disallow: /
# Some spam bot
User-Agent: Missouri College Browse
Disallow: /
# Mister PiX picture finding software
User-Agent: Mister Pix II 2.02a
Disallow: /
# Mister PiX picture finding software
User-Agent: Mister PiX version.dll
Disallow: /
# Misterbot search France robot
User-Agent: Misterbot-Nutch/0.7.1 (Misterbot-Nutch; http://www.misterbot.fr; admin@misterbot.fr)
Disallow: /
# Miva / ex Findwhat.com search robot (66.150.55.2xx)
User-Agent: Miva (AlgoFeedback@miva.com)
Disallow: /
# Some spam bot from Jasmine Internet - Bangkok (203.147.0.xx)
User-Agent: Mizzu Labs 2.2
Disallow: /
# Majestic-12 DSearch MJ12bot (Experimental distributed crawler)
User-Agent: MJ12bot/vx.x.x (http://majestic12.co.uk/bot.php?+)
Disallow: /
# Majestic-12 DSearch MJ12bot (Experimental distributed crawler)
User-Agent: MJ12bot/vx.x.x (http://www.majestic12.co.uk/projects/dsearch/mj12bot.php)
Disallow: /
# MJB SEO Club MJBot
User-Agent: MJBot (SEO assessment)
Disallow: /
# MLBot - metadata labs web crawler for building a media index (beta)
User-Agent: MLBot (www.metadatalabs.com)
Disallow: /
# mnoGoSearch (ex UdmSearch) software robot
User-Agent: MnogoSearch/3.2.xx
Disallow: /
# Unknown bad bot - maybe guestbook spamming or email harvesting
User-Agent: Mo College 1.9
Disallow: /
# Goo Japan / Inktomi robot (210.173.179.xx)
User-Agent: moget/x.x (moget@goo.ne.jp)
Disallow: /
# Goo Japan / Inktomi robot (210.173.179.xx)
User-Agent: mogimogi/1.0
Disallow: /
# moiNag - net.art generator
User-Agent: moiNAG 0.02
Disallow: /
# Mojeek Search Preview robot (217.155.205.xx)
User-Agent: MojeekBot/0.x (archi; http://www.mojeek.com/bot.html)
Disallow: /
# Greasemonkey Firefox extension
User-Agent: monkeyagent
Disallow: /
# Moonbrowser - IE based browser (Japan)
User-Agent: MoonBrowser (version 0.41 Beta4)
Disallow: /
# Moreover / FeedDirect RSS feed robot
User-Agent: Moreoverbot/x.00 (+http://www.moreover.com)
Disallow: /
# MixCat robot - see also Felix
User-Agent: Morris - Mixcat Crawler ( http://mixcat.com)
Disallow: /
# Robots.txt online checker
User-Agent: Motoricerca-Robots.txt-Checker/1.0 (http://tool.motoricerca.info/robots-checker.phtml)
Disallow: /
# Obigo WAP browser for mobiles on Motorola V3
User-Agent: Motorola-V3m Obigo
Disallow: /
# MPRM Group Ltd. Spider Monkey robot
User-Agent: Mouse-House/7.4 (spider_monkey spider info at www.mobrien.com/sm.shtml)
Disallow: /
# Movable Type web-based personal publishing system
User-Agent: MovableType/x.x
Disallow: /
# Mozdex Open search engine spider (65.98.100.2xx)
User-Agent: mozDex/0.xx-dev (mozDex; http://www.mozdex.com/en/bot.html; spider@mozdex.com)
Disallow: /
# Bell Nexxia / Sympatico Canada user robot & spoofed referer from diff. IPs
User-Agent: Mozi!
Disallow: /
# MetaExplorer project's MetaQuerier robot
User-Agent: MQbot metaquerier.cs.uiuc.edu/crawler
Disallow: /
# MetaExplorer project's MetaQuerier robot
User-Agent: MQBOT/Nutch-0.9-dev (MQBOT Nutch Crawler; http://falcon.cs.uiuc.edu; mqbot@cs.uiuc.edu)
Disallow: /
# MS Frontpage 4.x
User-Agent: MSFrontPage/4.0
Disallow: /
# Some faked UA - maybe for a download manager
User-Agent: MSIE 4.0 (Win95)
Disallow: /
# unknown robot from gw.ocg-corp.com (209.126.176.x)
User-Agent: MSIE-5.13 (larbin@unspecified.mail)
Disallow: /
# MSN media search robot
User-Agent: msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)
Disallow: /
# Windows Live product search (Beta) robot
User-Agent: msnbot-Products/1.0 (+http://search.msn.com/msnbot.htm)
Disallow: /
# MSN Search robot - 131.107.xxx.xxx 204.95.96.xxx - 204.95.111.xxx 207.46.xxx.xxx
User-Agent: MSNBOT/0.xx (http://search.msn.com/msnbot.htm)
Disallow: /
# MSN Search robot - 131.107.xxx.xxx 204.95.96.xxx - 204.95.111.xxx 207.46.xxx.xxx
User-Agent: msnbot/x.xx ( http://search.msn.com/msnbot.htm)
Disallow: /
# Microsoft search for mobiles
User-Agent: MSNBOT_Mobile MSMOBOT Mozilla/2.0 (compatible; MSIE 4.02; Windows CE; Default)
Disallow: /
# MSN Search robot - 131.107.xxx.xxx 204.95.96.xxx - 204.95.111.xxx 207.46.xxx.xxx
User-Agent: MSNPTC/1.0
Disallow: /
# Microsoft proxy server
User-Agent: MSProxy/2.0
Disallow: /
# MacEdition CodeBitch link checking
User-Agent: MSRBOT
Disallow: /
# Microsoft MSRBot
User-Agent: MSRBOT (http://research.microsoft.com/research/sv/msrbot)
Disallow: /
# StreamBox VCR user agent
User-Agent: Mulder, VCR-1.0
Disallow: /
# Multiblocker (Fantomaster) anonymity software user
User-Agent: multiBlocker browser
Disallow: /
# MultiCrawler for DERI Galway's Semantic Web Search Engine cluster
User-Agent: multicrawler ( http://sw.deri.org/2006/04/multicrawler/robots.html)
Disallow: /
# Virginia Tech Digital Library Research Laboratory robot
User-Agent: MultiText/0.1
Disallow: /
# SoMusical! musical directory link checking
User-Agent: MusicWalker2.0 ( http://www.somusical.com)
Disallow: /
# Unknown bad bot from diff. Taiwanese IPs
User-Agent: MVAClient
Disallow: /
# Windows HTTP Services (WinHTTP)
User-Agent: My WinHTTP Connection
Disallow: /
# unknown user robot (24.124.34.42)
User-Agent: myDaemon
Disallow: /
# GetRight download manager
User-Agent: MyGetRight/1.0.0
Disallow: /
# GetRight download manager
User-Agent: MyGetRight/1.0b
Disallow: /
# Mylinea France web catalogue crawler
User-Agent: Mylinea.com Crawler 2.0
Disallow: /
# VDOG - SEO web directory (Germany) link checking
User-Agent: mylinkcheck/1.02
Disallow: /
# OpenAcoon open source search engine (used by Acoon search)
User-Agent: OpenAcoon v4.0.x (www.openacoon.de)
Disallow: /
# Unknown robots from diff. IPs
User-Agent: Mozilla
Disallow: /
# libwhisker - HTTP client and utility - Perl library
User-Agent: Mozilla (libwhisker/2.4)
Disallow: /
# http://www.somewhere.com robot
User-Agent: Mozilla (Mozilla@somewhere.com)
Disallow: /
# BotSeer search engine for robots.txt
User-Agent: Mozilla 4.0(compatible; BotSeer/1.0; +http://botseer.ist.psu.edu)
Disallow: /
# PDA Pocket IE 2.x Windows CE
User-Agent: Mozilla/1.1 (compatible; MSPIE 2.0; Windows CE)
Disallow: /
# Oregano browser for RISC OS
User-Agent: Mozilla/1.10 [en] (Compatible; RISC OS 3.70; Oregano 1.10)
Disallow: /
# IE 2.x WinNT
User-Agent: Mozilla/1.22 (compatible; MSIE 2.0d; Windows NT)
Disallow: /
# EudoraWeb 2.0 browser (Eudora Internet Suite) for PalmOS
User-Agent: Mozilla/1.22 (compatible; MSIE 5.01; PalmOS 3.0) EudoraWeb 2
Disallow: /
# Dummy user agent - i.e. used by Namo Web Editor
User-Agent: Mozilla/2.0
Disallow: /
# AOL Mac
User-Agent: Mozilla/2.0 (compatible; AOL 3.0; Mac_PowerPC)
Disallow: /
# AOL Win 3.x
User-Agent: Mozilla/2.0 (Compatible; AOL-IWENG 3.0; Win16)
Disallow: /
# Ask Jeeves /Teoma robot
User-Agent: Mozilla/2.0 (compatible; Ask Jeeves)
Disallow: /
# Ask Jeeves /Teoma robot
User-Agent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma)
Disallow: /
# Ask Jeeves /Teoma robot
User-Agent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml)
Disallow: /
# Ask Jeeves /Teoma robot
User-Agent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://sp.ask.com/docs/about/tech_crawling.html)
Disallow: /
# Direct Hit Robot
User-Agent: Mozilla/2.0 (compatible; EZResult -- Internet Search Engine)
Disallow: /
# MS Frontpage x.x web editor
User-Agent: Mozilla/2.0 (compatible; MS FrontPage x.0)
Disallow: /
# IE 2.x Mac Power PC
User-Agent: Mozilla/2.0 (compatible; MSIE 2.1; Mac_PowerPC)
Disallow: /
# IE 3.x WinNT
User-Agent: Mozilla/2.0 (compatible; MSIE 3.02; Update a; AK; Windows NT)
Disallow: /
# IE 3.x AOL Win95
User-Agent: Mozilla/2.0 (compatible; MSIE 3.02; Update a; AOL 3.0; Windows 95)
Disallow: /
# IE 3.x Win95
User-Agent: Mozilla/2.0 (compatible; MSIE 3.0; AK; Windows 95)
Disallow: /
# IE 3.x Win 3.1
User-Agent: Mozilla/2.0 (compatible; MSIE 3.0; Windows 3.1)
Disallow: /
# IE 3.x Win32
User-Agent: Mozilla/2.0 (compatible; MSIE 3.0B; Win32)
Disallow: /
# Borland Delphi .OCX component used by WebCollector email harvester
User-Agent: Mozilla/2.0 (compatible; NEWT ActiveX; Win32)
Disallow: /
# Thunderstone's Webinator Web indexing program
User-Agent: Mozilla/2.0 (compatible; T-H-U-N-D-E-R-S-T-O-N-E)
Disallow: /
# Check&Get bookmark and link checking tool
User-Agent: Mozilla/2.0 compatible; Check&Get 1.1x (Windows 98)
Disallow: /
# Netscape 2.x Win3.x International
User-Agent: Mozilla/2.01 (Win16; I)
Disallow: /
# Netscape 2.x Gold Win95
User-Agent: Mozilla/2.02Gold (Win95; I)
Disallow: /
# Faked user agent for diff. purposes i.e.: - some download manager - E-mail harvesting
User-Agent: Mozilla/3.0 (compatible)
Disallow: /
# AvantGo PDA browser
User-Agent: Mozilla/3.0 (compatible; AvantGo 3.2)
Disallow: /
# Searchhippo robot
User-Agent: Mozilla/3.0 (compatible; Fluffy the spider; http://www.searchhippo.com/; info@searchhippo.com)
Disallow: /
# HP Web PrintSmart (discontinued) - web page printing software
User-Agent: Mozilla/3.0 (compatible; HP Web PrintSmart 04b0 1.0.1.34)
Disallow: /
# Internet Direct Library for Borland (often used as e-mail address collector and mass mailing tool)
User-Agent: Mozilla/3.0 (compatible; Indy Library)
Disallow: /
# Outertechs Linkman bookmark tool
User-Agent: Mozilla/3.0 (compatible; Linkman)
Disallow: /
# Euroferret robot
User-Agent: Mozilla/3.0 (compatible; MuscatFerret/1.5.4; claude@euroferret.com)
Disallow: /
# Euroferret robot
User-Agent: Mozilla/3.0 (compatible; MuscatFerret/1.5; olly@muscat.co.uk)
Disallow: /
# Euroferret robot
User-Agent: Mozilla/3.0 (compatible; MuscatFerret/1.6.x; claude@euroferret.com)
Disallow: /
# Netart Generator - script generated random websites
User-Agent: Mozilla/3.0 (compatible; netart generator/1.0; libwww-perl/5.64)
Disallow: /
# NetPositive BEOS browser
User-Agent: Mozilla/3.0 (compatible; NetPositive/2.2)
Disallow: /
# Opera 3.x Win3.x
User-Agent: Mozilla/3.0 (compatible; Opera/3.0; Windows 3.1) v3.1
Disallow: /
# Opera 3.x Win95/NT
User-Agent: Mozilla/3.0 (compatible; Opera/3.0; Windows 95/NT4) 3.2
Disallow: /
# Perman Surfer bookmark tool
User-Agent: Mozilla/3.0 (compatible; PerMan Surfer 3.0; Win95)
Disallow: /
# Web Link Validator link validation software
User-Agent: Mozilla/3.0 (compatible; REL Software Web Link Validator 2.x)
Disallow: /
# Scan4Mail online mail extraction service
User-Agent: Mozilla/3.0 (compatible; scan4mail (advanced version) http://www.peterspages.net/?scan4mail)
Disallow: /
# WebWobot UK search engine robot (82.43.129.2xx)
User-Agent: Mozilla/3.0 (compatible; ScollSpider; http://www.webwobot.com)
Disallow: /
# Web Link Validator (http://www.relsoftware.com/) link validation software
User-Agent: Mozilla/3.0 (compatible; Web Link Validator 2.x)
Disallow: /
# Xelios Web Capture (now Wysigot) website downloading tool (Discontinued)
User-Agent: Mozilla/3.0 (compatible; WebCapture x.x; Auto; Windows)
Disallow: /
# Iprospect search engine positioning using Thunderstone's Webinator
User-Agent: Mozilla/3.0 (compatible; Webinator-DEV01.home.iprospect.com/2.56)
Disallow: /
# CyberAlert's Media Monitor using Thunderstone's Webinator
User-Agent: Mozilla/3.0 (compatible; Webinator-indexer.cyberalert.com/2.56)
Disallow: /
# Viking server user/client
User-Agent: Mozilla/3.0 (Compatible;Viking/1.8)
Disallow: /
# One of DC-Sakuras download manager user-agent names
User-Agent: Mozilla/3.0 (DreamPassport/3.0)
Disallow: /
# Ilse Netherlands robot (62.69.178.xx)
User-Agent: Mozilla/3.0 (INGRID/3.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)
Disallow: /
# Liberate DTV server suite / TV-emulator
User-Agent: Mozilla/3.0 (Liberate DTV 1.1)
Disallow: /
# Planetweb 2.1 Browser (discontinued) for Dreamcast
User-Agent: Mozilla/3.0 (Planetweb/2.100 JS SSL US; Dreamcast US)
Disallow: /
# http://www.goo.ne.jp /Inktomi robot
User-Agent: Mozilla/3.0 (Slurp.so/Goo; slurp@inktomi.com; http://www.inktomi.com/slurp.html)
Disallow: /
# Inktomi (Hotbot-Lycos-NBCi) robot - 72.30.61.xx(x)
User-Agent: Mozilla/3.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)
Disallow: /
# Inktomi (Hotbot-Lycos-NBCi) robot - 72.30.61.xx(x)
User-Agent: Mozilla/3.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Mozilla/3.0 (Vagabondo/1.1 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Mozilla/3.0 (Vagabondo/1.x MT; webagent@wise-guys.nl; http://webagent.wise-guys.nl/)
Disallow: /
# Ilse Netherlands robot
User-Agent: Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)
Disallow: /
# Netscape 3.x Win3.x
User-Agent: Mozilla/3.0 (Win16; I)
Disallow: /
# Netscape 3.x Win95
User-Agent: Mozilla/3.0 (Win95; I)
Disallow: /
# Netscape 3.x WinNT
User-Agent: Mozilla/3.0 (WinNT; I)
Disallow: /
# Netscape 3.x FreeBSD
User-Agent: Mozilla/3.0 (WorldGate Gazelle 3.5.1 build 11; FreeBSD2.2.8-STABLE)
Disallow: /
# Netscape 3.x OSF1 V4.0 alpha
User-Agent: Mozilla/3.0 (X11; I; OSF1 V4.0 alpha)
Disallow: /
# AOL Web TV
User-Agent: Mozilla/3.0 NAVIO_AOLTV (11; 13; Philips; PH200; 1; R2.0C36_AOL.0110OPTIK; R2.0.0139d_OPTIK)
Disallow: /
# WebTV
User-Agent: Mozilla/3.0 WebTV/1.2 (compatible; MSIE 2.0)
Disallow: /
# Amiga Voyager Browser Amiga
User-Agent: Mozilla/3.01 (compatible; AmigaVoyager/2.95; AmigaOS/MC680x0)
Disallow: /
# Links2Go robot
User-Agent: Mozilla/3.01 (Compatible; Links2Go Similarity Engine)
Disallow: /
# Netgem Netbox cable modem TV Box Linux
User-Agent: Mozilla/3.01 (compatible; Netbox/3.5 R92; Linux 2.2)
Disallow: /
# Netscape 3.x Mac
User-Agent: Mozilla/3.01-C-MACOS8 (Macintosh; I; PPC)
Disallow: /
# Netscape 3.x Linux
User-Agent: Mozilla/3.01Gold (X11; I; Linux 2.0.32 i486)
Disallow: /
# Netscape 3.x SunOS
User-Agent: Mozilla/3.01Gold (X11; I; SunOS 5.5.1 sun4m)
Disallow: /
# Netscape 3.x Irix
User-Agent: Mozilla/3.01SGoldC-SGI (X11; I; IRIX 6.3 IP32)
Disallow: /
# ANT Fresco Browser Risc OS
User-Agent: Mozilla/3.04 (compatible; ANTFresco/2.13; RISC OS 4.02)
Disallow: /
# NCBrowser ANT Fresco Browser Risc OS
User-Agent: Mozilla/3.04 (compatible; NCBrowser/2.35; ANTFresco/2.17; RISC OS-NC 5.13 Laz1UK1309)
Disallow: /
# QNX OS Voyager embedded browser
User-Agent: Mozilla/3.04 (compatible;QNX Voyager 2.03B ;Photon)
Disallow: /
# I-Opener (was www.netpliance.com/) web PC
User-Agent: Mozilla/3.x (I-Opener 1.1; Netpliance)
Disallow: /
# Yahoo Mindset: Intent-driven Search (66.228.182.1xx)
User-Agent: Mozilla/4.0
Disallow: /
# Agada search (Germany) robot
User-Agent: Mozilla/4.0 (agadine3.0) www.agada.de
Disallow: /
# URLBase 6 bookmark manager
User-Agent: Mozilla/4.0 (Compatible); URLBase 6
Disallow: /
# Astrafind! adult search robot (66.98.252.xx)
User-Agent: Mozilla/4.0 (compatible: AstraSpider V.2.1 : astrafind.com)
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Mozilla/4.0 (compatible; Vagabondo/2.2; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Mozilla/4.0 (compatible; Vagabondo/4.0Beta; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)
Disallow: /
# ReGet Deluxe! download manager
User-Agent: Mozilla/4.0 (compatible; ReGet Deluxe 5.1; Windows NT 5.1)
Disallow: /
# Advanced Email Extractor e-mail collector (spam bot)
User-Agent: Mozilla/4.0 (compatible; Advanced Email Extractor v2.xx)
Disallow: /
# Arachmo Spider - web site file extraction tool
User-Agent: Mozilla/4.0 (compatible; Arachmo)
Disallow: /
# Novell Border Manager security suite
User-Agent: Mozilla/4.0 (compatible; BorderManager 3.0)
Disallow: /
# Best of the Web directory link checking
User-Agent: Mozilla/4.0 (compatible; BOTW Spider; +http://botw.org)
Disallow: /
# Blitzsuche Germany robot
User-Agent: Mozilla/4.0 (compatible; B_L_I_T_Z_B_O_T)
Disallow: /
# Content Control from Blue Coat
User-Agent: Mozilla/4.0 (compatible; Cerberian Drtrs Version-3.2-Build-0)
Disallow: /
# Check&Get bookmark manager, web change monitor and archiver
User-Agent: Mozilla/4.0 (compatible; Check&Get 3.0; Windows NT)
Disallow: /
# Christcentral.com Christcrawler (was www.christcrawler.com)
User-Agent: Mozilla/4.0 (compatible; ChristCrawler.com ChristCrawler@ChristCENTRAL.com)
Disallow: /
# Yahoo Search Marketing crawler (68.142.211.1xx)
User-Agent: Mozilla/4.0 (compatible; crawlx, crawler@trd.overture.com)
Disallow: /
# DAUMOA - Daum search Korea robot (211.115.109.xxx)
User-Agent: Mozilla/4.0 (compatible; DAUMOA-video; +http://ws.daum.net/aboutkr.html)
Disallow: /
# DepSpid distributed web crawler for link dependencies
User-Agent: Mozilla/4.0 (compatible; DepSpid/5.0x; +http://about.depspid.net)
Disallow: /
# Download Mage download manager
User-Agent: Mozilla/4.0 (compatible; DnloadMage 1.0)
Disallow: /
# Fast/Alltheweb crawler
User-Agent: Mozilla/4.0 (compatible; FastCrawler3 support-fastcrawler3@fast.no)
Disallow: /
# Fluid Dynamics Search Engine (FDSE) robot used by Abadoor.de
User-Agent: Mozilla/4.0 (compatible; FDSE robot)
Disallow: /
# GPU Distributed Search Engine crawler
User-Agent: Mozilla/4.0 (compatible; GPU p2p crawler http://gpu.sourceforge.net/search_engine.php)
Disallow: /
# Grub open source crawler
User-Agent: Mozilla/4.0 (compatible; grub-client-0.2.x; Crawl your stuff with http://grub.org)
Disallow: /
# Grub open source crawler
User-Agent: Mozilla/4.0 (compatible; grub-client-0.3.x; Crawl your own stuff with http://grub.org)
Disallow: /
# Grub open source crawler used by Looksmart ( 64.241.242.xx)
User-Agent: Mozilla/4.0 (compatible; grub-client-2.x)
Disallow: /
# ibisBrowser Japanese mobile browser
User-Agent: Mozilla/4.0 (compatible; ibisBrowser)
Disallow: /
# Novell iChain Cool Solutions caching
User-Agent: Mozilla/4.0 (compatible; ICS 1.2.xxx)
Disallow: /
# IE Favorites Check - Bookmark manager
User-Agent: Mozilla/4.0 (compatible; IE-Favorites-Check-0.5)
Disallow: /
# Iplexx Austria (webhosting company) logfile spamming bot
User-Agent: Mozilla/4.0 (compatible; Iplexx Spider/1.0 http://www.iplexx.at)
Disallow: /
# KeepNi Monitors - Web site monitoring / link checking tool
User-Agent: Mozilla/4.0 (compatible; KeepNI web site monitor)
Disallow: /
# NetPromoter Link Utility link checking tool
User-Agent: Mozilla/4.0 (compatible; Link Utility; http://net-promoter.com)
Disallow: /
# Lotus Notes 5.0 browser
User-Agent: Mozilla/4.0 (compatible; Lotus-Notes/5.0; Windows-NT)
Disallow: /
# IE 4.x AOL Win98
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; AOL 4.0; Windows 98)
Disallow: /
# IE 4.x Mac Power PC
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Mac_PowerPC)
Disallow: /
# Internet Explorer 4.0 URL check
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; MSIECrawler; Windows 95)
Disallow: /
# Vonna search robot
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Vonna.com b o t)
Disallow: /
# IE 4.x Win95
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows 95)
Disallow: /
# IE PDA Browser Windows CE
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows CE; MSN Companion 2.0; 800x600; Compaq)
Disallow: /
# Orange France robot for mobiles
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows CE; PPC; 240x320; SPV M700; OpVer 19.123.2.733) OrangeBot-Mobile 2008.0 (mobilesearch.support@orange-ftgroup.com)
Disallow: /
# IE for Windows CE on a PocketPC (HP iPAQ)
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows CE; PPS; 240x320)
Disallow: /
# IE PDA Browser Windows CE
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows NT Windows CE)
Disallow: /
# IE 4.x WinNT
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows NT)
Disallow: /
# diff. IPs / services i.e.: - Microsoft server information robot (see link) - Okanagan Internet Junction web filter (robot)
User-Agent: Mozilla/4.0 (compatible; MSIE 4.01; Windows NT; MS Search 4.0 Robot) Microsoft
Disallow: /
# Unknown robot from American College of Radiology (ACR) running MS Site Server
User-Agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows NT; Site Server 3.0 Robot) ACR
Disallow: /
# Indonesia Interactive Web-portal robot on MS Site Server
User-Agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows NT; Site Server 3.0 Robot) Indonesia Interactive
Disallow: /
# Webquestdesigns hosting
User-Agent: Mozilla/4.0 (compatible; MSIE 4.0; Windows NT; Site Server 3.0 Robot) WebQuest Designs
Disallow: /
# Avirt Gateway proxy server
User-Agent: Mozilla/4.0 (compatible; MSIE 5.01; Windows 95) via Avirt Gateway Server v4.0
Disallow: /
# Metacarta.com (66.28.xx.xxx) robot
User-Agent: Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0) (samualt9@bigfoot.com)
Disallow: /
# NetCaptor IE browser addon
User-Agent: Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0; NetCaptor 6.5.0RC1)
Disallow: /
# IE 5.x AOL Win95 Sureseeker search plugin
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; AOL 5.0; Windows 95; DigExt; Gateway2000; sureseeker.com)
Disallow: /
# IE 5.x Mac PowerPC AtHome user
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Mac_PowerPC; AtHome021)
Disallow: /
# www.netnose.com crawler
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; NetNose-Crawler 2.0; A New Search Experience: http://www.netnose.com)
Disallow: /
# WinXP via CERN httpd proxy server
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Win32) via proxy gateway CERN-HTTPD/3.0 libwww/2.17
Disallow: /
# Echo.com robot
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) TrueRobot; 1.5
Disallow: /
# Voila.fr robot
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (http://www.voila.com/)
Disallow: /
# Voila.fr robot
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot; 1.6
Disallow: /
# WinME Opera 5.x
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows ME) Opera 5.11 [en]
Disallow: /
# Kyosoft's Link Checker
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows ME; Link Checker 2.x.xx http://www.kyosoft.com)
Disallow: /
# Beijing Express Email Address Extractor via DHCP Data Transport Services (DTS)
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent
Disallow: /
# Girafa (browser plug-in) robot
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; Girafabot; girafabot at girafa dot com; http://www.girafa.com)
Disallow: /
# Galaxy robot (63.121.41.xxx)
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com)
Disallow: /
# Galaxy robot (63.121.41.xxx)
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com/; http://www.galaxy.com/info/crawler.html)
Disallow: /
# Yandex Search Russia link checking (213.180.206.2xx)
User-Agent: Mozilla/4.0 (compatible; MSIE 5.0; YANDEX)
Disallow: /
# Gobeez starting page plugin
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; AOL 4.0; Windows 98; GoBeez (www.gobeez.com))
Disallow: /
# IE 5.5 Win95 Hotbar plug-in
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows 95; Transmission Segment; Hotbar 2.0)
Disallow: /
# Crazy Browser - IE based tabbed Browser
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Crazy Browser 1.x.x)
Disallow: /
# Wanadoo Internet services
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; KITV4.7 Wanadoo)
Disallow: /
# Safexplorer (safexplorer.com - site is offline) kids browser
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; SAFEXPLORER TL)
Disallow: /
# Katiesoft Scroll (ex www.katiesoft.com now discarded) & SimulBrowse (ex www.simulbrowse.com now dead) IE browser plugins
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; SYMPA; Katiesoft 7; SimulBrowse 3.0)
Disallow: /
# Windows ME BTOpenworld Internet services
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90; BTinternet V8.1)
Disallow: /
# Windows ME Internet Explorer URL check
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90; MSIECrawler)
Disallow: /
# Cobion Germany Brand Protection Services robot
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; obot)
Disallow: /
# Cobion Germany Brand Protection Services robot
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; QXW03018)
Disallow: /
# IE 5.5 Win2000 / user agent
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0) Active Cache Request
Disallow: /
# Maybe: - MS Internet Security & Acceleration Server (ISA) cache refreshing request (see link) or - IE 5.5 Win2000 probably with some (website) API request component (see 2nd link) - suspected as email-harvester / site scanning tool (see http://www.byte.com/documents/s=493/byt20010208s0001/index.htm)
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0) Fetch API Request
Disallow: /
# IE 5.5 Win2000 with MS.NET SDK
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0; .NET CLR 1.0.3705)
Disallow: /
# IE 5.5 Win2000 / user agent w. AI RoboForm (AIRF) password manager
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0; AIRF)
Disallow: /
# AspTear URL fetching program component / Download32.com spider
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0; AspTear 1.5)
Disallow: /
# Nokia.com network
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0; N_o_k_i_a)
Disallow: /
# Unknown Object Sciences Corp. robot using the HTTPClient
User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0; T312461) RPT-HTTPClient/0.3-3E
Disallow: /
# Singingfish media spider (64.12.186.2xx) via AOL search
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0 compatible; Asterias Crawler v4; +http://www.singingfish.com/help/spider.html; webmaster@singingfish.com); SpiderThread Revision: 3.10
Disallow: /
# Megaupload Mega Manager - Download manager toolbar for IE
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; AOL 9.0; Windows 98; .NET CLR 1.1.4322; MEGAUPLOAD 2.0)
Disallow: /
# Hotbar IE graphical skin
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; AOL 9.0; Windows NT 5.1; SV1; HbTools 4.7.2)
Disallow: /
# Skaffe.com directory link checker
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Skampy/0.9.x [en]
Disallow: /
# TargetSeek Crawler concerning electronics industry product announcements
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; TargetSeek/1.0; +http://www.targetgroups.net/TargetSeek.html)
Disallow: /
# IE 6.0 WebWasher ad filter
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Win32) WebWasher 3.0
Disallow: /
# Web Link Validator link validation software
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows 98) REL Software Web Link Validator 2.x)
Disallow: /
# Web Link Validator link validation software
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows 98) Web Link Validator 2.x)
Disallow: /
# IE 6.0 Netmanager IE add-on
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Net M@nager V3.02 - www.vinn.com.au)
Disallow: /
# WebLight web analyzer & link checker
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; support@illumit.com; http://www.illumit.com/Products/weblight/)
Disallow: /
# Abolimba Multibrowser - IE based browser
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Win 9x 4.90; http://www.Abolimba.de)
Disallow: /
# Lunascape IE based browser (Japan)
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; Lunascape 2.1.3)
Disallow: /
# Google wireless transcoder (GWT) proxy for rewriting websites for mobiles
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;)
Disallow: /
# Tüzilla (Germany) - ODP link checking using Robozilla
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP entries t_st; http://tuezilla.de/t_st-odp-entries-agent.html)
Disallow: /
# Tüzilla (Germany) - ODP link checking using Robozilla
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP links test; http://tuezilla.de/test-odp-links-agent.html)
Disallow: /
# ZoomSpider.Net indexing robot for several directories
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ZoomSpider.net bot; .NET CLR 1.1.4322)
Disallow: /
# unknown robot from 64.246.44.xx
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) (dns_admin@c-a-s-h.com)
Disallow: /
# Covac Software UPPS (Universal PHP Proxy Server) - free public proxy server
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Covac UPPS Cathan 1.2.5;)
Disallow: /
# GetNetWise Crayon Crawler web filter
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Crayon Crawler; snprtz|T04056566514940; (R1 1.5))
Disallow: /
# Deepnet Explorer - IE based browser
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Deepnet Explorer)
Disallow: /
# Heritrix Internet Archive's open-source web project used by Analysis Projects at UW
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; heritrix/1.3.0 http://www.cs.washington.edu/research/networking/websys/)
Disallow: /
# IE 6x WinXP Hotbar plug-in
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Hotbar 3.0)
Disallow: /
# IE 6x WinXP iOpus Internet Macros - Internet-based macro recorder
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; iOpus-I-M)
Disallow: /
# iRider - IE based browser / Free Download Manager (FDM)
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; iRider 2.21.1108; FDM)
Disallow: /
# KKman http://www.kkman.com/ - Japanese IE based browser
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; KKman3.0)
Disallow: /
# IE 6x WinXP MathPlayer mathematical notation plugin
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; MathPlayer2.0)
Disallow: /
# Maxthon (ex MyIE2) - IE based browser
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon)
Disallow: /
# IE 6x WinXP peoplepc online PeoplePal IE toolbar
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; PeoplePal 3.0; MSIECrawler)
Disallow: /
# IE 6x WinXP / I-Opener (was www.netpliance.com/) web PC
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Q312461; IOpener Release 1.1.04)
Disallow: /
# Qihoo search (China) robot
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0 qihoobot@qihoo.net)
Disallow: /
# SimBar IE toolbar for accessing The Sims sites / Infopath IE form & spreadsheet plugin
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SIMBAR Enabled; InfoPath.1)
Disallow: /
# IE 6x WinXP Stumble Upon IE toolbar
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; StumbleUpon.com 1.760; .NET CLR 1.1.4322)
Disallow: /
# bsalsa Productions embedded web browser package for Borland Delphi
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Embedded Web Browser from: http://bsalsa.com/; MSIECrawler)
Disallow: /
# ChangeDetection robot for web page monitoring
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )
Disallow: /
# IE 6x WinXP also used by WebSite Pro HTML editor
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)
Disallow: /
# DX-Browser - German IE based browser
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; DX-Browser 5.0.0.0)
Disallow: /
# ezPeer+ P2P IE addon
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; FunWebProducts; ezPeer+ v1.0 Beta (0.4.1.98); ezPeer+ v1.0 (0.5.0.00); .NET CLR 1.1.4322; MSIECrawler)
Disallow: /
# Axandra IBP website promotion software ?
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; IBP; .NET CLR 1.1.4322)
Disallow: /
# MRA = Mail.ru Agent - Instant Messenger / VoIP
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; MRA 4.3 (build 01218))
Disallow: /
# MSN Search robot (207.46.89.xx)
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; MS Search 4.0 Robot)
Disallow: /
# Link Commander bookmark manager
User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Win32) Link Commander 4.0
Disallow: /
# IE 7.0 - WinXP
User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; bgft)
Disallow: /
# GTB = Google Toolbar Internet Explorer add-on
User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; GTB5; User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://bsalsa.com) ; .NET CLR 2.0.50727)
Disallow: /
# MSIE 7.0 *and* Trident token used by Internet Explorer 8 in compatibility view mode
User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Tablet PC 2.0)
Disallow: /
# Internet Explorer 8
User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 1.1.4322; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648)
Disallow: /
# IE 8.0 (beta) on Win Vista
User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)
Disallow: /
# DAUMOA - Daum search Korea robot (211.115.109.xxx)
User-Agent: Mozilla/4.0 (compatible; MSIE enviable; DAUMOA 2.0; DAUM Web Robot; Daum Communications Corp., Korea; +http://ws.daum.net/aboutkr.html)
Disallow: /
# DAUMOA - Daum search Korea robot (211.115.109.xxx)
User-Agent: Mozilla/4.0 (compatible; MSIE is not me; DAUMOA/1.0.1; DAUM Web Robot; Daum Communications Corp., Korea)
Disallow: /
# Naver Search Korea Naverbot
User-Agent: Mozilla/4.0 (compatible; NaverBot/1.0; http://help.naver.com/delete_main.asp)
Disallow: /
# Netcraft webserver info
User-Agent: Mozilla/4.0 (compatible; Netcraft Web Server Survey)
Disallow: /
# NetPromoter Link Utility link checking tool
User-Agent: Mozilla/4.0 (compatible; NetPromoter Spider;http://www.net-promoter.com/)
Disallow: /
# Opera 3.x WinNT
User-Agent: Mozilla/4.0 (compatible; Opera/3.0; Windows 4.10) 3.51 [en]
Disallow: /
# Powermarks bookmark manager
User-Agent: Mozilla/4.0 (compatible; Powermarks/3.5; Windows 95/98/2000/NT)
Disallow: /
# RSS Popper - MS Outlook RSS reader plugin
User-Agent: Mozilla/4.0 (compatible; RSS Popper)
Disallow: /
# SiteKiosk public terminal browser
User-Agent: Mozilla/4.0 (compatible; SiteKiosk 4.0; MSIE 5.0; Windows 98; SiteCoach 1.0)
Disallow: /
# Entireweb Search Speedyspider (62.13.25.xxx)
User-Agent: Mozilla/4.0 (compatible; SpeedySpider; www.entireweb.com)
Disallow: /
# SiteProbe - website status checking
User-Agent: Mozilla/4.0 (compatible; SPENG)
Disallow: /
# Super Cleaner privacy tool (bookmark checking)
User-Agent: Mozilla/4.0 (compatible; SuperCleaner 2.xx; Windows 98)
Disallow: /
# Synapse - Apache web service for processing XML documents
User-Agent: Mozilla/4.0 (compatible; Synapse)
Disallow: /
# Web2PDF - Adobe Acrobat plugin for site traversal and other services for the Web Capture feature
User-Agent: Mozilla/4.0 (compatible; WebCapture 3.0; Windows)
Disallow: /
# Windows HTTP Services (WinHTTP / XML-parser)
User-Agent: Mozilla/4.0 (compatible; Win32; WinHttp.WinHttpRequest.5)
Disallow: /
# WSN Links PHP directory software
User-Agent: Mozilla/4.0 (compatible; WSN Links)
Disallow: /
# Euro Directory (German / Austrian) directory link checking
User-Agent: Mozilla/4.0 (compatible; www.euro-directory.com; urlchecker1.0)
Disallow: /
# Galaxy robot
User-Agent: Mozilla/4.0 (compatible; www.galaxy.com)
Disallow: /
# Linkguard.com link validation (service is offline)
User-Agent: Mozilla/4.0 (compatible; www.linkguard.com Linkguard Online 1.0; Windows NT)
Disallow: /
# Yahoo Search Japan robot (203.141.52.)
User-Agent: Mozilla/4.0 (compatible; Y!J; for robot study; keyoshid)
Disallow: /
# Yahoo Japan robot (202.93.76.xx)
User-Agent: Mozilla/4.0 (compatible; Yahoo Japan; for robot study; kasugiya)
Disallow: /
# Faked IE id string used by DeepTrawl link checking tool
User-Agent: Mozilla/4.0 (compatible;MSIE 6.0; Windows NT 5.0; H010818)
Disallow: /
# spoofed referer by Fantomaster (Multiblocker) anonymity products
User-Agent: Mozilla/4.0 (fantomBrowser)
Disallow: /
# spoofed referer by Fantomaster (Multiblocker) anonymity products
User-Agent: Mozilla/4.0 (fantomCrew Browser)
Disallow: /
# unknown robot from - 64.57.223.40 - 66.28.233.xxx (cogentco.com)
User-Agent: Mozilla/4.0 (hhjhj@yahoo.com)
Disallow: /
# Activtourist Jemma spider
User-Agent: Mozilla/4.0 (JemmaTheTourist;http://www.activtourist.com)
Disallow: /
# NetFront (v3.x) for Pocket PC (here on Sanyo PM-8200 cell phone)
User-Agent: Mozilla/4.0 (MobilePhone PM-8200/US/1.0) NetFront/3.x MMP/2.0
Disallow: /
# Google robot from 66.249.66.xxx
User-Agent: Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)
Disallow: /
# Google robot from 66.249.66.xxx
User-Agent: Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 FAKE (compatible; Googlebot/2.1; http://www.google.com/bot.html)
Disallow: /
# Unknown robot from Mozilla.org
User-Agent: Mozilla/4.0 (Mozilla; http://www.mozilla.org/docs/en/bot.html; master@mozilla.com)
Disallow: /
# ASI - Any Search Info robot
User-Agent: Mozilla/4.0 (Sleek Spider/1.2)
Disallow: /
# Furl (Looksmart) online bookmark tool robot
User-Agent: Mozilla/4.0 compatible FurlBot/Furl Search 2.0 (FurlBot; http://www.furl.net; wn.furlbot@looksmart.net)
Disallow: /
# Wisenut robot
User-Agent: Mozilla/4.0 compatible ZyBorg/1.0 (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)
Disallow: /
# Wisenut robot
User-Agent: Mozilla/4.0 compatible ZyBorg/1.0 (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)
Disallow: /
# Wisenut robot
User-Agent: Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)
Disallow: /
# Wisenut robot
User-Agent: Mozilla/4.0 compatible ZyBorg/1.0 for Homepage (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)
Disallow: /
# Unknown robot from 66.230.140.xx (argon.oxeo.com) maybe an e-mail collector
User-Agent: Mozilla/4.0 efp@gmx.net
Disallow: /
# WebTV
User-Agent: Mozilla/4.0 WebTV/2.6 (compatible; MSIE 4.0)
Disallow: /
# Ask / Ask Jeeves robot
User-Agent: Mozilla/4.0 [en] (Ask Jeeves Corporate Spider)
Disallow: /
# LookSmart spider
User-Agent: Mozilla/4.0(compatible; Zealbot 1.0)
Disallow: /
# HideMe - Web based anonymous proxy server service
User-Agent: Mozilla/4.01 (compatible; NORAD National Defence Network)
Disallow: /
# Some download manager spoofing Netscape 4.01
User-Agent: Mozilla/4.01 [en](Win95;I)
Disallow: /
# Netscape 4.x SunOS 5.6
User-Agent: Mozilla/4.02 [en] (X11; I; SunOS 5.6 sun4u)
Disallow: /
# Dulance Bot - Dulance automated price comparison engine
User-Agent: Mozilla/4.04 (compatible; Dulance bot; +http://www.dulance.com/bot.jsp)
Disallow: /
# Netscape 4.x HP-Unix
User-Agent: Mozilla/4.04 [en] (X11; I; HP-UX B.10.20 9000/712)
Disallow: /
# Netscape 4.x IRIX
User-Agent: Mozilla/4.04 [en] (X11; I; IRIX 5.3 IP22)
Disallow: /
# Netscape 4.x Macintosh 68k
User-Agent: Mozilla/4.05 (Macintosh; I; 68K Nav)
Disallow: /
# Netscape 4.x Macintosh PowerPC
User-Agent: Mozilla/4.05 (Macintosh; I; PPC Nav)
Disallow: /
# Netscape 4.x SunOS 4.1.4
User-Agent: Mozilla/4.05 [en] (X11; I; SunOS 4.1.4 sun4m)
Disallow: /
# Someone apparently copied the browser's Help->About text into the user-agent field?
User-Agent: "Mozilla/4.08 [en] (Win98; U ;Nav)" [Spaces are not mistakes]Help->About: "Version 4.08 [en]-98306"
Disallow: /
# Netscape 4.x WinNT
User-Agent: Mozilla/4.08 [en] (WinNT; U)
Disallow: /
# Echo.com robot
User-Agent: Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_TrueRobot/1.4 libwww/5.2.8
Disallow: /
# Voila.fr robot
User-Agent: Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_VoilaBot/1.6 libwww/5.3.2
Disallow: /
# HTTrack Offline Browser
User-Agent: Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)
Disallow: /
# iCab MAC Web browser MAC Power PC
User-Agent: Mozilla/4.5 (compatible; iCab 2.5.3; Macintosh; I; PPC)
Disallow: /
# OmniWeb 4.x.x Mac browser
User-Agent: Mozilla/4.5 (compatible; OmniWeb/4.0.5; Mac_PowerPC)
Disallow: /
# OmniWeb 4.x.x Mac browser
User-Agent: Mozilla/4.5 (compatible; OmniWeb/4.1-beta-1; Mac_PowerPC)
Disallow: /
# different IPs using the HTTPClient library (mostly link checking)
User-Agent: Mozilla/4.5 RPT-HTTPClient/0.3-2
Disallow: /
# RuralNet Internet Services
User-Agent: Mozilla/4.5 [en]C-CCK-MCD {RuralNet} (Win98; I)
Disallow: /
# Netscape 4.x Linux
User-Agent: Mozilla/4.5b1 [en] (X11; I; Linux 2.0.35 i586)
Disallow: /
# Cnet robot for Search.com (216.239.114.xx)
User-Agent: Mozilla/4.6 [en] (http://www.cnet.com/)
Disallow: /
# Netscape 4.x OS/2
User-Agent: Mozilla/4.61 [de] (OS/2; I)
Disallow: /
# BrowseX cross-platform browser
User-Agent: Mozilla/4.61 [en] (X11; U; ) - BrowseX (2.0.0 Windows)
Disallow: /
# Nameprotect (12.148.196.128 - 12.148.196.255) snoopbot
User-Agent: Mozilla/4.7
Disallow: /
# Eidetica search and text mining spider
User-Agent: Mozilla/4.7 (compatible; http://eidetica.com/spider)
Disallow: /
# Intelliseek (64.158.138.xx) robot
User-Agent: Mozilla/4.7 (compatible; Intelliseek; http://www.intelliseek.com)
Disallow: /
# OffByOne Browser
User-Agent: Mozilla/4.7 (compatible; OffByOne; Windows 98) Webster Pro V3.2
Disallow: /
# WhizBang! Labs information extraction robot
User-Agent: Mozilla/4.7 (compatible; Whizbang)
Disallow: /
# WhizBang! Labs information extraction robot
User-Agent: Mozilla/4.7 (compatible; WhizBang; http://www.whizbang.com/crawler)
Disallow: /
# BecomeBot - Become.com shopping search (64.124.85.xx(x))
User-Agent: Mozilla/4.7 [en](BecomeBot@exava.com)
Disallow: /
# Exabot - exava shopping search (64.124.85.xx(x))
User-Agent: Mozilla/4.7 [en](Exabot@exava.com)
Disallow: /
# unknown
User-Agent: Mozilla/4.7 [en]C-CCK-MCD {Yahoo;YIP052400} (Win95; I)
Disallow: /
# http://www.ba.be robot
User-Agent: Mozilla/4.72 [en] (BACS http://www.ba.be)
Disallow: /
# Netscape 4.7x Caldera Open Linux Pentium III
User-Agent: Mozilla/4.72C-CCK-MCD Caldera Systems OpenLinux [en] (X11; U; Linux 2.2.14 i686)
Disallow: /
# Netscape 4.7x Japan OSF1 alpha
User-Agent: Mozilla/4.75C-ja [ja] (X11; U; OSF1 V5.1 alpha)
Disallow: /
# Opera 5.x Win 98
User-Agent: Mozilla/4.76 (Windows 98; U) Opera 5.12 [en]
Disallow: /
# Netscape 4.7x FreeBSD
User-Agent: Mozilla/4.76 [en] (X11; U; FreeBSD 4.4-STABLE i386)
Disallow: /
# Netscape 4.7x SunOS
User-Agent: Mozilla/4.76 [en] (X11; U; SunOS 5.7 sun4u)
Disallow: /
# IRIX 6.5
User-Agent: Mozilla/4.77C-SGI [en] (X11; U; IRIX 6.5 IP32)
Disallow: /
# GigaMedia / NTT DoCoMo robot
User-Agent: Mozilla/5.0
Disallow: /
# Eurekster Swicki community search using SLI-Systems site search engine Mammoth
User-Agent: Mozilla/5.0 (+http://www.eurekster.com/mammoth) Mammoth/0.1
Disallow: /
# SLI Systems mammoth robot
User-Agent: Mozilla/5.0 (+http://www.sli-systems.com/) Mammoth/0.1
Disallow: /
# Clush search robot
User-Agent: Mozilla/5.0 (Clustered-Search-Bot/1.0; support@clush.com; http://www.clush.com/)
Disallow: /
# Greasemonkey RSS panel Firefox plugin
User-Agent: Mozilla/5.0 (compatible) GM RSS Panel X
Disallow: /
# Evri search robot
User-Agent: Mozilla/5.0 (compatible; +http://www.evri.com/evrinid)
Disallow: /
# Abonti WebSearch beta robot
User-Agent: Mozilla/5.0 (compatible; Abonti/0.8 - http://www.abonti.com)
Disallow: /
# Ansearch Australian search robot
User-Agent: Mozilla/5.0 (compatible; AnsearchBot/1.x; +http://www.ansearch.com.au/)
Disallow: /
# The Library of Congress Minerva crawler
User-Agent: Mozilla/5.0 (compatible; archive.org_bot/1.10.0 +http://www.loc.gov/minerva/crawl.html)
Disallow: /
# Heritrix - The Internet Archive's open-source crawler (207.241.225.2xx)
User-Agent: Mozilla/5.0 (compatible; archive.org_bot/1.13.1x http://crawler.archive.org)
Disallow: /
# Heritrix - The Internet Archive's open-source crawler
User-Agent: Mozilla/5.0 (compatible; archive.org_bot/1.5.0-200506132127 http://crawler.archive.org) Hurricane Katrina
Disallow: /
# Ask Jeeves / Teoma robot
User-Agent: Mozilla/5.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml)
Disallow: /
# Project BanBots Perl script robot
User-Agent: Mozilla/5.0 (compatible; BanBots/2.0b; Fetch; +http://www.banbots.com)
Disallow: /
# BecomeBot - Become.com shopping search (64.124.85.xx(x))
User-Agent: Mozilla/5.0 (compatible; BecomeBot/1.23; http://www.become.com/webmasters.html)
Disallow: /
# BecomeBot - Become.com shopping search (64.124.85.xx(x))
User-Agent: Mozilla/5.0 (compatible; BecomeBot/1.xx; MSIE 6.0 compatible; http://www.become.com/webmasters.html)
Disallow: /
# BecomeBot - Become.com shopping search (64.124.85.xx(x))
User-Agent: Mozilla/5.0 (compatible; BecomeBot/2.0beta; http://www.become.com/webmasters.html)
Disallow: /
# BecomeBot - Become.com shopping search (64.124.85.xx(x))
User-Agent: Mozilla/5.0 (compatible; BecomeBot/2.x; MSIE 6.0 compatible; http://www.become.com/site_owners.html)
Disallow: /
# BecomeBot - Become.com shopping search (64.124.85.xx(x))
User-Agent: Mozilla/5.0 (compatible; BecomeJPBot/2.3; MSIE 6.0 compatible; +http://www.become.co.jp/site_owners.html)
Disallow: /
# BlogRefsBot.com blog robot
User-Agent: Mozilla/5.0 (compatible; BlogRefsBot/0.1; http://www.blogrefs.com/about/bloggers)
Disallow: /
# Pressemitteilungen Webservice RSS / news crawler (Germany)
User-Agent: Mozilla/5.0 (compatible; Bot; +http://pressemitteilung.ws/spamfilter
Disallow: /
# BuzzRanking internet content analysis
User-Agent: Mozilla/5.0 (compatible; BuzzRankingBot/1.0; +http://www.buzzrankingbot.com/)
Disallow: /
# Charlotte indexing spider for Searchme / Wikiseek
User-Agent: Mozilla/5.0 (compatible; Charlotte/1.0b; charlotte@betaspider.com)
Disallow: /
# Charlotte indexing spider for Searchme / Wikiseek
User-Agent: Mozilla/5.0 (compatible; Charlotte/1.0b; http://www.searchme.com/support/)
Disallow: /
# Unknown graphics crawler or downloading agent from Yamana Laboratory - Waseda University Japan (133.9.238.xx)
User-Agent: Mozilla/5.0 (compatible; Crawling jpeg; http://www.yama.info.waseda.ac.jp)
Disallow: /
# Custo web site spidering tool (link checking)
User-Agent: Mozilla/5.0 (compatible; Custo 3 (Netwu.com); Windows NT 5.1)
Disallow: /
# De.com German travel related search via Amazon Web Services
User-Agent: Mozilla/5.0 (compatible; de/1.13.2 +http://www.de.com)
Disallow: /
# Diffbot beta - RSS and news feed crawler
User-Agent: Mozilla/5.0 (compatible; Diffbot/0.1; +http://www.diffbot.com)
Disallow: /
# DNS-Digger - DNS server neighbourhood search
User-Agent: Mozilla/5.0 (compatible; DNS-Digger-Explorer/1.0; +http://www.dnsdigger.com)
Disallow: /
# DNS-Digger - DNS server neighbourhood search
User-Agent: Mozilla/5.0 (compatible; DNS-Digger/1.0; +http://www.dnsdigger.com)
Disallow: /
# Earthcom (Czech Republic) search robot (194.108.39.xx)
User-Agent: Mozilla/5.0 (compatible; EARTHCOM.info/2.01; http://www.earthcom.info)
Disallow: /
# enter4u / Earthcom.info search (Czech Republic)
User-Agent: Mozilla/5.0 (compatible; EARTHCOM/2.2; +http://enter4u.eu)
Disallow: /
# Prague Faculty of Mathematics and Physics using Egothor open source crawler
User-Agent: Mozilla/5.0 (compatible; egothor/8.0g; +http://ego.ms.mff.cuni.cz/)
Disallow: /
# Exalead (France) search robot (193.47.80.xx)
User-Agent: Mozilla/5.0 (compatible; Exabot Test/3.0; +http://www.exabot.com/go/robot)
Disallow: /
# TheFind.com - Shopping search robot
User-Agent: Mozilla/5.0 (compatible; FatBot 2.0; http://www.thefind.com/main/CrawlerFAQs.fhtml)
Disallow: /
# Galbot tagging robot (beta) - Denmark
User-Agent: Mozilla/5.0 (compatible; Galbot/1.0; +http://www.galbot.com/bot.html)
Disallow: /
# Geneva Single-Site Search Engine used by Healthdash health search
User-Agent: mozilla/5.0 (compatible; genevabot http://www.healthdash.com)
Disallow: /
# Paros - a Java based HTTP/HTTPS proxy
User-Agent: Mozilla/5.0 (compatible; Google Desktop) Paros/3.2.12
Disallow: /
# Google robot
User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)
Disallow: /
# Unknown robot using Heritrix
User-Agent: mozilla/5.0 (compatible; heritrix/1.0.4 http://innovationblog.com)
Disallow: /
# The Stanford University InfoLab robot using Heritrix
User-Agent: Mozilla/5.0 (compatible; heritrix/1.10.2 +http://i.stanford.edu/)
Disallow: /
# Newstin news feed search using Heritrix
User-Agent: Mozilla/5.0 (compatible; heritrix/1.12.1 +http://newstin.com/)
Disallow: /
# Page-store.com vertical search via Amazon Web Services
User-Agent: Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com)
Disallow: /
# Page-store.com vertical search via Amazon Web Services
User-Agent: Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com) [email:paul@page-store.com]
Disallow: /
# Heritrix Internet Archive's open-source web project
User-Agent: mozilla/5.0 (compatible; heritrix/1.3.0 http://archive.crawler.org)
Disallow: /
# Chepi Beta search Spain (194.116.240.1xx) using Heritrix
User-Agent: Mozilla/5.0 (compatible; heritrix/1.4.0 +http://www.chepi.net)
Disallow: /
# Truveo data mining robot using Heritrix
User-Agent: Mozilla/5.0 (compatible; heritrix/1.4t http://www.truveo.com/)
Disallow: /
# L3S WebCrawling Project (Germany) using Heritrix
User-Agent: Mozilla/5.0 (compatible; heritrix/1.5.0 http://www.l3s.de/~kohlschuetter/projects/crawling/)
Disallow: /
# Pandora Internet Archive crawler (Australia) using Heritrix
User-Agent: Mozilla/5.0 (compatible; heritrix/1.5.0-200506231921 http://pandora.nla.gov.au/crawl.html)
Disallow: /
# WORIO (beta) search for computer scientists and programmers using Heritrix open-source crawler
User-Agent: Mozilla/5.0 (compatible; heritrix/1.6.0 http://www.worio.com/)
Disallow: /
# greaterera.com website collection project using Heritrix
User-Agent: Mozilla/5.0 (compatible; heritrix/1.7.0 +http://www.greaterera.com/)
Disallow: /
# hanzo:web social web archiving service
User-Agent: Mozilla/5.0 (compatible; Heritrix/1.8.0 http://www.hanzoarchives.com)
Disallow: /
# Accelobot - Accelovation Market Discovery software robot
User-Agent: Mozilla/5.0 (compatible; heritrix/1.x.x +http://www.accelobot.com)
Disallow: /
# Unknown AOL robot using Heritrix
User-Agent: Mozilla/5.0 (compatible; heritrix/2.0.0-RC1 +http://www.aol.com)
Disallow: /
# Hermits Search.com - Products and service search robot
User-Agent: Mozilla/5.0 (compatible; Hermit Search. Com; +http://www.hermitsearch.com)
Disallow: /
# IsMySiteUp? - Online website monitoring service
User-Agent: Mozilla/5.0 (compatible; http://www.IsMySiteUp.Net/bot/ )
Disallow: /
# UptimeAuditor - real time web monitoring
User-Agent: Mozilla/5.0 (compatible; http://www.UptimeAuditor.com/bot/ )
Disallow: /
# Hyperix vertical search crawler
User-Agent: Mozilla/5.0 (compatible; HyperixScoop/1.3; +http://www.hyperix.com)
Disallow: /
# Unknown robot (reads robots.txt) from chinatelecom (219.142.78.xx)
User-Agent: Mozilla/5.0 (compatible; iaskspider/1.0; MSIE 6.0)
Disallow: /
# ID-Search.org - Russian search project
User-Agent: Mozilla/5.0 (compatible; IDBot/1.0; +http://www.id-search.org/bot.html)
Disallow: /
# Najdi.si (Slovenia) search using Interseek/Web Interseek/API Search Engine
User-Agent: Mozilla/5.0 (compatible; InterseekWeb/3.x)
Disallow: /
# hanzo:web social web archiving service
User-Agent: Mozilla/5.0 (compatible; Jim +http://www.hanzoarchives.com)
Disallow: /
# Konqueror 2.0.x X11
User-Agent: Mozilla/5.0 (compatible; Konqueror/2.0.1; X11); Supports MD5-Digest; Supports gzip encoding
Disallow: /
# Konqueror 2.1.x X11
User-Agent: Mozilla/5.0 (compatible; Konqueror/2.1.1; X11)
Disallow: /
# Konqueror 2.2.x
User-Agent: Mozilla/5.0 (compatible; Konqueror/2.2.2)
Disallow: /
# Konqueror 2.2.x Linux
User-Agent: Mozilla/5.0 (compatible; Konqueror/2.2.2; Linux 2.4.14-xfs; X11; i686)
Disallow: /
# Exalead (France) search robot (193.47.80.xx)
User-Agent: Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)
Disallow: /
# Lemur Consulting LemIR spider
User-Agent: Mozilla/5.0 (compatible; LemSpider 0.1)
Disallow: /
# Linksmanager.com online link checking service
User-Agent: Mozilla/5.0 (compatible; LinksManager.com_bot http://linksmanager.com/linkchecker.html)
Disallow: /
# LinkStash Bookmark Manager
User-Agent: Mozilla/5.0 (compatible; LinkStash Bookmark Manager; http://www.xrayz.co.uk/)
Disallow: /
# Mojeek Search Preview robot (217.155.205.xx)
User-Agent: Mozilla/5.0 (compatible; MojeekBot/2.0; http://www.mojeek.com/bot.html)
Disallow: /
# Joomla!/Mambo component - MosBookmarks (bot) link checking
User-Agent: Mozilla/5.0 (compatible; MOSBookmarks/v2.6-Plus; Link Checker)
Disallow: /
# PodTech entertainment and video network crawler
User-Agent: Mozilla/5.0 (compatible; MSIE 6.0; Podtech Network; crawler_admin@podtech.net)
Disallow: /
# onet.pl Szukaj (Search) robot (213.180.128.1xx)
User-Agent: Mozilla/5.0 (compatible; OnetSzukaj/5.0; http://szukaj.onet.pl)
Disallow: /
# Pagestacker online bookmark service
User-Agent: Mozilla/5.0 (compatible; PagestackerBot; http://www.pagestacker.com)
Disallow: /
# PalmeraBot - Links24h.com search engine robot
User-Agent: Mozilla/5.0 (compatible; PalmeraBot; http://www.links24h.com/help/palmera) Version 0.001
Disallow: /
# FeedMo feed search (Japan) using Pear HTTP
User-Agent: Mozilla/5.0 (compatible; PEAR HTTP_Request class; http://feed.moo.jp/)
Disallow: /
# PHONifier mobile access to web content
User-Agent: Mozilla/5.0 (compatible; Phonifier; +http://www.phonifier.com)
Disallow: /
# pmoz.info ODP link checking bot
User-Agent: Mozilla/5.0 (compatible; pmoz.info ODP link checker; +http://pmoz.info/doc/botinfo.htm)
Disallow: /
# Pogodak search (Slovenia) robot via Interseek
User-Agent: Mozilla/5.0 (compatible; pogodak.ba/3.x)
Disallow: /
# Pogodak search (Slovenia) robot via Interseek
User-Agent: Mozilla/5.0 (compatible; Pogodak.hr/3.1)
Disallow: /
# Proximic Publisher Widget - RSS and news content generator
User-Agent: Mozilla/5.0 (compatible; Proximic crawler; +http://www.proximic.com/en/about-us/contact-us.html)
Disallow: /
# ProgramacionWeb.net PWeBot crawler (Argentina)
User-Agent: Mozilla/5.0 (compatible; PWeBot/3.1; http://www.programacionweb.net/robot.php)
Disallow: /
# Quantcast - Open Internet Ratings Service
User-Agent: Mozilla/5.0 (compatible; Quantcastbot/1.0; www.quantcast.com)
Disallow: /
# robtex - Multi-RBL check and AS-numbercheck
User-Agent: Mozilla/5.0 (compatible; robtexbot/1.0; http://www.robtex.com/ )
Disallow: /
# ScoutJet (Blekko) search web crawler
User-Agent: Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)
Disallow: /
# Scrub the web robot (66.93.156.xx)
User-Agent: Mozilla/5.0 (compatible; Scrubby/2.2; http://www.scrubtheweb.com/)
Disallow: /
# Shunixbot (France) beta / test semantic web indexing robot
User-Agent: Mozilla/5.0 (compatible; ShunixBot/1.x.x +http://www.shunix.com/robot.htm)
Disallow: /
# Shunixbot (France) beta / test semantic web indexing robot
User-Agent: Mozilla/5.0 (compatible; ShunixBot/1.x; http://www.shunix.com/bot.htm)
Disallow: /
# Skreemr - Audio search engine
User-Agent: Mozilla/5.0 (compatible; SkreemRBot +http://skreemr.com)
Disallow: /
# Snap Firefox Search Plugin
User-Agent: Mozilla/5.0 (compatible; SnapPreviewBot; en-US; rv:1.8.0.9) Gecko/20061206 Firefox/1.5.0.9
Disallow: /
# Spurl.net bookmark service & search engine (84.40.30.xxx)
User-Agent: Mozilla/5.0 (compatible; SpurlBot/0.2)
Disallow: /
# Summize - Opinion and review search robot
User-Agent: Mozilla/5.0 (compatible; SummizeBot +http://www.summize.com)
Disallow: /
# Syclik Control web content management system
User-Agent: Mozilla/5.0 (compatible; SYCLIKControl/LinkChecker;)
Disallow: /
# Synoo web directory robot
User-Agent: Mozilla/5.0 (compatible; Synoobot/0.9; http://www.synoo.com/search/bot.html)
Disallow: /
# Theophrastus Internet Spider for a basic search engine project
User-Agent: Mozilla/5.0 (compatible; Theophrastus/x.x; http://users.cs.cf.ac.uk/N.A.Smith/theophrastus.php)
Disallow: /
# Interseek - Java search engine technology used for Pogodak search
User-Agent: Mozilla/5.0 (compatible; TridentSpider/3.1)
Disallow: /
# WiseGuys robot
User-Agent: Mozilla/5.0 (compatible; Vagabondo/2.1; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)
Disallow: /
# phpwebbrain online bookmark service (Germany)
User-Agent: Mozilla/5.0 (compatible; Windows NT 5.0; phpwebbrainBot/0.1 - http://www.monsterli.ch/phpwebbrain/)
Disallow: /
# WORIO (beta) search for computer scientists and programmers using Heritrix open-source crawler
User-Agent: Mozilla/5.0 (compatible; worio bot heritrix/1.10.0 +http://worio.com)
Disallow: /
# Kathune spider for World of Warcraft guild data. Used to power WoW Lemmings
User-Agent: Mozilla/5.0 (compatible; WoW Lemmings Kathune/2.0;http://www.wowlemmings.com/kathune.html)
Disallow: /
# eXternalTest - Server and online services monitoring
User-Agent: Mozilla/5.0 (compatible; XTbot/1.0v; +http://www.externaltest.com)
Disallow: /
# Yahoo / Inktomi search robot
User-Agent: Mozilla/5.0 (compatible; Yahoo! DE Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
Disallow: /
# Inktomi robot (202.160.180.xxx) for Yahoo China
User-Agent: Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)
Disallow: /
# Inktomi robot for Yahoo (via 66.196.xx.xxx)
User-Agent: Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
Disallow: /
# Yesup Seo - Toronto SEO Service
User-Agent: Mozilla/5.0 (compatible; YesupBot/1.0; +http://www.yesup.net/bot.html)
Disallow: /
# Yoono - community based search (193.110.140.xxx / 194.0.179.[x]xx)
User-Agent: Mozilla/5.0 (compatible; Yoono; http://www.yoono.com/)
Disallow: /
# Youdao search (China) robot
User-Agent: Mozilla/5.0 (compatible; YoudaoBot/1.0; http://www.youdao.com/help/webmaster/spider/; )
Disallow: /
# Zenbot robot for the Southern African Zen search service
User-Agent: Mozilla/5.0 (compatible; Zenbot/1.3; +http://zen.co.za/webmasters/)
Disallow: /
# Powerset Natural Language Search crawler (under development) using Heritrix via Amazon Web Services
User-Agent: Mozilla/5.0 (compatible; zermelo +http://www.powerset.com) [email:paul@page-store.com,crawl@powerset.com]
Disallow: /
# Heritrix - The Internet Archive's open-source crawler (207.241.225.2xx)
User-Agent: Mozilla/5.0 (compatible;archive.org_bot/1.7.1; collectionId=316; Archive-It; +http://www.archive-it.org)
Disallow: /
# Wayback Machine Internet Archive crawler
User-Agent: Mozilla/5.0 (compatible;archive.org_bot/heritrix-1.9.0-200608171144 +http://pandora.nla.gov.au/crawl.html)
Disallow: /
# FindITAnswers - Search engine for software developers
User-Agent: Mozilla/5.0 (compatible;FindITAnswersbot/1.0;+http://search.it-influentials.com/bot.htm)
Disallow: /
# Mainseek search (Poland) robot
User-Agent: Mozilla/5.0 (compatible;MAINSEEK_BOT)
Disallow: /
# MozShot - Technical demo to take screenshot of any URL
User-Agent: Mozilla/5.0 (Gecko/20070310 Mozshot/0.0.20070628; http://mozshot.nemui.org/)
Disallow: /
# Mozilla Firefox 3.0 beta (Gran Paradiso) for MacOS
User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9b5) Gecko/2008032619 Firefox/3.0b5
Disallow: /
# Chimera browser (Mozilla/Gecko engine) - now Camino Mac PowerPC
User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021219 Chimera/0.6
Disallow: /
# Camino browser (Mozilla/Gecko engine) - ex Chimera Mac PowerPC
User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20030306 Camino/0.7
Disallow: /
# OmniWeb 5.x.x Mac OS X browser
User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/xx (KHTML like Gecko) OmniWeb/v5xx.xx
Disallow: /
# Safari 1.2x browser (AppleWebKit/KHTML engine) Mac OS X
User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/xxx.x (KHTML like Gecko) Safari/12x.x
Disallow: /
# Netscape 6.x Mac PowerPC
User-Agent: Mozilla/5.0 (Macintosh; U; PPC; en-US; rv:0.9.2) Gecko/20010726 Netscape6/6.1
Disallow: /
# unknown robot from gw.ocg-corp.com
User-Agent: Mozilla/5.0 (research@mediatrec.com)
Disallow: /
# Sage - RSS and Atom feed reader extension for Mozilla Firefox
User-Agent: Mozilla/5.0 (Sage)
Disallow: /
# Inktomi (Hotbot-Lycos - NBCi etc.) robot
User-Agent: Mozilla/5.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)
Disallow: /
# Inktomi (Hotbot-Lycos - NBCi etc.) robot
User-Agent: Mozilla/5.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)
Disallow: /
# Opera 5.x SunOS
User-Agent: Mozilla/5.0 (SunOS 5.8 sun4u; U) Opera 5.0 [en]
Disallow: /
# Twiceler experimental web crawler
User-Agent: Mozilla/5.0 (Twiceler-0.9 http://www.cuill.com/twiceler/robot.html)
Disallow: /
# Some spambot from Romania (82.208.139.1xx & 86.123.65.xx) - Maybe email harvesting
User-Agent: Mozilla/5.0 (Version: xxxx Type:xx)
Disallow: /
# GenieKnows.com search robot (64.5.245.xx / 64.5.220.xxx)
User-Agent: Mozilla/5.0 (wgao@genieknows.com)
Disallow: /
# Netscape 6.x Win98
User-Agent: Mozilla/5.0 (Windows; U; Win98; en-US; rv:0.9.2) Gecko/20010726 Netscape6/6.1
Disallow: /
# Firebird browser (Mozilla/Gecko engine) - ex Phoenix Win98
User-Agent: Mozilla/5.0 (Windows; U; Win98; en-US; rv:x.xx) Gecko/20030423 Firebird Browser/0.6
Disallow: /
# Beonex Communicator browser (Mozilla/Gecko engine)
User-Agent: Mozilla/5.0 (Windows; U; Win9x; en; Stable) Gecko/20020911 Beonex/0.8.1-stable
Disallow: /
# Healthline health related search robot (72.5.115.xx)
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.7) NimbleCrawler 1.11 obeys UserAgent NimbleCrawler For problems contact: crawler_at_dataalchemy.com
Disallow: /
# Google Chrome browser based on WebKit (Safari)
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19
Disallow: /
# Flock web browser built on Mozilla technologies
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.5) Gecko/20060731 Firefox/1.5.0.5 Flock/0.7.4.1
Disallow: /
# Megaupload Mega Manager - Download manager plugin for Firefox
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.4/Megaupload x.0
Disallow: /
# Orca browser - based on Gecko
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.1) Gecko/2008092215 Firefox/3.0.1 Orca/1.1 beta 3
Disallow: /
# Firefox browser (Mozilla/Gecko engine) - ex Firebird WinXP
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.x.x) Gecko/20041107 Firefox/x.x
Disallow: /
# Firebird browser (Mozilla/Gecko engine) - ex Phoenix WinXP
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.xx) Gecko/20030504 Mozilla Firebird/0.6
Disallow: /
# Mnenhy - enhanced mail & news Mozilla based browser
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.xxx) Gecko/20041027 Mnenhy/0.6.0.104
Disallow: /
# Voila.fr robot
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)
Disallow: /
# Voila.fr robot
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)
Disallow: /
# Mozilla Firefox 3.0 beta (Gran Paradiso) for Win
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9b5) Gecko/2008032620 Firefox/3.0b5
Disallow: /
# Tiscali Communicator - Online services suite
User-Agent: Mozilla/5.0 (Windows; U;XMPP Tiscali Communicator v.10.0.1; Windows NT 5.1; it; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3
Disallow: /
# Healthline health related search robot (72.5.115.xx)
User-Agent: Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@health
Disallow: /
# Healthline health related search robot (72.5.115.xx)
User-Agent: Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@healthline.com
Disallow: /
# Kazehakase - Gecko based browser (Japan)
User-Agent: Mozilla/5.0 (X11; Linux i686; U;rv: 1.7.13) Gecko/20070322 Kazehakase/0.4.4.1
Disallow: /
# Netscape 6.x Linux
User-Agent: Mozilla/5.0 (X11; U; Linux 2.4.2-2 i586; en-US; m18) Gecko/20010131 Netscape6/6.01
Disallow: /
# SeaMonkey browser suite (ex Mozilla) on Linux
User-Agent: Mozilla/5.0 (X11; U; Linux i686; de-AT; rv:1.8.0.2) Gecko/20060309 SeaMonkey/1.0
Disallow: /
# Epiphany (Mozilla/Gecko engine) browser Linux
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-GB; rv:1.7.6) Gecko/20050405 Epiphany/1.6.1 (Ubuntu) (Ubuntu package 1.0.2)
Disallow: /
# Nautilus (developed by Eazel.com) 1.x Browser Linux
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; Nautilus/1.0Final) Gecko/20020408
Disallow: /
# Mozilla (Gecko) 0.9x browser Linux
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:0.9.3) Gecko/20010801
Disallow: /
# Phoenix 0.3 browser (Mozilla/Gecko engine) - now Firebird Linux
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2b) Gecko/20021007 Phoenix/0.3
Disallow: /
# Epiphany (Mozilla/Gecko engine) browser Linux
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.6) Gecko/20040413 Epiphany/1.2.1
Disallow: /
# Snap Firefox Search Plugin
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.7) Gecko/20060909 Firefox/1.5.0.7 SnapPreviewBot
Disallow: /
# Bon Echo Alpha - developer preview of future Firefox browser
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1) Gecko/20061129 BonEcho/2.0
Disallow: /
# IceWeasel - the GNU version of the Firefox browser
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.1) Gecko/20061205 Iceweasel/2.0.0.1 (Debian-2.0.0.1+dfsg-2)
Disallow: /
# Mozilla Firefox 3.0 beta (Gran Paradiso) for Linux
User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9a8) Gecko/2007100619 GranParadiso/3.0a8
Disallow: /
# Galeon 1.x Browser Linux
User-Agent: Mozilla/5.0 Galeon/1.0.2 (X11; Linux i686; U;) Gecko/20011224
Disallow: /
# gURLChecker - GNOME link checking tool
User-Agent: Mozilla/5.0 gURLChecker/0.x.x (Linux)
Disallow: /
# URL Spider - used by usww.net
User-Agent: Mozilla/5.0 URL-Spider
Disallow: /
# W8net spider
User-Agent: Mozilla/5.0 usww.com-Spider-for-w8.net
Disallow: /
# GenieKnows.com search robot (64.5.245.xx / 64.5.220.xxx)
User-Agent: Mozilla/5.0 wgao@genieknows.com
Disallow: /
# axxus.de German business directory
User-Agent: Mozilla/5.0 whoiam [http://www.axxus.de/]
Disallow: /
# Yuntis : Collaborative Web Resource Categorization and Ranking Project robot
User-Agent: Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)
Disallow: /
# Blogbot (Germany) robot
User-Agent: Naamah 1.0.1/Blogbot (http://blogbot.de/)
Disallow: /
# Blogbot (Germany) robot
User-Agent: Naamah 1.0a/Blogbot (http://blogbot.de/)
Disallow: /
# Naver Japan / Korea robot
User-Agent: NABOT/5.0
Disallow: /
# Naver Japan / Korea robot
User-Agent: nabot_1.0
Disallow: /
# Badbot searching for Wordpress wp-login.php
User-Agent: NameOfAgent (CMS Spider)
Disallow: /
# naoFavicon4IE
User-Agent: naoFavicon4IE/1.xx
Disallow: /
# Unknown spambot / harvester from diff. IPs
User-Agent: NASA Search 1.0
Disallow: /
# Nationaldirectory spider
User-Agent: NationalDirectory-WebSpider/1.3
Disallow: /
# Nationaldirectory spider
User-Agent: NationalDirectoryAddURL/1.0
Disallow: /
# Naver Japan / Korea robot
User-Agent: NaverBot-1.0 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)
Disallow: /
# Naver Japan / Korea robot
User-Agent: NaverBot_dloader/1.5
Disallow: /
# Navisso closed beta robot (69.41.162.1xx)
User-Agent: NavissoBot
Disallow: /
# Navisso closed beta robot (69.41.162.1xx)
User-Agent: NavissoBot/1.7 (+http://navisso.com/)
Disallow: /
# Vias Information Archival robot
User-Agent: NCSA Beta 1 (http://vias.ncsa.uiuc.edu/viasarchivinginformation.html)
Disallow: /
# Nebulla.info distributed crawler (Germany)
User-Agent: Nebullabot/2.2 (http://bot.nebulla.info)
Disallow: /
# NEC Researchindex robot - now CiteSeer.IST scientific document index
User-Agent: NEC Research Agent -- compuman at research.nj.nec.com
Disallow: /
# Possibly an rcn.com user agent or the NEC Researchindex robot (unconfirmed)
User-Agent: NEC-Hayek/1.0
Disallow: /
# Net Seekr search robot
User-Agent: Net-Seekr Bot/Net-Seekr Bot V1 (http://www.net-seekr.com)
Disallow: /
# NetAnts download manager
User-Agent: NetAnts/1.2x
Disallow: /
# NetComplete IE browser package
User-Agent: NETCOMplete/x.xx
Disallow: /
# Netinfo.bg search (Bulgaria) robot
User-Agent: NetinfoBot/1.0 (http://netinfo.bg/netinfobot.html)
Disallow: /
# Netlookout internet notifier
User-Agent: NetLookout/2.24
Disallow: /
# Netluchs (Germany) search (193.164.8.xx)
User-Agent: Netluchs/0.8-dev ( ; http://www.netluchs.de/; ___don't___spam_me_@netluchs.de)
Disallow: /
# NetMechanic link checker
User-Agent: NetMechanic Vx.0
Disallow: /
# NewsGator NetNewsWire - Mac RSS feed reader
User-Agent: NetNewsWire/2.x (Mac OS X; http://ranchero.com/netnewswire/)
Disallow: /
# unknown InCom (216.0.107.xx) robot
User-Agent: NetNoseCrawler/v1.0
Disallow: /
# Netprospector metasearch software
User-Agent: Netprospector JavaCrawler
Disallow: /
# Netpumper download manager
User-Agent: NetPumper/x.xx
Disallow: /
# Look.com robot (209.87.232.x)
User-Agent: NetResearchServer(http://www.look.com)
Disallow: /
# IncyWincy search engine using DMOZ database
User-Agent: NetResearchServer/x.x(loopimprovements.com/robot.html)
Disallow: /
# NetSeer search (beta) crawler via Amazon Web Services - see also Teemer
User-Agent: NetSeer/Nutch-0.9 (NetSeer Crawler; http://www.netseer.com; crawler@netseer.com)
Disallow: /
# Wirtualna Polska / Netsprint search (Poland) robot
User-Agent: NetSprint -- 2.0
Disallow: /
# NetWhat Search crawler (69.9.167.1xx)
User-Agent: NetWhatCrawler/0.06-dev (NetWhatCrawler from NetWhat.com; http://www.netwhat.com; support@netwhat.com)
Disallow: /
# Netzippy robot
User-Agent: NetZippy
Disallow: /
# unknown
User-Agent: NeuralBot/0.2
Disallow: /
# Unknown (12.238.4.xxx) attbi.com client robot
User-Agent: newsearchengine (ThisUser@unspecified.mail)
Disallow: /
# FetchLinks plugin for NewsGator RSS reader
User-Agent: NewsGator FetchLinks extension/0.2.0 (http://graemef.com)
Disallow: /
# NewsGator online RSS reader
User-Agent: NewsGatorOnline/2.0 (http://www.newsgator.com; 1 subscribers)
Disallow: /
# Eliyon Crawler for Business People Search
User-Agent: NextGenSearchBot 1 (for information visit http://www.eliyon.com/NextGenSearchBot)
Disallow: /
# Nextopia crawler
User-Agent: NextopiaBOT (+http://www.nextopia.com) distributed crawler client beta v0.x
Disallow: /
# find your keywords - semantic search (Germany) robot
User-Agent: NG-Search/0.90 (NG-SearchBot; http://www.ng-search.com; )
Disallow: /
# Exalead (France) search robot (193.47.80.xx)
User-Agent: NG/1.0
Disallow: /
# Exalead Websearch image crawler (193.47.80.xx)
User-Agent: NG/4.0.1229
Disallow: /
# Unknown UA from PlanetLab distributed network
User-Agent: nicebot
Disallow: /
# NicoZone childsafe search robot
User-Agent: NICO/1.0
Disallow: /
# Nikita the Spider - Online HTML validation, link checking
User-Agent: Nikita the Spider (http://NikitaTheSpider.com/)
Disallow: /
# Experimental LSI (?) robot from 140.233.69.xx (Middlebury.edu)
User-Agent: NITLE Blog Spider/0.01
Disallow: /
# Nitro Downloader - download manager
User-Agent: Nitro Downloader 1.x (www.klsofttools.com)
Disallow: /
# Noago spider
User-Agent: Noago Spider
Disallow: /
# telefonica.es user robot
User-Agent: Nocilla/1.0
Disallow: /
# Google WAP robot
User-Agent: Nokia-WAPToolkit/1.2 googlebot(at)googlebot.com
Disallow: /
# Botmobi crawler for Find.mobi mobile search
User-Agent: Nokia6300/2.0 (05.50) Profile/MIDP-2.0 Configuration/CLDC-1.1 (botmobi http://find.mobi/bot.html abuse@mtld.mobi)
Disallow: /
# YahooSeeker/M1A1-R2D2 - Yahoo mobile web crawling robot
User-Agent: Nokia6610/1.0 (3.09) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible;YahooSeeker/M1A1-R2D2; http://help.yahoo.com/help/us/ysearch/crawling/crawling-01.html)
Disallow: /
# Google WAP proxy
User-Agent: Nokia7110/1.0 (05.01) (Google WAP Proxy/1.0)
Disallow: /
# Nokodo public beta search robot (67.18.222.xx)
User-Agent: NokodoBot/1.x (+http://nokodo.com/bot.htm)
Disallow: /
# Burf.com UK Search Engine robot
User-Agent: Norbert the Spider(Burf.com)
Disallow: /
# noXtrum search robot (Spain)
User-Agent: noxtrumbot/1.0 (crawler@noxtrum.com)
Disallow: /
# Noyona job search (preview)
User-Agent: noyona_0_1
Disallow: /
# Nameprotect copyright search robot (24.177.134.x)
User-Agent: NP/0.1 (NP; http://www.nameprotect.com; npbot@nameprotect.com)
Disallow: /
# Nameprotect copyright search robot (24.177.134.x)
User-Agent: NPBot (http://www.nameprotect.com/botinfo.html)
Disallow: /
# Nameprotect copyright search robot (24.177.134.x)
User-Agent: NPBot-1/2.0
Disallow: /
# Nsauditor Network Security Auditor
User-Agent: Nsauditor/1.x
Disallow: /
# NetShow Media Player = Windows Media Player 10
User-Agent: NSPlayer/10.0.0.xxxx WMFSDK/10.0
Disallow: /
# Nsyght social search application
User-Agent: nsyght.com/Nutch-1.0-dev (nsyght.com; Nsyght.com)
Disallow: /
# Nsyght social search application
User-Agent: nsyght.com/Nutch-x.x (nsyght.com; search.nsyght.com)
Disallow: /
# NTT Directory robot
User-Agent: nttdirectory_robot/0.9 (super-robot@super.navi.ocn.ne.jp)
Disallow: /
# Nucleus CMS SiteList link managing plugin
User-Agent: Nucleus SiteList LinkChecker/1.1
Disallow: /
# nuSearch spider (84.9.136.xxx)
User-Agent: nuSearch Spider www.nusearch.com (compatible; MSIE 4.01)
Disallow: /
# nuSearch spider (84.9.136.xxx)
User-Agent: NuSearch Spider (compatible; MSIE 6.0)
Disallow: /
# nuSearch spider (84.9.136.xxx)
User-Agent: NuSearch Spider www.nusearch.com
Disallow: /
# Nutch open source robot
User-Agent: Nutch
Disallow: /
# Picapage search for handheld devices using Nutch
User-Agent: Nutch crawler/Nutch-0.9 (picapage.com; admin@picapage.com)
Disallow: /
# Ayell Euronet business directory robot using Nutch
User-Agent: Nutch/Nutch-0.9 (Eurobot; http://www.ayell.eu )
Disallow: /
# Netsweeper content filtering engine (66.207.120.2xx) powered by Nutch
User-Agent: NutchCVS/0.06-dev (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@lists.sourceforge.net)
Disallow: /
# Nutch open source robot
User-Agent: NutchCVS/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)
Disallow: /
# Robot from University of Washington Computer Science & Engineering (128.208.6.2xx)
User-Agent: NutchCVS/0.7.1 (Nutch running at UW; http://www.nutch.org/docs/en/bot.html; sycrawl@cs.washington.edu)
Disallow: /
# Amazon Elastic Compute Cloud (Amazon EC2) robot
User-Agent: NutchEC2Test/Nutch-0.9-dev (Testing Nutch on Amazon EC2.; http://lucene.apache.org/nutch/bot.html; ec2test at lucene.com)
Disallow: /
# Nutch open source robot
User-Agent: NutchOrg/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)
Disallow: /
# Unknown robot using Nutch (maybe private crawling) via Cox network (70.187.130.25x)
User-Agent: nutchsearch/Nutch-0.9 (Nutch Search 1.0; herceg_novi at yahoo dot com)
Disallow: /
# Unknown crawler from University of Washington - Computer science
User-Agent: NutchVinegarCrawl/Nutch-0.8.1 (Vinegar; http://www.cs.washington.edu; eytanadar at gmail dot com)
Disallow: /
# Weblog bookwatch robot
User-Agent: obidos-bot (just looking for books.)
Disallow: /
# Objects Search robot
User-Agent: ObjectsSearch/0.01-dev (ObjectsSearch;http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)
Disallow: /
# Objects Search robot
User-Agent: ObjectsSearch/0.0x (ObjectsSearch; http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)
Disallow: /
# Cobion Germany Brand Protection Services robot
User-Agent: oBot ((compatible;Win32))
Disallow: /
# GlobalSpec Engineering Search robot (66.194.55.xxx)
User-Agent: Ocelli/1.x (http://www.globalspec.com/Ocelli)
Disallow: /
# Octopus download manager
User-Agent: Octopus
Disallow: /
# Octora blog or RSS information crawler - beta (66.228.114.xx)
User-Agent: Octora Beta - www.octora.com
Disallow: /
# Octora RSS feed search
User-Agent: Octora Beta Bot - www.octora.com
Disallow: /
# MetaProducts Offline Explorer - offline browser
User-Agent: Offline Explorer 1.*
Disallow: /
# Claymont robot / Internetseer Web Site Monitoring
User-Agent: OliverPerry
Disallow: /
# OmniExplorer car & shopping search (64.62.175.xxx)
User-Agent: OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Internet Categorizer
Disallow: /
# OmniExplorer car & shopping search (64.62.175.xxx)
User-Agent: OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Job Crawler
Disallow: /
# OmniExplorer car & shopping search (64.62.175.xxx)
User-Agent: OmniExplorer_Bot/1.1x (+http://www.omni-explorer.com) Torrent Crawler
Disallow: /
# OmniExplorer car & shopping search (64.62.175.xxx)
User-Agent: OmniExplorer_Bot/x.xx (+http://www.omni-explorer.com) WorldIndexer
Disallow: /
# onsearch.de German web directory link checking
User-Agent: onCHECK Linkchecker von www.scientec.de fuer www.onsinn.de
Disallow: /
# onsearch.de German web directory link checking
User-Agent: onCHECK-Robot, www.onsearch.de
Disallow: /
# onet.pl Szukaj (Search) robot (213.180.128.1xx)
User-Agent: Onet.pl SA- http://szukaj.onet.pl
Disallow: /
# Dead-Links.com link validation spider
User-Agent: online link validator (http://www.dead-links.com/)
Disallow: /
# Online24 shopping portal (Germany) link checking
User-Agent: Online24-Bot (Version: 1.0x, powered by www.online24.de)
Disallow: /
# OntoSpider - Dutch robot for a research project. (195.11.244.xx)
User-Agent: OntoSpider/1.0 libwww-perl/5.65
Disallow: /
# Openfind.com.tw robot
User-Agent: Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)
Disallow: /
# Openfind.com.tw robot
User-Agent: Openfind data gatherer- Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)
Disallow: /
# Openfind.com.tw robot
User-Agent: Openfind Robot/1.1A2
Disallow: /
# open i search robot - search engine in development
User-Agent: OpenISearch/1.x (www.openisearch.com)
Disallow: /
# Opentagger social bookmarking system
User-Agent: OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)
Disallow: /
# OpenText crawler
User-Agent: OpenTextSiteCrawler/2.9.2
Disallow: /
# OpenWebSpider - Open Source web search engine
User-Agent: OpenWebSpider/0.x.x (http://www.openwebspider.org)
Disallow: /
# OpenWebSpider - Open Source web search engine
User-Agent: OpenWebSpider/x
Disallow: /
# Opera 5.0 Linux
User-Agent: Opera/5.0 (Linux 2.0.38 i386; U) [en]
Disallow: /
# Opera 5.11 faked WinME referer
User-Agent: Opera/5.11 (Windows ME; U) [ru]
Disallow: /
# Opera 5.12 Win98
User-Agent: Opera/5.12 (Windows 98; U) [en]
Disallow: /
# unknown robot from gw.ocg-corp.com (209.126.176.x)
User-Agent: Opera/6.01 (larbin@unspecified.mail)
Disallow: /
# Opera 6.x- Mandrake Linux
User-Agent: Opera/6.x (Linux 2.4.8-26mdk i686; U) [en]
Disallow: /
# Opera 6.x WinNT
User-Agent: Opera/6.x (Windows NT 4.0; U) [de]
Disallow: /
# Opera 7.x WinXP
User-Agent: Opera/7.x (Windows NT 5.1; U) [en]
Disallow: /
# Opera 8.x (Beta) WinXP
User-Agent: Opera/8.xx (Windows NT 5.1; U; en)
Disallow: /
# Opera 9 (Beta) Browser
User-Agent: Opera/9.0 (Windows NT 5.1; U; en)
Disallow: /
# Opera 9 (final)
User-Agent: Opera/9.00 (Windows NT 5.1; U; de)
Disallow: /
# Opera browser 9.6x on WinXP (Presto = Opera's rendering engine)
User-Agent: Opera/9.60 (Windows NT 5.1; U; de) Presto/2.1.1
Disallow: /
# Opidoo Search Belgium robot (62.4.83.xxx)
User-Agent: OpidooBOT (larbin2.6.3@unspecified.mail)
Disallow: /
# Open Wave Phone Simulator SDK
User-Agent: OPWV-SDK UP.Browser/7.0.2.3.119 (GUI) MMP/2.0 Push/PO
Disallow: /
# Oracle Application Server cache
User-Agent: Oracle Application Server Web Cache 10g
Disallow: /
# Oracle interMedia Text - Text and web documents indexing
User-Agent: Oracle iMTCrawler
Disallow: /
# Oracle Search
User-Agent: Oracle Ultra Search
Disallow: /
# Orangeslicer semantic search (Beta) Germany (193.201.52.1xx)
User-Agent: OrangeSpider
Disallow: /
# Orbiter - DailyOrbit search spider
User-Agent: Orbiter/T-2.0 (+http://www.dailyorbit.com/bot.htm)
Disallow: /
# Orca browser - based on Gecko
User-Agent: Orca Browser (http://www.orcabrowser.com)
Disallow: /
# Marketscore (was Netsetter) internet accelerator
User-Agent: OSSProxy 1.3.305.321 (Build 305.321 Win32 en-us)(Dec 21 2005 16:30:54)
Disallow: /
# Unknown robot from Chinanet (220.181.8.xxx)
User-Agent: OutfoxBot/0.x (For internet experiments; http://; outfox.agent@gmail.com)
Disallow: /
# Unknown robot from Chinanet (60.191.80.1)
User-Agent: OutfoxMelonBot/0.5 (for internet experiments; http://; outfoxbot@gmail.com)
Disallow: /
# Overture/Fast/Alltheweb crawler (66.77.73.xxx)
User-Agent: Overture-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)
Disallow: /
# Unknown robot from 198.169.127.xx (innovationplace.com)
User-Agent: OWR_Crawler 0.1
Disallow: /
# Ozelot - Flying Cat's search engine robot (Germany)
User-Agent: ozelot/2.7.3 (Search engine indexer; www.flying-cat.de/ozelot; ozelot@flying-cat.de)
Disallow: /
# PADLibrary.com - PAD file software robot for FindFiles.com
User-Agent: PADLibrary Spider
Disallow: /
# Pagebites job search crawler
User-Agent: PageBitesHyperBot/600 (http://www.pagebites.com/)
Disallow: /
# Pagebull visual search engine
User-Agent: Pagebull http://www.pagebull.com/
Disallow: /
# Pagestacker online bookmark service
User-Agent: Pagestacker Bot
Disallow: /
# Secure Computing SmartFilter Tools - malware crawler
User-Agent: page_verifier (http://www.securecomputing.com/goto/pv)
Disallow: /
# Download manager?
User-Agent: PagmIEDownload
Disallow: /
# CFC crawler used by Italian academic and research network (GARR)
User-Agent: parallelContextFocusCrawler1.1parallelContextFocusCrawler1.1
Disallow: /
# http://www.ianett.com robot
User-Agent: ParaSite/1.0b (http://www.ianett.com/parasite/)
Disallow: /
# Patsearch (Germany) robot
User-Agent: Patwebbot (http://www.herz-power.de/technik.html)
Disallow: /
# Pavuk web downloading program for Unix
User-Agent: pavuk/0.9pl29b i686-pc-linux-gnu
Disallow: /
# Some site scanning tool via diff. IPs- i.e.: - cox.net (68.4.xxx.xxx)
User-Agent: PBrowse 1.4b
Disallow: /
# Post.sk / Eurotel.sk robot
User-Agent: pd02_1.0.0 pd02_1.0.0@dzimi@post.sk
Disallow: /
# Pear HTTP_Request PHP extension package
User-Agent: PEAR HTTP_Request class ( http://pear.php.net/ )
Disallow: /
# Peerbot - favicon search robot
User-Agent: PEERbot www.peerbot.com
Disallow: /
# Unknown robot from 64.5.48.xxx (Plethoric.net)
User-Agent: PeopleChat/Search_Engine
Disallow: /
# Some site scanning tool via diff. IPs
User-Agent: PEval 1.4b
Disallow: /
# diff. IPs / services
User-Agent: PHP/3.x.xx
Disallow: /
# diff. IPs / services
User-Agent: PHP/4.0.4pl1
Disallow: /
# diff. IPs / services- i.e.: -NTT/Verio Inc. link checker
User-Agent: PHP/4.0.6
Disallow: /
# diff. IPs / services- i.e.: - Phenominet.com link checking
User-Agent: PHP/4.1.1
Disallow: /
# diff. IPs / services- i.e.: - 209.114.200.xx = MyNetCrawler link checking - 216.139.207.xxx = Mixcat crawler
User-Agent: PHP/4.1.2
Disallow: /
# Pico Search robot
User-Agent: PicoSearch/1.0
Disallow: /
# Piffany targeted search web spider
User-Agent: Piffany_Web_Scraper_v0.x
Disallow: /
# Piffany targeted search web spider
User-Agent: Piffany_Web_Spider_v0.x
Disallow: /
# Whois Source domain name information robot (66.249.26.xx)- s.also: - SurveyBot
User-Agent: PigeonBot1.0 BETA
Disallow: /
# PingALink website monitoring
User-Agent: PingALink Monitoring Services 1.0
Disallow: /
# PingALink website monitoring
User-Agent: PingALink Monitoring Services 1.0 (http://www.pingalink.com)
Disallow: /
# Pingdom web site monitoring
User-Agent: Pingdom GIGRIB (http://www.pingdom.com)
Disallow: /
# pipeline search (DMOZ based) search robot (24.106.39.xxx)
User-Agent: pipeLiner/0.3a (PipeLine Spider;http://www.pipeline-search.com/webmaster.html; webmaster'at'pipeline-search.com)
Disallow: /
# pipeline search (DMOZ based) search robot (24.106.39.xxx)
User-Agent: pipeLiner/0.xx (PipeLine Spider; http://www.pipeline-search.com/webmaster.html)
Disallow: /
# Pita crawler
User-Agent: Pita
Disallow: /
# Private user-agent via Hurricane Electric Internet Services
User-Agent: Pizilla++ ver 2.45
Disallow: /
# Portaljuice spider
User-Agent: PJspider/3.0 (pjspider@portaljuice.com; http://www.portaljuice.com)
Disallow: /
# Plagger - pluggable RSS/Atom feed aggregator written in Perl
User-Agent: Plagger/0.x.xx (http://plagger.org/)
Disallow: /
# unknown ucsd.edu robot
User-Agent: PlagiarBot/1.0
Disallow: /
# Plantynet web filtering services - Blacklist DB robot
User-Agent: PlantyNet_WebRobot_V1.9 dhkang@plantynet.com
Disallow: /
# Unknown UA from 66.220.23.2xx
User-Agent: plinki/0.1 (you got plinked! (thats a good thing..); http://www.plinki.com; crawl@plinki.com)
Disallow: /
# Pluck RSS feed crawler
User-Agent: PluckFeedCrawler/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://www.pluck.com; 1 subscribers)
Disallow: /
# Pluggd Podcast search engine
User-Agent: Pluggd/Nutch-0.9 (automated crawler http://www.pluggd.com;support at pluggd dot com)
Disallow: /
# Yutaka Endo's Pockey / GetHTML / GetHTMLW - some downloading tool from Japan
User-Agent: Pockey-GetHTML/4.12.0 (Win32; GUI; ix86)
Disallow: /
# Yutaka Endo's Pockey / GetHTML / GetHTMLW - some downloading tool from Japan
User-Agent: Pockey-GetHTML/x.xx
Disallow: /
# Yutaka Endo's Pockey / GetHTML / GetHTMLW - some downloading tool from Japan
User-Agent: Pockey/x.x.x
Disallow: /
# Yutaka Endo's Pockey / GetHTML / GetHTMLW - some downloading tool from Japan
User-Agent: Pockey7.x.x(WIN32GUI)
Disallow: /
# HTTP user-agent for POE (portable networking framework for Perl)
User-Agent: POE-Component-Client-HTTP/0.64 (perl; N; POE; en; rv:0.640000)
Disallow: /
# ThePlanet/jaja-jak-globusy.com Google Adsense referrer spam bot from 70.85.116.* / 70.84.128.xxx / 70.85.193.xxx
User-Agent: Poirot
Disallow: /
# Polybot webcrawler
User-Agent: polybot 1.0 (http://cis.poly.edu/polybot/)
Disallow: /
# Dir.com / Iliad French recherche robot
User-Agent: Pompos/1.x http://dir.com/pompos.html
Disallow: /
# Iliad / Free French recherche robot
User-Agent: Pompos/1.x pompos@iliad.fr
Disallow: /
# Popdex - web site popularity crawler
User-Agent: Popdexter/1.0
Disallow: /
# Unknown spam bot / harvester (63.223.10.***)
User-Agent: Port Huron Labs
Disallow: /
# PortalB (now Alacra search) spider
User-Agent: PortalBSpider/2.0 (spider@portalb.com)
Disallow: /
# portalmmm IMode mobile browser
User-Agent: portalmmm/2.0 S500i(c20;TB)
Disallow: /
# Yahoo (66.94.237.1xx / 216.109.121.xx) favorites tracking robot
User-Agent: PostFavorites
Disallow: /
# Potbot : A simple IRC bot written in Perl
User-Agent: potbot 1.0
Disallow: /
# Project Rialto - data mining development project
User-Agent: PRCrawler/Nutch-0.9 (data mining development project; crawler@projectrialto.com)
Disallow: /
# CUPS robot for AT&T Privacy Bird Privacy Preferences (P3P) enhancements
User-Agent: PrivacyFinder Cache Bot v1.0
Disallow: /
# CUPS robot for AT&T Privacy Bird Privacy Preferences (P3P) enhancements
User-Agent: PrivacyFinder/1.1
Disallow: /
# Privoxy web proxy
User-Agent: Privoxy/3.0 (Anonymous)
Disallow: /
# Some site scanning tool from diff. IPs- i.e.: - 67.99.33.x (lightningcon.broadwing.net)
User-Agent: Production Bot 0116B
Disallow: /
# Some site scanning tool from diff. IPs- i.e.: - 216.232.64.xx (telus.net)
User-Agent: Production Bot 2016B
Disallow: /
# Some site scanning tool from diff. IPs- i.e.: - 141.154.181.xxx (east.verizon.net)
User-Agent: Production Bot DOT 3016B
Disallow: /
# Some spam bot
User-Agent: Program Shareware 1.0.2
Disallow: /
# unknown
User-Agent: Progressive Download
Disallow: /
# unknown
User-Agent: Progressive Download HTTP check
Disallow: /
# XP5 robot
User-Agent: Project XP5 [2.03.07-111203]
Disallow: /
# Answerchase PROve Answerbot
User-Agent: PROve AnswerBot 4.0
Disallow: /
# ProWebguide robot
User-Agent: ProWebGuide Link Checker (http://www.prowebguide.com)
Disallow: /
# Picsearch robot (62.119.21.13x)
User-Agent: psbot/0.1 (+http://www.picsearch.com/bot.html)
Disallow: /
# Some site scanning tool via diff. IPs- i.e.: QWest Net
User-Agent: PSurf15a 11
Disallow: /
# Some site scanning tool via diff. IPs- i.e.: Optonline net (24.191.xxx.xxx)
User-Agent: PSurf15a 51
Disallow: /
# Some site scanning tool via diff. IPs- i.e.: - choiceone.net (216.153.xxx.xxx) - attbi.com (12.250.xxx.xxx) - optonline.net (24.191.xxx.xxx)
User-Agent: PSurf15a VA
Disallow: /
# Unknown website grabbing / ripping for unknown purposes from 208.66.195.x - Digitalinfinity.org Russia
User-Agent: psycheclone
Disallow: /
# Some robot from Stanford University (171.64.75.xxx = PubCrawl.Stanford.EDU)
User-Agent: PubCrawl (pubcrawl.stanford.edu)
Disallow: /
# Parallel URL Fetcher downloading tool
User-Agent: puf/0.91beta6a (Linux 2.2.18; i686)
Disallow: /
# Parallel URL Fetcher downloading tool
User-Agent: puf/0.93.2a (Linux 2.4.18; i686)
Disallow: /
# WebarooBot - Webaroo web site search / theme based downloading tool (64.124.122.2xx)
User-Agent: pulseBot (pulse Web Miner)
Disallow: /
# PureSight Internet content filter
User-Agent: PureSight
Disallow: /
# Puxa Rapido download manager
User-Agent: PuxaRapido v1.0
Disallow: /
# ProgramacionWeb.net PWeBot link checking (Argentina)
User-Agent: PWeBot/1.2 Inspector (http://www.programacionweb.net/robot.php)
Disallow: /
# Fast Search robot (using PycURL Python component- s. below)
User-Agent: PycURL
Disallow: /
# PycURL - Python interface to libcurl
User-Agent: PycURL/7.xx.x
Disallow: /
# Python URL fetcher - robot used by Naver Japan/Korea
User-Agent: Python-urllib/1.1x
Disallow: /
# Python URL fetcher - robot used by Google
User-Agent: Python-urllib/2.0a1
Disallow: /
# Qango.com Web Directory robot
User-Agent: Qango.com Web Directory (http://www.qango.com/)
Disallow: /
# QEAVis: Quantitative Evaluation of Academic Websites Visibility using Nutch
User-Agent: QEAVis Agent/Nutch-0.9 (Quantitative Evaluation of Academic Websites Visibility; http://nlp.uned.es/qeavis
Disallow: /
# Quepasa!com (Latin American search) robot
User-Agent: QPCreep Test Rig ( We are not indexing- just testing )
Disallow: /
# Quepasa!com (Latin American search) robot
User-Agent: QuepasaCreep ( crawler@quepasacorp.com )
Disallow: /
# Quepasa!com (Latin American search) robot
User-Agent: QuepasaCreep v0.9.1x
Disallow: /
# QueryN Metasearch robot
User-Agent: QueryN Metasearch
Disallow: /
# Blacktrees Quicksilver helper application for Mac
User-Agent: Quicksilver (Blacktree,MacOSX)
Disallow: /
# Quicktime for Macintosh
User-Agent: QuickTime\xaa.7.0.4 (qtver=7.0.4;cpu=PPC;os=Mac 10.3.9)
Disallow: /
# Qweerybot for the Qweery search engine (in development) - Netherlands
User-Agent: QweeryBot/3.01 ( http://qweerybot.qweery.nl)
Disallow: /
# Qweerybot for the Qweery search engine (in development) - Netherlands
User-Agent: Qweery_robot.txt_CheckBot/3.01 (http://qweerybot.qweery.com)
Disallow: /
# Radian6 RSS feed comment crawler
User-Agent: R6_CommentReader_(www.radian6.com/crawler)
Disallow: /
# Radian6 RSS feed crawler
User-Agent: R6_FeedFetcher_(www.radian6.com/crawler)
Disallow: /
# gigaBaz - the brainbot (Germany) robot
User-Agent: rabaz (rabaz at gigabaz dot com)
Disallow: /
# DAUMOA - Daum search Korea robot (211.115.109.xxx)
User-Agent: RaBot/1.0 Agent-admin/phortse@hanmail.net
Disallow: /
# Bot Provider for the All Womans Bot Service?
User-Agent: Rainbot1.1
Disallow: /
# Intersearch.de (was www.intersearch.de) robot (Germany)
User-Agent: ramBot xtreme x.x
Disallow: /
# giveRAMP Search Engine robot (64.69.43.1xx)
User-Agent: RAMPyBot - www.giveRAMP.com/0.1 (RAMPyBot - www.giveRAMP.com; http://www.giveramp.com/bot.html; support@giveRAMP.com)
Disallow: /
# giveRAMP Search Engine robot (64.69.43.1xx)
User-Agent: RAMPyBot/0.8-dev (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
Disallow: /
# Rank Exec reciprocal link checking
User-Agent: Rank Exec (rankexec.com) Reciprocal Link Manager 1.x/bot
Disallow: /
# Rankiva website popularity robot
User-Agent: Rankivabot/3.2 (www.rankiva.com; 3.2; vzmxikn)
Disallow: /
# Innova/IBM Rational SiteCheck - Rational robot
User-Agent: Rational SiteCheck (Windows NT)
Disallow: /
# Unknown spider from Raytheon Company - maybe Raytheon High Speed Guard proxy
User-Agent: RAYSPIDER/Nutch-0.9
Disallow: /
# Read A Blog - RSS feed and blog search engine
User-Agent: ReadABlog Spider (compatible; 1.1; feed update; www.readablog.com)
Disallow: /
# RealDownload download manager
User-Agent: RealDownload/4.0.0.4x
Disallow: /
# The REAP Web Crawler for the REAP project
User-Agent: REAP-crawler Nutch/Nutch-1.0-dev (Reap Project; http://reap.cs.cmu.edu/REAP-crawler/; Reap Project)
Disallow: /
# Reaper robot for SiteSearch
User-Agent: Reaper [2.03.10-031204] (http://www.sitesearch.ca/reaper/)
Disallow: /
# Reaper robot for SiteSearch
User-Agent: Reaper/2.0x (+http://www.sitesearch.ca/reaper)
Disallow: /
# REBOL messaging language for distributed Internet apps
User-Agent: REBOL Core 2.x.x.x.x
Disallow: /
# REBOL/View - machine independent internet client application
User-Agent: REBOL View 1.x.x.x.x
Disallow: /
# Rebusnet software site - link / submission checking
User-Agent: RebusnetBot (+http://www.rebusnet.biz)
Disallow: /
# Rebusnet software site - link / submission checking
User-Agent: RebusnetPADBot/1.5x (+http://www.rebusnet.biz)
Disallow: /
# Online reciprocal link checker
User-Agent: reciprocal links checker (http://www.recip-links.com/)
Disallow: /
# rediff.com search link checking
User-Agent: RedBot/redbot-1.0 (Rediff.com Crawler; redbot at rediff dot com)
Disallow: /
# RedCarpet crawler for Pronto price comparison search (66.179.107.1xx)
User-Agent: RedCarpet/1.2 (http://www.redcarpet-inc.com/robots.html)
Disallow: /
# Der Bot for telegenetic.net's security related search (65.220.67.2xx)
User-Agent: RedCell/0.1 (InfoSec Search Bot (Coming Soon); http://www.telegenetic.net/bot.html; lhall@telegenetic.net)
Disallow: /
# Der Bot for telegenetic.net's security related search (65.220.67.2xx)
User-Agent: RedCell/0.1 (RedCell; telegenetic.net/bot.html; lhall_at_telegenetic.net)
Disallow: /
# RedKernel Softwares robot
User-Agent: RedKernel WWW-Spider 2/0 (+http://www-spider.redkernel-softwares.com/)
Disallow: /
# REL Link Checker Lite free version of Web Link Validator
User-Agent: REL Link Checker Lite x.x
Disallow: /
# unknown
User-Agent: RepoMonkey Bait & Tackle/v1.01
Disallow: /
# Rewebber proxy service
User-Agent: Rewebber/1.2 libwww-perl/5.41
Disallow: /
# Applied Semantics Auto-Categorizer for QWestDex Direct
User-Agent: rico/0.1
Disallow: /
# RixBot Rebol Indexer for the RIX - Rebol related search (195.204.121.xx)
User-Agent: RixBot (http://babelserver.org/rix)
Disallow: /
# StreamBox VCR user agent
User-Agent: RMA/1.0 (compatible; RealMedia)
Disallow: /
# Real Media server acting as client
User-Agent: RMA/1.0 (compatible; RealMedia)
Disallow: /
# Canadian Content search crawler
User-Agent: RoboCrawl (http://www.canadiancontent.net)
Disallow: /
# Canadian Content Search (207.44.220.xx) robot
User-Agent: RoboCrawl (www.canadiancontent.net)
Disallow: /
# FindPal Australia metasearch robot (61.68.139.xx)
User-Agent: RoboPal (http://www.findpal.com/)
Disallow: /
# PopJapanSearch robot
User-Agent: Robot/www.pj-search.com
Disallow: /
# Experimental robot using Wget via attbi.net
User-Agent: Robot: NutchCrawler- Owner: wdavies@acm.org
Disallow: /
# Supersnooper robot
User-Agent: Robot@SuperSnooper.Com
Disallow: /
# Netscape Directory / DMOZ Open Directory link crawler
User-Agent: Robozilla/1.0
Disallow: /
# ROME - Open source Java tools for RSS and Atom feeds
User-Agent: Rome Client (http://tinyurl.com/64t5n) Ver: 0.9
Disallow: /
# Qualigo.de robot
User-Agent: Rotondo/3.1 libwww/5.3.1
Disallow: /
# different IPs using the HTTPClient library (mostly link checking)
User-Agent: RPT-HTTPClient/0.3-x
Disallow: /
# Metacarta.com (66.28.xx.xxx) robot
User-Agent: RRC (crawler_admin@bigfoot.com)
Disallow: /
# RSS Bandit RSS/Atom reader for .NET framework
User-Agent: RssBandit/1.5.0.10 (.NET CLR 1.1.4322.2407; WinNT 5.1.2600.0; http://www.rssbandit.org) (.NET CLR 1.1.4322.2407; WinNT 5.1.2600.0; )
Disallow: /
# RSS Micro Search - RSS feed search engine
User-Agent: RSSMicro.com RSS/Atom Feed Robot
Disallow: /
# RSSOwl embedded RSS feed reader
User-Agent: RSSOwl/1.2.3 2006-11-26 (Windows; U; zhtw)
Disallow: /
# RSSOwl embedded RSS feed reader
User-Agent: RSSOwl/1.2.4 Preview Release 2007-04-15 (Windows; U; zhtw)
Disallow: /
# Ykoon RssReader news feed reader
User-Agent: RssReader/1.0.xx.x (http://www.rssreader.com) Microsoft Windows NT 5.1.2600.0
Disallow: /
# Some site scanning tool via diff. IPs- i.e.: - dslx.net (208.35.1x.xxx) - Home.com
User-Agent: RSurf15a 41
Disallow: /
# Some site scanning tool via diff. IPs- i.e.: - dslx.net (208.35.1x.xxx) - Home.com
User-Agent: RSurf15a 51
Disallow: /
# Some site scanning tool via diff. IPs- i.e.: - dslx.net (208.35.1x.xxx) - Home.com
User-Agent: RSurf15a 81
Disallow: /
# rubhub blog spider
User-Agent: Rubbot/1.0 (+http://rubhub.com/)
Disallow: /
# WebarooBot - Webaroo web site search / theme based downloading tool (64.124.122.2xx)
User-Agent: RufusBot (Rufus Web Miner; http://64.124.122.252/feedback.html)
Disallow: /
# WebarooBot - Webaroo web site search / theme based downloading tool (64.124.122.2xx)
User-Agent: RufusBot (Rufus Web Miner; http://www.webaroo.com/rooSiteOwners.html)
Disallow: /
# unknown robot from rumours.jp (202.214.69.xxx)
User-Agent: Rumours-Agent
Disallow: /
# RX (Reflexive Search) Bar for IE
User-Agent: RX Bar
Disallow: /
# Search & Links directory spider
User-Agent: S&L Spider (http://search.hirners.com/)
Disallow: /
# SEO-Tools.net link checking ?
User-Agent: S.T.A.L.K.E.R. (http://www.seo-tools.net/en/bot.aspx)
Disallow: /
# SafariBookmarkChecker for Mac OS X
User-Agent: SafariBookmarkChecker (+http://www.coriolis.ch/)
Disallow: /
# sait robot - unknown robot from Samsung International Korea
User-Agent: sait/Nutch-0.9 (SAIT Research; http://www.samsung.com)
Disallow: /
# Sandcrawler robot from Microsoft (131.107.0.xx)
User-Agent: SandCrawler - Compatibility Testing
Disallow: /
# Sapphire Web Crawler from Carnegie Mellon University's Language Technologies Institute
User-Agent: SapphireWebCrawler/1.0 (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)
Disallow: /
# Sapphire Web Crawler from Carnegie Mellon University's Language Technologies Institute
User-Agent: SapphireWebCrawler/Nutch-1.0-dev (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)
Disallow: /
# WebSavvy Directory robot
User-Agent: savvybot/0.2
Disallow: /
# SiteSell SBIder Nutch based crawler
User-Agent: SBIder/0.7 (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)
Disallow: /
# SiteSell SBIder Nutch based crawler
User-Agent: SBIder/0.8-dev (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)
Disallow: /
# Softbyte Labs Black Widow web site ripper
User-Agent: SBL-BOT (http://sbl.net)
Disallow: /
# ScanWeb - regular expression based web page searching tool
User-Agent: ScanWeb
Disallow: /
# ScholarUniverse - Scholarly experts search robot
User-Agent: ScholarUniverse/0.8 (Nutch;+http://scholaruniverse.com/bot.jsp; fetch-agent@scholaruniverse.com)
Disallow: /
# URL Spider Pro (USP) used by German Schwarzmann GmbH
User-Agent: schwarzmann.biz-Spider_for_paddel.org+(http://www.innerprise.net/usp-spider.asp)
Disallow: /
# 1X Web Browser
User-Agent: Science Traveller International 1X/1.0
Disallow: /
# WebWobot UK search engine robot (82.43.129.2xx)
User-Agent: ScollSpider/2.0 (+http://www.webwobot.com/ScollSpider.php)
Disallow: /
# Altavista robot
User-Agent: Scooter-3.0.EU
Disallow: /
# Altavista robot
User-Agent: Scooter-3.0.FS
Disallow: /
# Altavista robot
User-Agent: Scooter-3.0.HD
Disallow: /
# Altavista robot
User-Agent: Scooter-3.0.VNS
Disallow: /
# Altavista robot
User-Agent: Scooter-3.0QI
Disallow: /
# Altavista robot
User-Agent: Scooter-3.2
Disallow: /
# Altavista robot
User-Agent: Scooter-3.2.BT
Disallow: /
# Altavista robot
User-Agent: Scooter-3.2.DIL
Disallow: /
# Altavista robot
User-Agent: Scooter-3.2.EX
Disallow: /
# Altavista robot
User-Agent: Scooter-3.2.JT
Disallow: /
# Altavista robot
User-Agent: Scooter-3.2.NIV
Disallow: /
# Altavista robot
User-Agent: Scooter-3.2.SF0
Disallow: /
# Altavista robot
User-Agent: Scooter-3.2.snippet
Disallow: /
# Altavista robot
User-Agent: Scooter-3.3dev
Disallow: /
# Altavista robot
User-Agent: Scooter-ARS-1.1
Disallow: /
# Altavista robot
User-Agent: Scooter-ARS-1.1-ih
Disallow: /
# Altavista robot
User-Agent: scooter-venus-3.0.vns
Disallow: /
# Altavista robot
User-Agent: Scooter-W3-1.0
Disallow: /
# Altavista robot
User-Agent: Scooter-W3.1.2
Disallow: /
# Altavista robot
User-Agent: Scooter/1.0
Disallow: /
# Altavista robot
User-Agent: Scooter/1.0 scooter@pa.dec.com
Disallow: /
# Altavista robot
User-Agent: Scooter/1.1 (custom)
Disallow: /
# Altavista robot
User-Agent: Scooter/2.0 G.R.A.B. V1.1.0
Disallow: /
# Altavista robot
User-Agent: Scooter/2.0 G.R.A.B. X2.0
Disallow: /
# Altavista robot
User-Agent: Scooter/3.3
Disallow: /
# Altavista robot
User-Agent: Scooter/3.3.QA.pczukor
Disallow: /
# Altavista robot
User-Agent: Scooter/3.3.vscooter
Disallow: /
# Altavista robot
User-Agent: Scooter/3.3_SF
Disallow: /
# Altavista using Mercator robot
User-Agent: Scooter2_Mercator_x-x.0
Disallow: /
# Altavista robot
User-Agent: Scooter_bh0-3.0.3
Disallow: /
# Altavista robot
User-Agent: Scooter_trk3-3.0.3
Disallow: /
# Scope Navigator mobile browser (Japan)
User-Agent: Scope (Mars+)
Disallow: /
# Some nec.com robot using Research Republic ScoutAbout Research Tool
User-Agent: ScoutAbout
Disallow: /
# Ant.com search robot
User-Agent: ScoutAnt/0.1; +http://www.ant.com/what_is_ant.com/
Disallow: /
# ScoutMaster information retrieval software
User-Agent: scoutmaster
Disallow: /
# Scrub the web robot (66.93.156.xx)
User-Agent: Scrubby/2.x (http://www.scrubtheweb.com/)
Disallow: /
# Scrub the web robot (66.93.156.xx)
User-Agent: Scrubby/3.0 (+http://www.scrubtheweb.com/help/technology.html)
Disallow: /
# URL Search+ search software
User-Agent: Search+
Disallow: /
# Xtreem Search Engine Studio - SE software
User-Agent: Search-Engine-Studio
Disallow: /
# Search.ch robot
User-Agent: search.ch V1.4
Disallow: /
# Search.ch robot
User-Agent: search.ch V1.4.2 (spiderman@search.ch; http://www.search.ch)
Disallow: /
# Enterprise Search web indexing / site searching tool
User-Agent: Search/1.0 (http://www.innerprise.net/es-spider.asp)
Disallow: /
# Unknown robot / website grabber from Chinatelecom (219.142.78.xxx)
User-Agent: searchbot admin@google.com
Disallow: /
# SearchByUSA robot (69.150.7.xxx)
User-Agent: SearchByUsa/2 (SearchByUsa; http://www.SearchByUsa.com/bot.html; info@SearchByUsa.com)
Disallow: /
# Searchday (Germany) search robot
User-Agent: SearchdayBot
Disallow: /
# Searchexpress spider
User-Agent: SearchExpress Spider0.99
Disallow: /
# Searchguild forum & directory robot (81.3.75.xxx)
User-Agent: SearchGuild/DMOZ/Experiment (searchguild@gmail.com)
Disallow: /
# Searchguild forum & directory robot (81.3.75.xxx)
User-Agent: SearchGuild_DMOZ_Experiment (chris@searchguild.com)
Disallow: /
# Searchit robot (69.93.107.xx)
User-Agent: Searchit-Now Robot/2.2 (+http://www.searchit-now.co.uk)
Disallow: /
# Searchmee! Search Engine (prototype) robot by findanisp.com
User-Agent: Searchmee! Spider v0.98a
Disallow: /
# SearchSight search robot
User-Agent: SearchSight/2.0 (http://SearchSight.com/)
Disallow: /
# SearchSpider robot
User-Agent: SearchSpider.com/1.1
Disallow: /
# SearchSpider robot
User-Agent: Searchspider/1.2 (SearchSpider; http://www.searchspider.com; webmaster@searchspider.com)
Disallow: /
# Janas (Ideare.com / Tiscali.it) robot
User-Agent: SearchTone2.0 - IDEARE
Disallow: /
# seekport. beta search (Germany) robot
User-Agent: Seekbot/1.0 (http://www.seekbot.net/bot.html) HTTPFetcher/0.3
Disallow: /
# seekport. beta search (Germany) robot
User-Agent: Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.0 (XDF)
Disallow: /
# seekport. beta search (Germany) robot
User-Agent: Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2
Disallow: /
# Lookseek search robot / link checking
User-Agent: Seeker.lookseek.com
Disallow: /
# semaforo.net web filtering software
User-Agent: semaforo.net
Disallow: /
# Semager.de (was NG-Search) semantic search - Germany
User-Agent: Semager/1.1 (http://www.semager.de/blog/semager-bots/)
Disallow: /
# Semager.de (was NG-Search) semantic search - Germany
User-Agent: Semager/1.x (http://www.semager.de)
Disallow: /
# Semantic Discovery domain checking tool
User-Agent: semanticdiscovery/0.x
Disallow: /
# Sensis Australia search robot
User-Agent: Sensis Web Crawler (search_comments\at\sensis\dot\com\dot\au)
Disallow: /
# Sensis Australia search robot
User-Agent: Sensis.com.au Web Crawler (search_comments\at\sensis\dot\com\dot\au)
Disallow: /
# Seznam Search (Czech Republic) robot
User-Agent: SeznamBot/1.0
Disallow: /
# Seznam Search (Czech Republic) robot
User-Agent: SeznamBot/1.0 (+http://fulltext.seznam.cz/)
Disallow: /
# Seznam Search (Czech Republic) robot
User-Agent: SeznamBot/2.0-test (+http://fulltext.sblog.cz/)
Disallow: /
# Unknown robot from Shablast.com - Website has no content - Ignores robots.txt
User-Agent: ShablastBot 1.0
Disallow: /
# Shareaza P2P peer-to-peer download client
User-Agent: Shareaza v1.x.x.xx
Disallow: /
# SharewarePlaza File Check Bot - link checking
User-Agent: SharewarePlazaFileCheckBot/1.0+(+http://www.SharewarePlaza.com)
Disallow: /
# Chikayama-Taura Lab Shim-Crawler used for The Kototoi Project (Japan) - (133.11.36.xx)
User-Agent: Shim Crawler
Disallow: /
# Chikayama-Taura Lab Shim-Crawler used for The Kototoi Project (Japan) - (133.11.36.xx)
User-Agent: Shim-Crawler(Mozilla-compatible; http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp)
Disallow: /
# ShopWiki shopping search based on LittleWiki search
User-Agent: ShopWiki/1.0 ( +http://www.shopwiki.com/)
Disallow: /
# ShopWiki shopping search based on LittleWiki search
User-Agent: ShopWiki/1.0 ( +http://www.shopwiki.com/wiki/Help:Bot)
Disallow: /
# Shoula Search Engine crawler
User-Agent: Shoula.com Crawler 2.0
Disallow: /
# Siets Crawler - Web based site crawling application
User-Agent: SietsCrawler/1.1 (+http://www.siets.biz)
Disallow: /
# Sigram's Nutch robot - crawler testing
User-Agent: Sigram/Nutch-1.0-dev (Test agent for Nutch development; http://www.sigram.com/bot.html; bot at sigram dot com)
Disallow: /
# Siigle search (Turkey) robot
User-Agent: Siigle Orumcex v.001 Turkey (http://www.siigle.com)
Disallow: /
# Slider Search directory robot (194.213.194.2xx)
User-Agent: silk/1.0
Disallow: /
# Slider Search directory robot (194.213.194.2xx)
User-Agent: silk/1.0 (+http://www.slider.com/silk.htm)/3.7
Disallow: /
# SimpleFavPanel - IE newsfeed panel plugin
User-Agent: SimpleFavPanel/1.2
Disallow: /
# Simpy bookmarking and personal search engine
User-Agent: Simpy 1.x; http://www.simpy.com/
Disallow: /
# Simpy bookmarking and personal search engine
User-Agent: Simpy/1.x (Simpy; http://www.simpy.com/?ref=bot; feedback at simpy dot com)
Disallow: /
# Sirketçe search - Turkey
User-Agent: Sirketcebot/v.01 (http://www.sirketce.com/bot.html)
Disallow: /
# SiteBar online bookmark manager
User-Agent: SiteBar/3.x.x (Bookmark Server; http://sitebar.org/)
Disallow: /
# SiteBar bookmark server
User-Agent: SiteBar/x.x
Disallow: /
# SiteBar bookmark server
User-Agent: SiteBar/x.x.x (Bookmark Server; http://sitebar.org/)
Disallow: /
# Internetseer Web Site Monitoring / Claymont robot
User-Agent: sitecheck.internetseer.com
Disallow: /
# Internetseer Web Site Monitoring
User-Agent: sitecheck.internetseer.com (For more info see: http://sitecheck.internetseer.com)
Disallow: /
# SiteRecon website monitoring spider at xx minute intervals
User-Agent: SiteRecon+(xx)
Disallow: /
# PC Magazin web site download manager
User-Agent: SiteSnagger
Disallow: /
# Site Spider robot (66.249.17.xx)
User-Agent: SiteSpider +(http://www.SiteSpider.com/)
Disallow: /
# SiteSucker Mac website downloading tool
User-Agent: SiteSucker/1.x.x
Disallow: /
# SiteTagger.com bookmark organizer
User-Agent: SiteTaggerBot (http://www.sitetagger.com/bot.htm)
Disallow: /
# SiteTruth - Automatic site legitimacy rating system
User-Agent: SiteTruth.com site rating system
Disallow: /
# Webwasher.com (217.146.159.xx) internet filter
User-Agent: SiteWinder
Disallow: /
# Xtreeme SiteXpert sitemap & search engine builder
User-Agent: SiteXpert
Disallow: /
# Skaffe.com directory link checker
User-Agent: Skampy/0.9.x (http://www.skaffe.com/skampy-info.html)
Disallow: /
# Skaffe.com directory link checker
User-Agent: Skimpy/0.x (http://www.skaffe.com/skampy-info.html)
Disallow: /
# Visvo distributed website crawler based on Nutch
User-Agent: Skywalker/0.1 (Skywalker; anonymous; anonymous)
Disallow: /
# Only.com robot
User-Agent: Slarp/0.1
Disallow: /
# Sleipnir - Japanese Explorer based browser & search bar
User-Agent: Sleipnir
Disallow: /
# Sleipnir - Japanese Explorer based browser & search bar
User-Agent: Sleipnir Version 1.xx
Disallow: /
# Sleipnir - Japanese Explorer based browser & search bar
User-Agent: Sleipnir Version2.x
Disallow: /
# Sleipnir - Japanese Explorer based browser & search bar
User-Agent: Sleipnir/2.xx
Disallow: /
# Slider Search directory robot (194.213.194.2xx)
User-Agent: Slider_Search_v1-de
Disallow: /
# Slim Browser (IE based browser) - uses this user agent for favicon.ico only
User-Agent: SlimBrowser
Disallow: /
# Inktomi (Hotbot-Lycos-NBCi) robot
User-Agent: Slurp/2.0 (slurp@inktomi.com; http://www.inktomi.com/slurp.html)
Disallow: /
# Inktomi (Hotbot-Lycos-NBCi) robot
User-Agent: Slurp/2.0-KiteWeekly (slurp@inktomi.com; http://www.inktomi.com/slurp.html)
Disallow: /
# Inktomi (Hotbot-Lycos-NBCi) robot
User-Agent: Slurp/si (slurp@inktomi.com; http://www.inktomi.com/slurp.html)
Disallow: /
# Inktomi (Hotbot-Lycos-NBCi) robot - 72.30.61.xx(x)
User-Agent: Slurpy Verifier/1.0
Disallow: /
# Slysearch robot (now Turnitin robot)
User-Agent: SlySearch (slysearch@slysearch.com)
Disallow: /
# Slysearch robot (now Turnitin robot)
User-Agent: SlySearch/1.0 http://www.plagiarism.org/crawler/robotinfo.html
Disallow: /
# Slysearch robot (now Turnitin robot)
User-Agent: SlySearch/1.x http://www.slysearch.com
Disallow: /
# Netzip/Smartdownload download manager
User-Agent: SmartDownload/1.2.67 (Win32; Jan 12 1999)
Disallow: /
# Netzip/Smartdownload download manager
User-Agent: SmartDownload/1.2.77 (Win32; Feb 1 2000)
Disallow: /
# Netzip/Smartdownload download manager
User-Agent: SmartDownload/1.2.77 (Win32; Jun 19 2001)
Disallow: /
# Loop Improvements NRS Enterprise search (69.44.155.xx[x])
User-Agent: smartwit.com
Disallow: /
# SmiffyDCMetaSpider - Robot to check the retro-adding of Dublin Core metadata
User-Agent: SmiffyDCMetaSpider/1.0
Disallow: /
# Snoopy PHP-client
User-Agent: sna-0.0.1 (mikemuzio@msn.com)
Disallow: /
# Snoopy PHP-client
User-Agent: sna-0.0.1 mikeelliott@hotmail.com
Disallow: /
# Unknown bot from bb2.net (66.234.139.xxx) also as Snapbot/1.0
User-Agent: snap.com beta crawler v0
Disallow: /
# Unknown bot from bb2.net (66.234.139.xxx) - also as snap.com
User-Agent: Snapbot/1.0
Disallow: /
# Unknown bot from Psinet / Cogentco - not from Snap.com
User-Agent: Snapbot/1.0 (Snap Shots, +http://www.snap.com)
Disallow: /
# My UrlTrends online web ranking service
User-Agent: Snappy/1.1 ( http://www.urltrends.com/ )
Disallow: /
# Snarfer RSS reader
User-Agent: Snarfer/0.x.x (http://www.snarfware.com/)
Disallow: /
# Unknown robot from 217.229.156.xx (T-Online Germany)
User-Agent: SnoopRob/x.x
Disallow: /
# Snoopy PHP-client
User-Agent: Snoopy v1.xx
Disallow: /
# Snoopy PHP-client
User-Agent: Snoopy v1.xx- : User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; MyIE2)
Disallow: /
# Snoopy PHP-client
User-Agent: Snoopy_v0.xx
Disallow: /
# Snyke.com France robot
User-Agent: SnykeBot/0.6 (http://www.snyke.com)
Disallow: /
# Link crawler for the social sciences
User-Agent: SocSciBot ()
Disallow: /
# NetFront browser on Softbank mobile phone
User-Agent: SoftBank/1.0/812SH/SHJ001 Browser/NetFront/3.3 Profile/MIDP-2.0 Configuration/CLDC-1.1
Disallow: /
# Soft Hypermarket link checking
User-Agent: SoftHypermarketFileCheckBot/1.0+(+http://www.softhypermaket.com)
Disallow: /
# Softizer.com software directory link checking
User-Agent: Softizerbot (http://www.softizer.com)
Disallow: /
# Unknown UA from Chinanet (220.181.26.1xx) faking Sogou search robot
User-Agent: sogou develop spider
Disallow: /
# Unknown UA from Chinanet (220.181.18.xx) faking Sogou search robot
User-Agent: Sogou Orion spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)
Disallow: /
# Unknown UA from Chinanet (220.181.26.1xx) faking Sogou search robot
User-Agent: sogou spider
Disallow: /
# Unknown UA from Chinanet (220.181.26.1xx) faking Sogou search robot
User-Agent: Sogou web spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)
Disallow: /
# Unknown UA from Chinanet (220.181.26.1xx) faking Sogou search robot
User-Agent: sohu agent
Disallow: /
# Sohu (Search Fox) search robot China (61.135.131.xxx)
User-Agent: sohu-search
Disallow: /
# SOSO search (China) spider
User-Agent: Sosospider+(+http://help.soso.com/webspider.htm)
Disallow: /
# Default Proxomitron (discontinued) filtering proxy user agent identifier
User-Agent: Space Bison/0.02 [fu] (Win67; X; SK)
Disallow: /
# Speed Download (Mac) download manager
User-Agent: SpeedDownload/1.x
Disallow: /
# Speedfind.de robot
User-Agent: speedfind ramBot xtreme 8.1
Disallow: /
# Entireweb search robot
User-Agent: Speedy Spider (Beta/x.x; speedy@entireweb.com)
Disallow: /
# Entireweb search spider
User-Agent: Speedy Spider (Entireweb; Beta/1.0; http://www.entireweb.com/about/search_tech/speedyspider/)
Disallow: /
# Entireweb search robot
User-Agent: Speedy_Spider (http://www.entireweb.com)
Disallow: /
# Sphere blog and news search robot
User-Agent: Sphere Scout&v4.0 - scout at sphere dot com
Disallow: /
# Sphider - a lightweight search engine in PHP
User-Agent: Sphider
Disallow: /
# Only.com robot
User-Agent: Spida/0.1
Disallow: /
# Search-Info ODP/DMOZ spider
User-Agent: Spider-Sleek/2.0 (+http://search-info.com/linktous.html)
Disallow: /
# Batsch robot
User-Agent: spider.batsch.com
Disallow: /
# TerraNautic spider for Schnellsuchen touristic search (Germany)
User-Agent: Spider.TerraNautic.net - v:1.04
Disallow: /
# Yellopet spider
User-Agent: spider.yellopet.com - www.yellopet.com
Disallow: /
# Maxbot .gov .mil .edu indexing robot
User-Agent: Spider/maxbot.com admin@maxbot.com
Disallow: /
# Unknown robot from CPE at Kasetsart University (158.108.35.xxx)
User-Agent: SpiderKU/0.x
Disallow: /
# Yahoo Search user agent or spider (202.165.102.xxx)
User-Agent: SpiderMan
Disallow: /
# SpiderMonkey Canada robot
User-Agent: SpiderMonkey/7.0x (SpiderMonkey.ca info at http://spidermonkey.ca/sm.shtml)
Disallow: /
# Spider.de robot
User-Agent: Spinne/2.0
Disallow: /
# Medkatalog (medical catalogue) Austria robot
User-Agent: Spinne/2.0 med
Disallow: /
# Medkatalog (medical catalogue) Austria robot
User-Agent: Spinne/2.0 med_AH
Disallow: /
# Spock - people search application - via Amazon web services
User-Agent: Spock Crawler (http://www.spock.com/crawler)
Disallow: /
# Sportsuchmaschine (German sports related search) link checking / robot
User-Agent: sportsuchmaschine.de-Robot (Version: 1.02- powered by www.sportsuchmaschine.de)
Disallow: /
# Sproose personalized search (38.100.225.xx)
User-Agent: sproose/0.1-alpha (sproose crawler; http://www.sproose.com/bot.html; crawler@sproose.com)
Disallow: /
# SQ Webscanner Mac download manager
User-Agent: SQ Webscanner
Disallow: /
# Simple page-prefetch for Squid web proxy
User-Agent: Squid-Prefetch
Disallow: /
# Squidclam is a replacement for SquidClamAV-Redirector
User-Agent: squidclam
Disallow: /
# SCAVR - Squid helper script for scanning download URLs for viruses
User-Agent: SquidClamAV_Redirector 1.x.x
Disallow: /
# AOL Search / Pacific Internet Exchange robot
User-Agent: Sqworm/2.9.81-BETA (beta_release; 20011102-760; i686-pc-linux-gnu)
Disallow: /
# Seen from different IPs / services, e.g. Inria.fr robot, Websense (Internet filtering) robot
User-Agent: Sqworm/2.9.85-BETA (beta_release; 20011115-775; i686-pc-linux-gnu)
Disallow: /
# Time Warner Telecom user robot ?
User-Agent: Sqworm/2.9.89-BETA (beta_release; 20020130-839; i686-pc-linux-gnu)
Disallow: /
# Some site scanning tool via different IPs, e.g. choiceone.net (216.153.xxx.xxx), epix.net (216.108.198.xx)
User-Agent: SSurf15a 11
Disallow: /
# Rambler search (Russia) robot (81.19.6x.xx)
User-Agent: StackRambler/x.x
Disallow: /
# Stamina download manager
User-Agent: Stamina/1.4
Disallow: /
# Star Downloader download manager
User-Agent: Star Downloader
Disallow: /
# Star Downloader download manager
User-Agent: StarDownloader/1.xx
Disallow: /
# Experimental search engine spider from 66.92.186.xxx
User-Agent: stat statcrawler@gmail.com
Disallow: /
# Steeler crawler
User-Agent: Steeler/1.x (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)
Disallow: /
# Steeler - University of Tokyo web crawler
User-Agent: Steeler/3.3 (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)
Disallow: /
# Strategic Board blog & news search robot
User-Agent: Strategic Board Bot (+http://www.strategicboard.com)
Disallow: /
# Strategic Board blogs and news aggregator robot
User-Agent: Strategic Board Bot (+http://www.strategicboard.com)
Disallow: /
# Surfsafely submission verifier
User-Agent: Submission Spider at surfsafely.com
Disallow: /
# Suchbaer.de (Germany) search robot
User-Agent: suchbaer.de
Disallow: /
# Suchbaer.de (Germany) search robot
User-Agent: suchbaer.de (CrawlerAgent v0.103)
Disallow: /
# Suchbot Germany robot
User-Agent: suchbot
Disallow: /
# Suchknecht Austria robot
User-Agent: Suchknecht.at-Robot
Disallow: /
# suchpad search Germany robot (213.239.194.xx)
User-Agent: suchpadbot/1.0 (+http://www.suchpad.de)
Disallow: /
# Sunrise XP handheld news / website reader and converter
User-Agent: Sunrise XP/2.x
Disallow: /
# Sunrise XP web sites and newsfeeds converter and handheld reader
User-Agent: Sunrise/0.42g (Windows XP)
Disallow: /
# SuperBot website copier
User-Agent: SuperBot/x.x (Win32)
Disallow: /
# SuperBot website copier
User-Agent: SuperBot/x.x.x.xx (Windows XP)
Disallow: /
# Ubbi Superdownloads (Brazil) link checking
User-Agent: Superdownloads Spiderman
Disallow: /
# SurfControl Web Filtering
User-Agent: SURF
Disallow: /
# Wanadoo Recherche robot
User-Agent: SurferF3 1/0
Disallow: /
# Maskbit Surfmaster bookmark tool
User-Agent: SurfMaster
Disallow: /
# Whois Source domain name information robot (66.249.26.xx)
User-Agent: SurveyBot/2.2 Whois Source
Disallow: /
# Whois Source domain name information robot (66.249.26.xx)
User-Agent: SurveyBot/2.3 (Whois Source)
Disallow: /
# Yokogao Search Engine robot (Kanazawa University)
User-Agent: suzuran
Disallow: /
# HP Secure Web Browser for OpenVMS
User-Agent: SWB/V1.4 (HP)
Disallow: /
# unknown
User-Agent: swbot/0.9c libwww/5.3.1
Disallow: /
# Swooglebot Swoogle's semantic web crawler
User-Agent: Swooglebot/2.0. (+http://swoogle.umbc.edu/swooglebot.htm)
Disallow: /
# SWSBot - SmartWareSoft (85.186.255.xx) software search engine created for Playfuls.com
User-Agent: SWSBot-Images/1.2 http://www.smartwaresoft.com/swsbot12.html
Disallow: /
# Sygol Search (Italy) robot
User-Agent: SygolBot http://www.sygol.net
Disallow: /
# Sylera browser (Japan)
User-Agent: Sylera/1.2.x
Disallow: /
# Mindspring.com user robot
User-Agent: SyncBot
Disallow: /
# SyncIT link validation
User-Agent: SyncIT/x.x
Disallow: /
# Syndirella desktop information aggregator (beta)
User-Agent: Syndirella/0.91pre
Disallow: /
# Synomia (France) robot
User-Agent: SynoBot
Disallow: /
# Syntryx Solution Suite - domain / keyword crawler (216.7.179.xx)
User-Agent: Syntryx ANT Scout Chassis Pheromone; Mozilla/4.0 compatible crawler
Disallow: /
# Szukacz.pl (Polish search) robot
User-Agent: Szukacz/1.x
Disallow: /
# Szukacz.pl (Polish search) robot
User-Agent: Szukacz/1.x (robot; www.szukacz.pl/jakdzialarobot.html; szukacz@proszynski.pl)
Disallow: /
# German T-Online browser & internet suite
User-Agent: T-Online Browser
Disallow: /
# tags2dir.com directory index
User-Agent: tags2dir.com/0.8 (+http://tags2dir.com/directory/)
Disallow: /
# TAGword DMOZ survey - ODP link checking robot
User-Agent: Tagword (http://tagword.com/dmoz_survey.php)
Disallow: /
# Tagyu - del.icio.us bookmark collection online tag generator
User-Agent: Tagyu Agent/1.0
Disallow: /
# Daumsoft Talkro IR robot
User-Agent: Talkro Web-Shot/1.0 (E-mail: webshot@daumsoft.com- Home: http://222.122.15.190/webshot)
Disallow: /
# Texas A&M University - Dept. of Computer Science crawler (server or link checking ?)
User-Agent: TAMU_CS_IRL_CRAWLER/1.0
Disallow: /
# Targetblaster user link validation ?
User-Agent: targetblaster.com/0.9k
Disallow: /
# Target Your News - user submitted links
User-Agent: TargetYourNews.com bot
Disallow: /
# Trinity College Dublin (Ireland) TCDBOT
User-Agent: TCDBOT/Nutch-0.8 (PhD student research;http://www.tcd.ie; mcgettrs at t c d dot IE)
Disallow: /
# HTTP header for transfer encoding used as user agent name ?
User-Agent: TE
Disallow: /
# WinInet Internet client app.
User-Agent: TeamSoft WinInet Component
Disallow: /
# Tecomac Gmbh (Germany) crawler software - now Arexera Information Technologies
User-Agent: TECOMAC-Crawler/0.x
Disallow: /
# Tecomi (Germany) beta / test robot (84.201.65.xxx)
User-Agent: Tecomi Bot (http://www.tecomi.com/bot.htm)
Disallow: /
# Teemer crawler for NetSeer search (beta) via Amazon Web Services - see also NetSeer/Nutch
User-Agent: Teemer (NetSeer, Inc. is a Los Angeles based Internet startup company.; http://www.netseer.com/crawler.html; crawler@netseer.com)
Disallow: /
# Teleport (website) downloading tool
User-Agent: Teleport Pro/1.2x(.1xxx)
Disallow: /
# Teoma crawler (65.214.36.xx[x])
User-Agent: Teoma MP
Disallow: /
# Teoma crawler (65.214.36.xx[x])
User-Agent: teomaagent crawler-admin@teoma.com
Disallow: /
# Teoma crawler (65.214.36.xx[x])
User-Agent: teomaagent1 [crawler-admin@teoma.com]
Disallow: /
# Teoma crawler (65.214.36.xx[x])
User-Agent: teoma_agent1
Disallow: /
# Teradex Directory robot
User-Agent: Teradex Mapper; mapper@teradex.com; http://www.teradex.com
Disallow: /
# Teragram multilingual text & data processing software
User-Agent: TeragramCrawler
Disallow: /
# Terraminds blog search (Germany)
User-Agent: terraminds-bot/1.0 (support@terraminds.de)
Disallow: /
# Terrawiz Indian Search Engine robot
User-Agent: TerrawizBot/1.0 (+http://www.terrawiz.com/bot.html)
Disallow: /
# Noceans Information Portfolio Manager (66.35.69.x)
User-Agent: Test spider
Disallow: /
# Balihoo - Search Engine for Advertising Media
User-Agent: TestCrawler/Nutch-0.9 (Testing Crawler for Research ; http://balihoo.com/index.aspx; tgautier at balihoo dot com)
Disallow: /
# Expert HTML online source viewer
User-Agent: The Expert HTML Source Viewer (http://www.expert-html.com)
Disallow: /
# The Rarest Words - Linguistic experiment crawler via Amazon Web Services
User-Agent: TheRarestParser/0.2a (http://therarestwords.com/)
Disallow: /
# TheSuBot robot (Germany) for an unknown theme based search engine
User-Agent: TheSuBot/0.1 (www.thesubot.de)
Disallow: /
# ThumbShots website thumbnail service (Germany) robot
User-Agent: thumbshots-de-Bot (Version: 1.02, powered by www.thumbshots.de)
Disallow: /
# ThumbShots.de (Germany) robot
User-Agent: thumbshots-de-Bot (Version: 1.02- powered by www.thumbshots.de)
Disallow: /
# Breaking Blogs timbo bot blog robot
User-Agent: timboBot/0.9 http://www.breakingblogs.com/timbo_bot.html
Disallow: /
# TimelyWeb web page monitoring tool
User-Agent: TimelyWeb/4.1 ( EldoS TimelyWeb 4.1 )
Disallow: /
# TinEye crawler for an open image search project
User-Agent: TinEye/1.1 (http://tineye.com/crawler.html)
Disallow: /
# Tivra spider from AT&T Labs Research
User-Agent: tivraSpider/1.0 (crawler@tivra.com)
Disallow: /
# Tjgroup spider
User-Agent: TJG/Spider
Disallow: /
# TJvHttpGrabber (JEDI Visual Component Library)
User-Agent: TJvMultiHttpGrabber Component
Disallow: /
# Tkensaku Search (Japan) robot from 210.239.46.xxx (www.tken.com)
User-Agent: Tkensaku/x.x(http://www.tkensaku.com/q.html)
Disallow: /
# Unknown robot from 195.68.98.xx (coltfrance.com)
User-Agent: toCrawl/UrlDispatcher
Disallow: /
# Topodia search engine and personal information assistant (in development)
User-Agent: Topodia/1.2-dev (Topodia - Crawler for HTTP content indexing; http://www.topodia.com/; support@topodia.com)
Disallow: /
# Topos search (Russia) robot
User-Agent: TOPOS robot/1.1 (http://www.topos.com.ua/)
Disallow: /
# Hoppa robot (81.4.78.xxx)
User-Agent: Toutatis x-xx.x (hoppa.com)
Disallow: /
# Hoppa robot (81.4.78.xxx)
User-Agent: Toutatis x.x (hoppa.com)
Disallow: /
# Hoppa robot (81.4.78.xxx)
User-Agent: Toutatis x.x-x
Disallow: /
# Traazi! search (Germany) robot
User-Agent: traazibot/testengine (+http://www.traazi.de)
Disallow: /
# Trailfire web collection and annotating system
User-Agent: Trailfire-bot/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
Disallow: /
# Trailfire web collection and annotating system
User-Agent: Trailfire-bot/0.7.1 (Trailfire page content analyzer; http://trailfire.com; info@trailfire.com)
Disallow: /
# Trailfire web collection and annotating system
User-Agent: Trailfire/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
Disallow: /
# Trampelpfad Webkatalog spider
User-Agent: Trampelpfad-Spider
Disallow: /
# Trampelpfad Webkatalog spider
User-Agent: Trampelpfad-Spider-v0.1
Disallow: /
# Tricos meta tag validation
User-Agent: tricosMetaCheck 1.2216-08-1999 (http://www.tricos.com/metacheck)
Disallow: /
# some bad user agent
User-Agent: TSurf15a 11
Disallow: /
# Tulip Chain browser / link checker for Dmoz.org directory
User-Agent: TulipChain/5.x (http://ostermiller.org/tulipchain/) Java/1.x.1_0x (http://java.sun.com/) Linux/2.4.17
Disallow: /
# Tulip Chain browser / link checker for Dmoz.org directory
User-Agent: TulipChain/5.xx (http://ostermiller.org/tulipchain/) Java/1.x.1_0x (http://apple.com/) Mac_OS_X/10.2.8
Disallow: /
# Tumblr Tumblelogs RSS and news syndication crawler
User-Agent: Tumblr/1.0 RSS syndication (+http://www.tumblr.com/) (support@tumblr.com)
Disallow: /
# Turnitin (ex SlySearch) robot for helping educational institutions prevent plagiarism
User-Agent: TurnitinBot/x.x (http://www.turnitin.com/robot/crawlerinfo.html)
Disallow: /
# TurnPike Emporium Directory (207.67.198.x) link checking
User-Agent: Turnpike Emporium LinkChecker/0.1
Disallow: /
# TutorGig tutorial search robot
User-Agent: TutorGig/1.5 (+http://www.tutorgig.com/crawler)
Disallow: /
# TutorGig tutorial search robot
User-Agent: Tutorial Crawler 1.4 (http://www.tutorgig.com/crawler)
Disallow: /
# Twiceler experimental web crawler
User-Agent: Twiceler www.cuill.com/robots.html
Disallow: /
# Twiceler experimental web crawler
User-Agent: Twiceler-0.9 http://www.cuill.com/twiceler/robot.html
Disallow: /
# File downloading component from Twisted Python
User-Agent: Twisted PageGetter
Disallow: /
# Twitt(url)y URL tracking service for Twitter via Amazon Web Services
User-Agent: Twitturly / v0.x
Disallow: /
# Twotrees content filter
User-Agent: Twotrees Reactive Filter V2.0
Disallow: /
# Tycoon - Hewlett-Packard's distributed cluster solution robot
User-Agent: Tycoon Agent/Nutch-1.0-dev
Disallow: /
# Tygo Search robot
User-Agent: TygoBot
Disallow: /
# Tygo Search robot
User-Agent: TygoProwler
Disallow: /
# UCMore - IE navigation and search plugin
User-Agent: UCmore
Disallow: /
# UCMore - IE navigation and search plugin
User-Agent: UCMore Crawler App
Disallow: /
# Ucweb mobile browser
User-Agent: UCWEB5.1
Disallow: /
# user agent - maybe UdmSearch (see UdmSearch) ?
User-Agent: UDM
Disallow: /
# UdmSearch / MySearch (now mnoGoSearch) offline browser/search client
User-Agent: UdmSearch/3.1.x
Disallow: /
# University of Iowa Crawler, possibly MySpiders
User-Agent: UIowaCrawler/1.0
Disallow: /
# UKWizz search robot
User-Agent: UKWizz/Nutch-0.8.1 (UKWizz Nutch crawler; http://www.ukwizz.com/)
Disallow: /
# Infoseek robot
User-Agent: Ultraseek
Disallow: /
# Unknown mail harvester/spambot from 80.58.13.xxx (proxycache.rima-tde.net)
User-Agent: Under the Rainbow 2.2
Disallow: /
# Unknown robot from University of Toronto (128.100.5.1xx)
User-Agent: UofTDB_experiment (leehyun@cs.toronto.edu)
Disallow: /
# Mobile phone browser
User-Agent: UP.Browser/3.01-IG01 UP.Link/3.2.3.4
Disallow: /
# Updated! search robot
User-Agent: updated/0.1-alpha (updated crawler; http://www.updated.com; crawler@updated.com)
Disallow: /
# Updated! search robot
User-Agent: updated/0.1beta (updated.com; http://www.updated.com; crawler@updated.om)
Disallow: /
# Handspring (PalmOS powered cellphone) Treo Blazer browser
User-Agent: UPG1 UP/4.0 (compatible; Blazer 1.0)
Disallow: /
# UptimeBot.com online link popularity check
User-Agent: Uptimebot
Disallow: /
# UptimeBot.com online link popularity check
User-Agent: UptimeBot(www.uptimebot.com)
Disallow: /
# URI::Fetch - client for fetching HTTP pages and syndication feeds (RSS, Atom)
User-Agent: URI::Fetch/0.06
Disallow: /
# Innerprise URL Spider Pro (now ES.NET) web indexing / site searching tool
User-Agent: URL Spider Pro/x.xx (innerprise.net)
Disallow: /
# URLBase - Internet shortcut manager
User-Agent: URLBase/6.x
Disallow: /
# URLBlaze file sharing link toolkit
User-Agent: URLBlaze
Disallow: /
# ://URLFAN news crawler
User-Agent: urlfan-bot/1.0; +http://www.urlfan.com/site/bot/350.html
Disallow: /
# URLGetFile downloading tool
User-Agent: URLGetFile
Disallow: /
# Innerprise URL Spider Pro (now ES.NET) web indexing / site searching tool
User-Agent: URL_Spider_Pro/x.x
Disallow: /
# Innerprise URL Spider Pro (now ES.NET) web indexing / site searching tool
User-Agent: URL_Spider_Pro/x.x+(http://www.innerprise.net/usp-spider.asp)
Disallow: /
# BoardReader search favicon fetcher
User-Agent: User-Agent: BoardReader Favicon Fetcher /1.0 info@boardreader.com
Disallow: /
# BoardReader search image fetcher
User-Agent: User-Agent: BoardReader Image Fetcher /1.0 info@boardreader.com
Disallow: /
# ljpic.com - LiveJournal picture feed search
User-Agent: User-Agent: LjSEEK Picture-Bot /1.0 contact@ljseek.com
Disallow: /
# FileHeap download manager
User-Agent: User-Agent: FileHeap! file downloader (http://www.fileheap.com)
Disallow: /
# Malformed UA header from some guestbook/forum spammer
User-Agent: User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
Disallow: /
# Skizzle search robot
User-Agent: User-Agent: Mozilla/4.0 (SKIZZLE! Distributed Internet Spider v1.0 - www.SKIZZLE.com)
Disallow: /
# Unknown robot (reads robots.txt) or site grabber. From different IPs, e.g. 62.98.8.xx (wind.it)
User-Agent: user-agent=Mozilla/3.01Gold
Disallow: /
# University of Sydney NLP Spider for research in Natural Language Processing
User-Agent: USyd-NLP-Spider (http://www.it.usyd.edu.au/~vinci/bot.html)
Disallow: /
# Web Thief Site Grabber
User-Agent: UtilMind HTTPGet
Disallow: /
# WebWasher ad filter
User-Agent: Utopia WebWasher 3.0
Disallow: /
# uTorrent BitTorrent client
User-Agent: uTorrent/1500
Disallow: /
# Unknown badly behaved bot via Road Runner - see link
User-Agent: VadixBot
Disallow: /
# WiseGuys WAP pages robot
User-Agent: Vagabondo-WAP/2.0 (webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)/1.0 Profile
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Vagabondo/1.x MT (webagent@wise-guys.nl)
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Vagabondo/2.0 MT
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Vagabondo/2.0 MT (webagent at wise-guys dot nl)
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Vagabondo/2.0 MT (webagent@NOSPAMwise-guys.nl)
Disallow: /
# WiseGuys robot Netherlands - 82.94.216.2
User-Agent: Vagabondo/3.0 (webagent at wise-guys dot nl)
Disallow: /
# Open Directory link checking from Vakes
User-Agent: Vakes/0.01 (Vakes; http://www.vakes.com/; search@vakes.com)
Disallow: /
# unknown level3.net (63.214.172.xxx) robot
User-Agent: VayalaCreep-v0.0.1 (haploid@haploid.com)
Disallow: /
# unknown level3.net (63.214.172.xxx) robot
User-Agent: Vayala|Creep-v0.0.1 (codepoet@wildties.com)
Disallow: /
# iNet Grabber - Internet content grabber
User-Agent: vb wininet
Disallow: /
# Versus Project robot - Comparing methods for near-uniform URL sampling
User-Agent: versus 0.2 (+http://versus.integis.ch)
Disallow: /
# Unknown robot from EPFL University Switzerland (128.178.155.xxx)
User-Agent: versus crawler eda.baykan@epfl.ch
Disallow: /
# Verticrawl - Semantic search engine solution (French)
User-Agent: Verticrawlbot
Disallow: /
# VeryGoodSearch.com link submission checking
User-Agent: VeryGoodSearch.com.DaddyLongLegs
Disallow: /
# Verzamelgids NL link checking robot
User-Agent: verzamelgids.nl - Networking4all Bot/x.x
Disallow: /
# Verzamelgids NL link checking robot
User-Agent: Verzamelgids/2.2 (http://www.verzamelgids.nl)
Disallow: /
# Unknown robot from Yahoo Norway
User-Agent: Vespa Crawler
Disallow: /
# Sidewinder G2 anti-virus and anti-spyware protection
User-Agent: virus_detector (virus_harvester@securecomputing.com)
Disallow: /
# Visbot crawler for a search software under development
User-Agent: VisBot/2.0 (Visvo.com Crawler; http://www.visvo.com/bot.html; bot@visvo.com)
Disallow: /
# Some IE toolbar made with Visicom Media Dynamic Toolbar software
User-Agent: Visicom Toolbar
Disallow: /
# Vision research lab's Cortina - content based image retrieval (128.111.60.xx)
User-Agent: Vision Research Lab image spider at vision.ece.ucsb.edu
Disallow: /
# VLC - Cross-platform media player and streaming server
User-Agent: VLC media player - version 0.8.5 Janus - (c) 1996-2006 the VideoLAN team
Disallow: /
# VM - Vertical Search Engine (China)
User-Agent: VMBot/0.x.x (VMBot; http://www.VerticalMatch.com/; vmbot@tradedot.com)
Disallow: /
# Vortex Web Indexing Robot for a study on internet link distribution
User-Agent: Vortex/2.2 (+http://marty.anstey.ca/robots/vortex/)
Disallow: /
# Kosmix health, auto and travel search crawler (204.14.48.x / 38.113.234.xxx)
User-Agent: voyager-hc/1.0
Disallow: /
# Kosmix health, auto and travel search crawler (204.14.48.x / 38.113.234.xxx)
User-Agent: voyager/1.0
Disallow: /
# Kosmix health, auto and travel search crawler (204.14.48.x / 38.113.234.xxx)
User-Agent: voyager/2.0 (http://www.kosmix.com/html/crawler.html)
Disallow: /
# Vivisimo search crawler (206.210.89.xxx)
User-Agent: VSE/1.0 (testcrawler@hotmail.com)
Disallow: /
# Vivisimo search crawler (206.210.89.xxx)
User-Agent: VSE/1.0 (testcrawler@vivisimo.com)
Disallow: /
# Verity vspider indexing software
User-Agent: vspider
Disallow: /
# Verity vspider indexing software
User-Agent: vspider/3.x
Disallow: /
# VWBot - MetaQuerier Crawler for the MetaQuerier project at the University of Illinois
User-Agent: VWBOT/Nutch-0.9-dev (VWBOT Nutch Crawler; http://vwbot.cs.uiuc.edu;+vwbot@cs.uiuc.edu
Disallow: /
# W3C Link Checker
User-Agent: W3C-checklink/3.x.x.x libwww-perl/5.xx
Disallow: /
# W3C Link Checker
User-Agent: W3C-checklink/4.x [4.xx] libwww-perl/5.xxx
Disallow: /
# WebCon - the Libwww command line tool
User-Agent: W3C-WebCon/5.x.x libwww/5.x.x
Disallow: /
# W3C Line Mode (character based Web browser)
User-Agent: W3CLineMode/5.4.0 libwww/5.x.x
Disallow: /
# Unknown link checking using Libwww via Korea Telecom (221.148.44.xxx)
User-Agent: W3CRobot/5.4.0 libwww/5.4.0
Disallow: /
# W3C HTML-Code Validator
User-Agent: W3C_Validator/1.xxx libwww-perl/5.xx
Disallow: /
# w3m Linux pager / text-based browser
User-Agent: w3m/0.x.xx
Disallow: /
# W3 Site Search (Germany) search engine solution
User-Agent: W3SiteSearch Crawler_v1.1 http://www.w3sitesearch.de
Disallow: /
# Wadain (Japan) Blog / RSS search crawler
User-Agent: wadaino.jp-crawler 0.2 (http://wadaino.jp/)
Disallow: /
# Wanna-Be text mode browser
User-Agent: WannaBe (Macintosh; PPC)
Disallow: /
# WapOnWindows WAP browser for PCs
User-Agent: WapOnWindows 1.0
Disallow: /
# Watchfire WebXM intranet solution
User-Agent: Watchfire WebXM 1.0
Disallow: /
# WAVcheck - Simple Vendor Discovery Tool for detecting client-side tags from web analytics vendors
User-Agent: WAVcheck 1.0.x (http://www.webbanalys.se/apps/WAVcheck/)
Disallow: /
# Wavefire local search community engine (64.141.15.1xx)
User-Agent: Wavefire/0.8-dev (Wavefire; http://www.wavefire.com; info@wavefire.com)
Disallow: /
# Waypath blog discovery engine robot
User-Agent: Waypath development crawler - info at waypath dot com
Disallow: /
# Waypath blog discovery engine robot
User-Agent: Waypath Scout v2.x - info at waypath dot com
Disallow: /
# WDG HTML-code validator
User-Agent: WDG_Validator/1.1
Disallow: /
# Datafire.com's Web Image Collector (graphics downloading tool)
User-Agent: Web Image Collector
Disallow: /
# Relsoft link checking software
User-Agent: Web Link Validator 1.5
Disallow: /
# RankMeter ranking software
User-Agent: Web Snooper
Disallow: /
# Web-bekannt German web directory link checking
User-Agent: web-bekannt (Version: 1.02, powered by www.internetservice-franken.de)
Disallow: /
# Web-bekannt German web directory link checking
User-Agent: web-bekannt (Version: 1.02, powered by www.web-bekannt.de)
Disallow: /
# Unknown link or server checking from Würzburg University Germany (132.187.10.xx)
User-Agent: Web-Bot V1.03
Disallow: /
# Unknown robot from 69.50.233.x (nectartech.com)
User-Agent: Web-Robot/5.0 (en-US; web-robot.com/policy.html) Web-Robot Crawler/2.0.3
Disallow: /
# Web2Express / Web2x - Open data searching tool
User-Agent: web2express.org/Nutch-0.9-dev (leveled playing field; http://web2express.org/; info at web2express.org)
Disallow: /
# WebAlta search Russia crawler (85.21.201.xx)
User-Agent: WebAlta Crawler/1.2.1 (http://www.webalta.ru/bot.html)
Disallow: /
# WebarooBot / RufusBot from webaroo offline search service
User-Agent: WebarooBot (Webaroo Bot; http://64.124.122.252/feedback.html)
Disallow: /
# WebarooBot / RufusBot from webaroo offline search service
User-Agent: WebarooBot (Webaroo Bot; http://www.webaroo.com/rooSiteOwners.html)
Disallow: /
# Yanasoft WebAuto website copier / downloading tool
User-Agent: WebAuto/3.4xxx (WinNT; I)
Disallow: /
# Web Bandit personal search software
User-Agent: webbandit/4.xx.0
Disallow: /
# Amansoft WebBug web server protocol test
User-Agent: WebBug/5.x
Disallow: /
# WebClipping.com - online news monitoring service
User-Agent: Webclipping.com
Disallow: /
# WebCollage Syndicator graphics crawler/collector
User-Agent: webcollage/1.xx
Disallow: /
# Quarterdeck's WebCompass search tool
User-Agent: WebCompass 2.0
Disallow: /
# WebCopier offline browser
User-Agent: WebCopier vx.x
Disallow: /
# WebCopier offline browser
User-Agent: WebCopier vx.xa
Disallow: /
# WebCorp linguistic search engine (UK)
User-Agent: WebCorp/1.0
Disallow: /
# Webcrawl Search robot (64.40.105.xxx)
User-Agent: webcrawl.net
Disallow: /
# Unix/Linux Web Downloader
User-Agent: WebDownloader for X x.xx
Disallow: /
# Unknown robot from china-netcom.com
User-Agent: Webdup/0.9
Disallow: /
# WingFlyer WebFetch website downloading tool
User-Agent: WebFetch
Disallow: /
# webfetch - command line tool to fetch files via HTTP
User-Agent: webfetch/5.x.x
Disallow: /
# Verso NetSpective WebFilter
User-Agent: WebFilter Robot 1.0
Disallow: /
# Telemate.net NetSpective WebFilter
User-Agent: WebFilter Robot 1.x
Disallow: /
# Webfind search robot
User-Agent: WebFindBot(http://www.web-find.com)
Disallow: /
# Webglimpse search engine software
User-Agent: Webglimpse 2.xx.x (http://webglimpse.net)
Disallow: /
# webGobbler - Online random image generator
User-Agent: webGobbler/1.x.x
Disallow: /
# fake ?
User-Agent: webhack
Disallow: /
# herbert.groot.jebbink.nl Web Images collage generator
User-Agent: WebImages 0.3 ( http://herbert.groot.jebbink.nl/?app=WebImages )
Disallow: /
# WebLight web analyzer & link checker
User-Agent: WebLight/4.x.x (support@illumit.com; http://www.illumit.com/Products/weblight/)
Disallow: /
# WebLink's link management system for HTTP, FTP and Mail hyperlinks
User-Agent: Weblink's checker/
Disallow: /
# Los Alamos National Laboratory weblog research project
User-Agent: Weblog Attitude Diffusion 1.0
Disallow: /
# Unknown robot from Leipzig University (Germany) faculty for computer science
User-Agent: webmeasurement-bot, http://rvs.informatik.uni-leipzig.de
Disallow: /
# WebMiner bulk file downloader
User-Agent: WebMiner/x.x [en] (Win98; I)
Disallow: /
# WeBoX (Japan) - Browser and web collector
User-Agent: WeBoX/0.xx
Disallow: /
# WebPix - picture downloading tool
User-Agent: WebPix 1.0 (www.netwu.com)
Disallow: /
# Caesius WebQL - Custom robot/agent generator / web extraction software
User-Agent: WebQL
Disallow: /
# WebRACE - HTTP retrieval, annotation and caching engine
User-Agent: WebRACE/1.1 (University of Cyprus- Distributed Crawler)
Disallow: /
# WebRankSpider experimental web crawler
User-Agent: WebRankSpider/1.37 (+http://ulm191.server4you.de/crawler/)
Disallow: /
# Webreaper download manager
User-Agent: WebReaper vx.x - www.webreaper.net
Disallow: /
# Webreaper download manager
User-Agent: WebReaper [info@webreaper.net]
Disallow: /
# Webreaper download manager
User-Agent: WebReaper [webreaper@webreaper.net]
Disallow: /
# Websearch Australia robot
User-Agent: WebSearch.COM.AU/3.0.1 (The Australian Search Engine; http://WebSearch.COM.AU; Search@WebSearch.COM.AU)
Disallow: /
# Dortmund University WebSearchBench - Open source search software
User-Agent: WebSearchBench WebCrawler v0.1(Experimental)
Disallow: /
# Dortmund University WebSearchBench - Open source search software
User-Agent: WebSearchBench WebCrawler V1.0 (Beta)- Prof. Dr.-Ing. Christoph Lindemann- Universität Dortmund- cl@cs.uni-dortmund.de- http://websearchbench.cs.uni-dortmund.de/
Disallow: /
# Web site downloading tool and offline browser (Japan)
User-Agent: Website Explorer/0.9.x.x
Disallow: /
# Website eXtractor web site downloading tool
User-Agent: Website eXtractor
Disallow: /
# Sootle web directory Website Worth ranking tool
User-Agent: WebsiteWorth v1.0
Disallow: /
# Webspinne.de robot
User-Agent: Webspinne/1.0 webmaster@webspinne.de
Disallow: /
# Websquash.com Search Engine robot / link checking
User-Agent: Websquash.com (Add url robot)
Disallow: /
# WebStat - Java statistical computing environment for the web
User-Agent: WebStat/1.0 (Unix; beta; 20040314)
Disallow: /
# Webster - Rev. Healeys web crawler
User-Agent: Webster v0.3 ( http://webster.healeys.net/ )
Disallow: /
# Websters Webmaster Archive (Germany) submission / pad checking
User-Agent: webster-internet.de pad browser
Disallow: /
# WebStripper download manager
User-Agent: WebStripper/2.xx
Disallow: /
# WebTrafficExpress IBM server software
User-Agent: WebTrafficExpress/x.0
Disallow: /
# Web Trends link analyzer
User-Agent: WebTrends/3.0 (WinNT)
Disallow: /
# The Stanford WebBase Project crawler
User-Agent: WebVac (webmaster@pita.stanford.edu)
Disallow: /
# webval - Python link checking tool
User-Agent: WebVal/1.0
Disallow: /
# Webverzeichnis.de (Germany) directory robot
User-Agent: Webverzeichnis.de - Telefon: 01908 / 26005
Disallow: /
# Web Vulnerability Crawler
User-Agent: WebVulnCrawl.unknown/1.0 libwww-perl/5.803
Disallow: /
# Studio Net.Idea's Web Watcher Monitor robot
User-Agent: WebWatcherMonitor/2.01
Disallow: /
# WebZip offline browser
User-Agent: WebZIP/x.x (http://www.spidersoft.com)
Disallow: /
# Unknown spam bot / harvester (62.163.**.** / 62.194.**.*)
User-Agent: Wells Search II
Disallow: /
# Some spam bot- see link
User-Agent: WEP Search 00
Disallow: /
# wwIPStuff - Internet client tools for Visual FoxPro
User-Agent: West Wind Internet Protocols 4.xx
Disallow: /
# IBM's Almaden Research robot (Clever search project)
User-Agent: WFARC
Disallow: /
# GNU wget - file downloader
User-Agent: Wget/1.x(.x)GNU wget http://www.gnu.org/software/wget/wget.html - file downloader
Disallow: /
# GNU wget - file downloader
User-Agent: Wget/1.x+cvs-stable (Red Hat modified)
Disallow: /
# GNU wget - file downloader
User-Agent: Wget/1.x.x+cvs
Disallow: /
# Whatsup Gold network monitor
User-Agent: Whatsup/x.x
Disallow: /
# WhatUSeek / Chubba robot
User-Agent: whatUseek_winona/3.0
Disallow: /
# WhizBang! Labs (closed since May 2002) information extraction robot
User-Agent: WhizBang! Lab
Disallow: /
# some download agent
User-Agent: Wildsoft Surfer
Disallow: /
# Twotrees crawler
User-Agent: Willow Internet Crawler by Twotrees V2.1
Disallow: /
# unknown robot from gw.ocg-corp.com (209.126.176.x)
User-Agent: WinampMPEG/2.00 (larbin@unspecified.mail)
Disallow: /
# Super Affiliate Tracker agent by Wincer Song
User-Agent: WincerSong Agent v1.0
Disallow: /
# Windows Media Player 10
User-Agent: Windows-Media-Player/10.00.00.xxxx
Disallow: /
# Nicksoft WinGet download manager
User-Agent: WinGet 1.1
Disallow: /
# Example code for a WinHTTP C++ library crawler
User-Agent: WinHTTP Example/1.0
Disallow: /
# Wink beta search robot (64.13.136.x)
User-Agent: WinkBot/0.06 (Wink.com search engine web crawler; http://www.wink.com/Wink:WinkBot; winkbot@wink.com)
Disallow: /
# WinPodder - Podcast player and RSS reader
User-Agent: WinPodder (http://winpodder.com)
Disallow: /
# WinWap - Windows PC WAP browser
User-Agent: WinWAP/3.x (3.x.x.xx; Win32) (Google WAP Proxy/1.0)
Disallow: /
# Yammba web directory (Germany) link checking
User-Agent: Wir sind die Borg (Version: 1.03, Sie wurden Assimiliert +http://www.yammba.com/suchmaschine/bot.html)
Disallow: /
# WIRE crawler used by the University of Pisa - Italy
User-Agent: WIRE/0.11 (Linux; i686; Bot,Robot,Spider,Crawler,aromano@cli.di.unipi.it)
Disallow: /
# WIRE - Web information retrieval environment crawler
User-Agent: WIRE/0.x (Linux; i686; Bot,Robot,Spider,Crawler)
Disallow: /
# Korea Wisenut robot
User-Agent: WISEbot/1.0 (WISEbot@koreawisenut.com; http://wisebot.koreawisenut.com)
Disallow: /
# Wisewire domain checker (Discontinued)
User-Agent: WiseWire-Spider2
Disallow: /
# WISH academic research project for link checking
User-Agent: wish-project (http://wish.slis.tsukuba.ac.jp/)
Disallow: /
# Wordchamp web page vocabulary / translation robot
User-Agent: WordChampBot
Disallow: /
# WordPress personal Blog publishing platform
User-Agent: WordPress/x.x.x.x PHP/4.x.xx
Disallow: /
# WORIO (beta) search for computer scientists and programmers using Heritrix open-source crawler
User-Agent: worio heritrix bot (+http://worio.com/)
Disallow: /
# WORIO (beta) search for computer scientists and programmers via Amazon Web Services
User-Agent: woriobot ( http://www.worio.com/)
Disallow: /
# Entireweb Search robot (62.13.25.xxx)
User-Agent: WorldLight
Disallow: /
# unknown link checking (from 4.18.57.126) ?
User-Agent: WorQmada/1.0
Disallow: /
# Wotbox spider
User-Agent: Wotbox/alpha0.6 (bot@wotbox.com; http://www.wotbox.com)
Disallow: /
# Wotbox spider
User-Agent: Wotbox/alpha0.x.x (bot@wotbox.com; http://www.wotbox.com) Java/1.4.1_02
Disallow: /
# WebSearchBench crawler from Dortmund University, Germany
User-Agent: WSB WebCrawler V1.0 (Beta)- cl@cs.uni-dortmund.de
Disallow: /
# WebSearchBench crawler from Dortmund University, Germany
User-Agent: WSB, http://websearchbench.cs.uni-dortmund.de
Disallow: /
# WUME Lab's web crawler (128.180.121.xxx)
User-Agent: wume_crawler/1.1 (http://wume.cse.lehigh.edu/~xiq204/crawler/)
Disallow: /
# Wusage log-file analysis
User-Agent: Wusage/x.0@boutell.com
Disallow: /
# WWLib - Wolverhampton University Web Library for classifying web documents
User-Agent: Wwlib/Linux
Disallow: /
# WWSBOT web server version checker
User-Agent: WWSBOT 1.x [--- http://www.analyzer.nu ---]
Disallow: /
# Perl web page fetching module
User-Agent: WWW-Mechanize/1.1x
Disallow: /
# Arianna robot
User-Agent: www.arianna.it
Disallow: /
# Business-Socket.com link checking ?
User-Agent: www.business-socket.com registry verify/1.x
Disallow: /
# The DoWeb UK Business directory link checking
User-Agent: www.doweb.co.uk crawler
Disallow: /
# www4mail - web navigation & database search by e-mail
User-Agent: www4mail/2.x libwww-FM/2.14 (Unix; I)
Disallow: /
# WWWC - update checker for web pages (Japanese only)
User-Agent: WWWC/1.0x
Disallow: /
# World Wide Weasel Germany robot
User-Agent: WWWeasel Robot v1.00 (http://wwweasel.de)
Disallow: /
# WWWoffle download manager
User-Agent: WWWOFFLE/2.x
Disallow: /
# Unknown robot from CIS at Munich University
User-Agent: wwwster/1.x (Beta- mailto:gue@cis.uni-muenchen.de)
Disallow: /
# wxDownload Fast (wxDFast) open source download manager
User-Agent: wxDownload Fast
Disallow: /
# Arexera (Germany) crawler software
User-Agent: X-Crawler
Disallow: /
# Xaldon WebSpider offline browser
User-Agent: Xaldon WebSpider
Disallow: /
# Xenu link checker
User-Agent: Xenu Link Sleuth 1.xx
Disallow: /
# Xenu link checker
User-Agent: Xenu's Link Sleuth 1.x[a-z]
Disallow: /
# XerKa text mining and information retrieval software
User-Agent: Xerka WebBot v1.0.0 [UPVOpenDir]
Disallow: /
# xine - free Linux / OS/2 multimedia player
User-Agent: xine/1.0
Disallow: /
# XIRQ search (beta) robot (70.86.206.1xx)
User-Agent: xirq/0.1-beta (xirq; http://www.xirq.com; xirq@xirq.com)
Disallow: /
# GPath / XMLSlurp - Expression language for tree structured data
User-Agent: XMLSlurp/0.1 libwww-perl/5.805
Disallow: /
# Metamark URL Shorten Service
User-Agent: XRL/2.00b1 (Linux; i686; en-us) (+http://metamark.net/about)
Disallow: /
# Xylix Retrieval System software
User-Agent: Xylix
Disallow: /
# Inria Crawler
User-Agent: xyro_(xcrawler@cosmos.inria.fr)
Disallow: /
# Yahoo Search Japan robot (211.14.8.2xx)
User-Agent: Y!J-BSC/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)
Disallow: /
# Yahoo Search Japan robot (203.216.197.xxx)
User-Agent: Y!J-SRD/1.0
Disallow: /
# Yahoo Search Japan robot (211.14.8.2xx)
User-Agent: Y!J/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)
Disallow: /
# Yahoo picture service for mobiles
User-Agent: Y!OASIS/TEST no-ad Mozilla/4.08 [en] (X11; I; FreeBSD 2.2.8-STABLE i386)
Disallow: /
# Y!TunnelPro - Yahoo! Messenger companion user agent
User-Agent: Y!TunnelPro
Disallow: /
# Yacy distributed P2P web search engine robot
User-Agent: yacy (www.yacy.net; v20040602; i386 Linux 2.4.26-gentoo-r13; java 1.4.2_06; MET/en)
Disallow: /
# Yacy distributed P2P web search engine robot
User-Agent: yacybot (x86 Windows XP 5.1; java 1.5.0_06; Europe/de) yacy.net
Disallow: /
# (Yahoo) Pipes interactive data aggregator robot
User-Agent: Yahoo Pipes 1.0
Disallow: /
# Yahoo Mindset: Intent-driven Search (66.228.182.1xx)
User-Agent: Yahoo! Mindset
Disallow: /
# Yahoo blog indexing robot (209.191.83.1xx)
User-Agent: Yahoo-Blogs/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )
Disallow: /
# Yahoo multimedia crawler (206.190.43.xx)
User-Agent: Yahoo-MMAudVid/1.0 (mms dash mmaudvidcrawler dash support at yahoo dash inc dot com)
Disallow: /
# Yahoo multimedia crawler
User-Agent: Yahoo-MMAudVid/2.0(mms dash mm aud vid crawler dash support at yahoo dash inc.com ;Mozilla 4.0 compatible; MSIE 7.0;Windows NT 5.0; .NET CLR 2.0)
Disallow: /
# Yahoo multimedia crawler via Fastsearch.net (66.77.73.xx)
User-Agent: Yahoo-MMCrawler/3.x (mm dash crawler at trd dot overture dot com)
Disallow: /
# Yahoo Search robot
User-Agent: Yahoo-Test/4.0
Disallow: /
# Yahoo crawler via Overture (66.77.73.3x)
User-Agent: Yahoo-VerticalCrawler-FormerWebCrawler/3.9 crawler at trd dot overture dot com; http://www.alltheweb.com/help/webmaster/crawler
Disallow: /
# Yahoo Publisher Network RSS crawler
User-Agent: YahooFeedSeeker/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://publisher.yahoo.com/rssguide)
Disallow: /
# Yahoo Product Search crawler ( 68.142.195.x)
User-Agent: YahooSeeker-Testing/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/)
Disallow: /
# Yahoo Product Search crawler ( 66.196.93.x)
User-Agent: YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)
Disallow: /
# Yahoo Product Search crawler ( 66.196.93.x)
User-Agent: YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/yahooseeker.html)
Disallow: /
# Yahoo Product Search crawler ( 66.196.93.x)
User-Agent: YahooSeeker/1.1 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)
Disallow: /
# Yahoo Product Search crawler ( 68.142.195.x)
User-Agent: YahooSeeker/bsv3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )
Disallow: /
# Yahoo robot
User-Agent: YahooSeeker/CafeKelsa-dev (compatible; Konqueror/3.2; FreeBSD ;cafekelsa-dev-webmaster@yahoo-inc.com )
Disallow: /
# Yandex Search Russia link checking (213.180.206.2xx)
User-Agent: Yandex/1.01.001 (compatible; Win16; I)
Disallow: /
# Yanga search robot by Gigabase (Russian Federation)
User-Agent: Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)
Disallow: /
# Yarienavoir search (Belgium) robot
User-Agent: yarienavoir.net/0.2
Disallow: /
# 1noon.com search Korea robot (222.231.21.xxx)
User-Agent: Yeti
Disallow: /
# 1noon.com search Korea robot (222.231.21.xxx)
User-Agent: Yeti/0.01 (nhn/1noon, yetibot@naver.com, check robots.txt daily and follows it)
Disallow: /
# Naver search (Korea) robot
User-Agent: Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)
Disallow: /
# yggdrasil spider for GoPubMed biorelated search engine
User-Agent: yggdrasil/Nutch-0.9 (yggdrasil biorelated search engine; www dot biotec dot tu minus dresden do de slash schroeder; heiko dot dietze at biotec dot tu minus dresden dot de)
Disallow: /
# Yodao search (China)
User-Agent: YodaoBot/1.0 (http://www.yodao.com/help/webmaster/spider/; )
Disallow: /
# Yoono - community based search (193.110.140.xxx / 194.0.179.[x]xx)
User-Agent: yoofind/yoofind-0.1-dev (yoono webcrawler; http://www.yoono.com ; MyEmail)
Disallow: /
# Yoogli search (under development) agent
User-Agent: yoogliFetchAgent/0.1
Disallow: /
# Yoono - community based search (193.110.140.xxx / 194.0.179.[x]xx)
User-Agent: yoono/1.0 web-crawler/1.0
Disallow: /
# YottaCars bot - YottaCar car search engine ( 64.62.175.xxx)
User-Agent: YottaCars_Bot/4.12 (+http://www.yottacars.com) Car Search Engine
Disallow: /
# YottaShopping bot - YottaShopping search engine ( 64.62.175.xxx)
User-Agent: YottaShopping_Bot/4.12 (+http://www.yottashopping.com) Shopping Search Engine
Disallow: /
# Y!TunnelPro - Yahoo! Messenger companion user agent
User-Agent: YTunnelPro
Disallow: /
# Z-Add online link checker
User-Agent: Z-Add Link Checker (http://w3.z-add.co.uk/linkcheck/)
Disallow: /
# Zao crawler for Kototoi Project
User-Agent: Zao-Crawler
Disallow: /
# Zao crawler for Kototoi Project
User-Agent: Zao-Crawler 0.2b
Disallow: /
# Zao crawler for Kototoi Project
User-Agent: Zao/0.1 (http://www.kototoi.org/zao/)
Disallow: /
# Zeus Internet Marketing Robot based on Webster Pro component
User-Agent: ZBot/1.00 (icaulfield@zeus.com)
Disallow: /
# Zearchit German search / directory
User-Agent: Zearchit
Disallow: /
# Ze.bz Moteur de Recherche robot
User-Agent: ZeBot_lseek.net (bot@ze.bz)
Disallow: /
# Ze.bz Moteur de Recherche robot
User-Agent: ZeBot_www.ze.bz (ze.bz@hotmail.com)
Disallow: /
# ZedZo Search (beta) robot
User-Agent: zedzo.digest/0.1 (http://www.zedzo.com/)
Disallow: /
# Zend PHP framework's Zend_Http_Client component
User-Agent: Zend_Http_Client
Disallow: /
# Powerset Natural Language Search crawler (under development) using Heritrix via Amazon Web Services
User-Agent: zermelo Mozilla/5.0 compatible; heritrix/1.12.1 (+http://www.powerset.com) [email:crawl@powerset.com,email:paul@page-store.com]
Disallow: /
# Zerx search robot ?
User-Agent: zerxbot/Version 0.6 libwww-perl/5.79
Disallow: /
# Zeus Internet Marketing Robot (based on Webster Pro)
User-Agent: Zeus ThemeSite Viewer Webster Pro V2.9 Win32
Disallow: /
# Zeus Internet Marketing Robot (based on Webster Pro)
User-Agent: Zeus xxxxx Webster Pro V2.9 Win32
Disallow: /
# Zeusbot robot for building the Ulysseek.com index
User-Agent: Zeusbot/0.07 (Ulysseek's web-crawling robot; http://www.zeusbot.com; agent@zeusbot.com)
Disallow: /
# Unknown agent (server- or link checking ?) from 198.173.158.xx
User-Agent: Ziggy -- The Clown From Hell!!
Disallow: /
# Zippp.net web search robot
User-Agent: ZipppBot/0.xx (ZipppBot; http://www.zippp.net; webmaster@zippp.net)
Disallow: /
# Zippp.net web search robot
User-Agent: ZIPPPCVS/0.xx (ZipppBot/.xx;http://www.zippp.net; webmaster@zippp.net)
Disallow: /
# Zippyfinder robot
User-Agent: Zippy v2.0 - Zippyfinder.com
Disallow: /
# Microsoft Zoo Tycoon 2 game client
User-Agent: Zoo Tycoon 2 Client -- http://www.zootycoon.com
Disallow: /
# Zoom Search Engine software spider
User-Agent: ZoomSpider - wrensoft.com
Disallow: /
# zspider robot for a new search engine
User-Agent: zspider/0.9-dev http://feedback.redkolibri.com/
Disallow: /
# Wisenut robot
User-Agent: ZyBorg/1.0 (ZyBorg@WISEnut.com; http://www.WISEnut.com)
Disallow: /