####
# Copied from
# https://www.gamers.org/robots.txt
#
#######

############# Exclusions section for specific robots #################

#### Exclude loc-crawler - it GETs at high speed w/no delay
#    accessing from lx8.loc.gov 140.147.249.70 starting April 15 2011
#
User-agent: Mozilla/5.0 (compatible; loc-crawler
Disallow: /

#### Exclude TAGENT - it requests robots.txt before every GET
#    and GETs files too quickly. Here is a sample from the access log:
# sv.tkensaku.com - - [22/Jan/2002:11:38:05 -0500] "GET /robots.txt HTTP/1.0" 200 210 "TAGENT/V0.5"
# sv.tkensaku.com - - [22/Jan/2002:11:38:06 -0500] "GET /reviews/ HTTP/1.0" 200 14750 "TAGENT/V0.5"
# sv.tkensaku.com - - [22/Jan/2002:11:38:08 -0500] "GET /robots.txt HTTP/1.0" 200 210 "TAGENT/V0.5"
# sv.tkensaku.com - - [22/Jan/2002:11:38:09 -0500] "GET /previews/ HTTP/1.0" 200 9163 "TAGENT/V0.5"
# sv.tkensaku.com - - [22/Jan/2002:11:38:10 -0500] "GET /robots.txt HTTP/1.0" 200 210 "TAGENT/V0.5"
# sv.tkensaku.com - - [22/Jan/2002:11:38:12 -0500] "GET /articles/ HTTP/1.0" 200 9489 "TAGENT/V0.5"
#
User-agent: TAGENT
Disallow: /

#### Exclude Teleport Pro
#
# Teleport Pro has a bug where it interprets HREF=".." as a file and
# constructs and submits bad URLs, resulting in many Not Found errors.
# Apache should redirect URIs ending in ".." to the 'real' directory.
#
User-agent: Teleport Pro
Disallow: /

#### Exclude AlkalineBOT
#
# On 10-Mar-2002 from remote host syr-24-95-161-196.twcny.rr.com
#
User-agent: AlkalineBOT
Disallow: /

#### Exclude Whizbang (see http://www.whizbang.com/crawler)
#
User-agent: Whizbang
Disallow: /

#### Exclude UniverseBot
#
# No delay between requests. It strips off the trailing slash, thus
# triggering redirects. It does both HEAD and GET. Sample:
#
# 07:18:04 "HEAD /companies/ensemble HTTP/1.0" 301 0 "UniverseBot/1.0"
# 07:18:06 "HEAD /companies/ensemble/ HTTP/1.0" 200 0 "UniverseBot/1.0"
# 07:18:07 "GET /companies/ensemble HTTP/1.0" 301 247 "UniverseBot/1.0"
# 07:18:09 "GET /companies/ensemble/ HTTP/1.0" 200 9961 "UniverseBot/1.0"
#
User-agent: UniverseBot
Disallow: /

#### Exclude http://www.almaden.ibm.com/cs/crawler
#
# We'd like to limit the sites crawling us to the main indexers.
#
User-agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /

#### Exclude "SlySearch/1.0 http://www.plagiarism.org/crawler/robotinfo.html"
#
# This site indexes articles for plagiarism checks.
#
User-agent: SlySearch
Disallow: /

#### Exclude NG/1.0
#
# On 18-Oct-2002 from remote host ng1.exabot.com
#
# 13:11:35 "GET /news/more/1005254413/d/redir/cb_order/UNRET2003.IR HTTP/1.0" 404 244 "NG/1.0"
# 13:11:37 "GET /news/more/1005254413/gi/tattletale/news/ HTTP/1.0" 404 234 "NG/1.0"
# 13:11:38 "GET /news/more/1005254413/ews/ HTTP/1.0" 404 219 "NG/1.0"
#
User-agent: NG/1.0
Disallow: /

#### Exclude spider from singingfish.com - no media to index.
#
User-agent: asterias
Disallow: /

#### Exclude spider from xo.net - no reason to index our files
#
User-agent: Gaisbot
Disallow: /

#### Exclude UbiCrawler
#
# On 27-Sep-2003 from remote host ubi1.iit.cnr.it
# http://ubi.imc.pi.cnr.it/projects/ubicrawler/
#
User-agent: UbiCrawler
Disallow: /

#### Exclude Wget
#
# It checks this only for recursive operations, not for indiv. files
#
User-agent: Wget
Disallow: /

#### Exclude TranSGeniKBot
#
User-agent: TranSGeniKBot
Disallow: /

#### Exclude Ocelli/1.1 (http://www.globalspec.com)
#
User-agent: Ocelli
Disallow: /

#### Exclude Exabot (http://www.exava.com/)
#
# Doesn't honor global exclusions.
#
User-agent: Exabot
Disallow: /

#### Exclude Pompos (http://www.dir.com/)
#
# Obscure search site - 1/4 of the URLs have %00 appended.
#
# Stupid thing requires *no* optional space after User-agent:
User-agent:Pompos
Disallow: /

#### Exclude larbin (http://freshmeat.net/projects/larbin/)
#
# Open source spider that can be used by anyone. :-/
#
User-agent: larbin
Disallow: /

#### Exclude Nutch (http://www.nutch.org/docs/en/bot.html)
#
# Open source spider that can be used by anyone. :-/
#
User-agent: Nutch
Disallow: /

#### Exclude Jetbot (http://www.jeteye.com/jetbot.html)
#
# Doesn't honor global exclusions (it fetches /dl pages).
#
User-agent: Jetbot
Disallow: /

#### Exclude Yahoo Slurp (http://help.yahoo.com/l/us/yahoo/search/webcrawler/)
#
# Slurps tons of binaries too, averaging 2 GB/day
#
User-agent: Slurp
Disallow: /

#### Exclude http://crawler.007ac9.net/
#
# We'd like to limit the sites crawling us to the main indexers.
#
User-agent: 007ac9 Crawler
Disallow: /pub

#### Exclude http://www.cyotek.com/cyotek-webcopy
#
# Offline viewing tool
#
User-agent: CyotekWebCrawler
Disallow: /

#### Exclude https://www.httrack.com/
#
# Offline viewing tool
#
User-agent: HTTrack
Disallow: /

#### Exclude dotbot (http://www.opensiteexplorer.org/dotbot -> https://moz.com/researchtools/ose/dotbot)
#
User-agent: dotbot
Disallow: /

#### Exclude BLEXBot (http://webmeup-crawler.com/)
#
User-agent: BLEXBot
Disallow: /

#### Exclude serpstatbot (https://serpstatbot.com/)
#
User-agent: serpstatbot
Disallow: /

#### Exclude MJ12bot (http://mj12bot.com/)
#
# Fetches lots of mangled (wrongly nested) paths.
#
User-agent: MJ12bot
Disallow: /

#### Exclude AhrefsBot (http://ahrefs.com/robot/)
#
User-agent: AhrefsBot
Disallow: /

#### Exclude Adsbot (https://seostar.co/robot/)
#
User-agent: Adsbot
Disallow: /

#### Exclude DataForSeoBot (https://dataforseo.com/dataforseo-bot)
#
User-agent: DataForSeoBot
Disallow: /

################################################################

################ Exclusions section for ALL robots ####################
#
# These are plain string patterns - not necessarily directory names -
# so directories should have trailing slash if substring of another
# directory name (like /a is a substring of /about).

# robots.txt generated at http://www.mcanerin.com
User-agent: *
Disallow:
Disallow: /illust/addfav.php?*
Disallow: /illust/login.php?*
Disallow: /illust/thumbnails.php?album=*slideshow
Disallow: /cgi-bin/
Disallow: /api/
Disallow: /amfphp/
Disallow: /js/
Disallow: /css/
Disallow: /clips/bassabyss/game/
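
#### Note on the trailing-slash convention (illustrative example)
#
# Disallow values are matched as plain path prefixes, so a hypothetical
# short entry like /a would also match /about, /api/, /amfphp/, and
# anything else starting with "/a":
#
#   Disallow: /a     <- would block /a, /about, /api/, /amfphp/, ...
#   Disallow: /a/    <- would block only the /a/ directory and its contents
#
# That is why the short directory entries above carry a trailing slash.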