Crawler, Spider über .htaccess und robots.txt ausschließen Guten Abend,
ich versuche mich gerade im Ausschließen von Crawlern, bekomme das aber auf eigene Faust nicht hin.
Ich hatte über das Internet folgende "Ausschluss-Datei" gefunden, die ich gerne übernehmen würde...:
Inhalt der Datei robots.txt: Code:
User-agent: SEOkicks
Disallow: /
User-agent: SEOkicks-Robot
Disallow: /
User-agent: sistrix
Disallow: /
User-agent: MajesticSEO
Disallow: /
User-agent: BacklinkCrawler
Disallow: /
User-agent: xovi
Disallow: /
User-agent: XoviBot
Disallow: /
User-agent: MJ12bot
Disallow: /
User-agent: spbot
Disallow: /
User-agent: SearchmetricsBot
Disallow: /
User-agent: search17
Disallow: /
User-agent: AhrefsBot
Disallow: /
User-agent: ia_archiver
Disallow: /
User-agent: TurnitinBot
Disallow: /
User-agent: SlySearch
Disallow: /
User-agent: findlinks
Disallow: /
User-agent: magpie-crawler
Disallow: /
User-agent: Pixray-Seeker
Disallow: /
User-agent: 008
Disallow: /
User-agent: Ezooms
Disallow: /
User-agent: lb-spider
Disallow: /
User-agent: WBSearchBot
Disallow: /
User-agent: psbot
Disallow: /
User-agent: HuaweiSymantecSpider
Disallow: /
User-agent: EC2LinkFinder
Disallow: /
User-agent: htdig
Disallow: /
User-agent: SemrushBot
Disallow: /
User-agent: discobot
Disallow: /
User-agent: linkdex.com
Disallow: /
User-agent: SeznamBot
Disallow: /
User-agent: EdisterBot
Disallow: /
User-agent: SWEBot
Disallow: /
User-agent: picmole
Disallow: /
User-agent: Yeti
Disallow: /
User-agent: Yeti-Mobile
Disallow: /
User-agent: PagePeeker
Disallow: /
User-agent: CatchBot
Disallow: /
User-agent: yacybot
Disallow: /
User-agent: netEstateNECrawler
Disallow: /
User-agent: SurveyBot
Disallow: /
User-agent: COMODOSSLChecker
Disallow: /
User-agent: Comodo-Certificates-Spider
Disallow: /
User-agent: gonzo
Disallow: /
User-agent: schrein
Disallow: /
User-agent: AfiliasWebMiningTool
Disallow: /
User-agent: suggybot
Disallow: /
User-agent: bdbrandprotect
Disallow: /
User-agent: BPImageWalker
Disallow: /
User-agent: Updownerbot
Disallow: /
User-agent: lex
Disallow: /
User-agent: ContentCrawler
Disallow: /
User-agent: DCPbot
Disallow: /
User-agent: KaloogaBot
Disallow: /
User-agent: MLBot
Disallow: /
User-agent: iCjobs
Disallow: /
User-agent: oBot
Disallow: /
User-agent: WebmasterCoffee
Disallow: /
User-agent: Qualidator
Disallow: /
User-agent: Webinator
Disallow: /
User-agent: Scooter
Disallow: /
User-agent: thunderstone
Disallow: /
User-agent: larbin
Disallow: /
User-agent: OpidooBOT
Disallow: /
User-agent: ips-agent
Disallow: /
User-agent: TinEye
Disallow: /
User-agent: UnisterBot
Disallow: /
User-agent: Unister
Disallow: /
User-agent: ReverseGet
Disallow: /
User-agent: DotBot
Disallow: / Inhalt der Datei .htaccess. : Code:
RewriteEngine On
RewriteCond %{HTTP_USER_AGENT} ^SEOkicks [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^SEOkicks-Robot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^sistrix [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^MajesticSEO [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^BacklinkCrawler [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^xovi [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^XoviBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^MJ12bot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^spbot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^SearchmetricsBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^search17 [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^AhrefsBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^ia_archiver [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^TurnitinBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^SlySearch [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^findlinks [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^magpie-crawler [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Pixray-Seeker [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^008 [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Ezooms [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^lb-spider [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^WBSearchBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^psbot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^HuaweiSymantecSpider [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^EC2LinkFinder [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^htdig [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^SemrushBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^discobot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^linkdex.com [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^SeznamBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^EdisterBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^SWEBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^picmole [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Yeti [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Yeti-Mobile [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^PagePeeker [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^CatchBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^yacybot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^netEstateNECrawler [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^SurveyBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^COMODOSSLChecker [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Comodo-Certificates-Spider [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^gonzo [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^schrein [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^AfiliasWebMiningTool [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^suggybot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^bdbrandprotect [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^BPImageWalker [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Updownerbot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^lex [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^ContentCrawler [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^DCPbot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^KaloogaBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^MLBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^iCjobs [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^oBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^WebmasterCoffee [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Qualidator [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Webinator [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Scooter [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^thunderstone [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^larbin [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^OpidooBOT [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^ips-agent [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^TinEye [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^UnisterBot [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^Unister [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^ReverseGet [NC]
RewriteRule ^.* - [F,L]
RewriteCond %{HTTP_USER_AGENT} ^DotBot [NC]
RewriteRule ^.* - [F,L] Und warum soll man Google-Bot nicht ausschließen? Sind diese Bots denn für irgendetwas gut, außer dass sie persönliche Daten abgreifen können?
Freue mich über Antwort,
LG Nanafa