# $MirOS: www/files/robots.txt,v 1.5 2013/04/07 14:00:46 tg Exp $
#
# Robots exclusion rules. One directive per line; groups are separated
# by a blank line and must not contain blank lines themselves, since
# older parsers treat a blank line as the end of a record.

# Allow downloads: an empty Disallow value permits everything,
# wget is only asked to pause between requests
User-Agent: wget
Crawl-delay: 30
Disallow:

# Restrict the W3C Linkchecker to static, current content
# XXX to be revisited
User-Agent: W3C-checklink
Crawl-delay: 5
# dynamically generated content
Disallow: /htman
Disallow: /man
Disallow: /man1
Disallow: /man2
Disallow: /man3
Disallow: /man4
Disallow: /man5
Disallow: /man6
Disallow: /man7
Disallow: /man8
Disallow: /man9
Disallow: /cman/
Disallow: /r9man/
# content which is out of date
Disallow: /?wlog-1
Disallow: /?wlog-2
Disallow: /?wlog-3
Disallow: /?wlog-4
Disallow: /?wlog-5
Disallow: /?wlog-6
# other not-to-be-listed content
Disallow: /GBG/
Disallow: /MirOS/
Disallow: /Pkgs/
Disallow: /current/
Disallow: /d/
Disallow: /dist/
Disallow: /distfiles/

# This crawler operates at 1 Hz; ask it to pause between requests
User-Agent: sitebot
Crawl-delay: 7
Disallow: /cvs.cgi/
Disallow: /GBG/
# search engines shouldn't index duplicates
Disallow: /man1
Disallow: /man2
Disallow: /man3
Disallow: /man4
Disallow: /man5
Disallow: /man6
Disallow: /man7
Disallow: /man8
Disallow: /man9
# nor the download area
Disallow: /MirOS/
Disallow: /Pkgs/
Disallow: /current/
Disallow: /d/
Disallow: /dist/
Disallow: /distfiles/

# All other bots: deny access to some areas
User-Agent: *
Crawl-delay: 8
Disallow: /GBG/
# search engines shouldn't index duplicates
Disallow: /man1
Disallow: /man2
Disallow: /man3
Disallow: /man4
Disallow: /man5
Disallow: /man6
Disallow: /man7
Disallow: /man8
Disallow: /man9
# nor the download area
Disallow: /MirOS/
Disallow: /Pkgs/
Disallow: /current/
Disallow: /d/
Disallow: /dist/
Disallow: /distfiles/
# temporarily, for eurynome's weakness
Disallow: /cvs.cgi/