# Robots Exclusion Protocol
# Filename: robots.txt
# Author: Mark Garrison (mark@scarabmedia.com)
# Date: 10/16/2007
#
# Sets the directories & files to Disallow/Allow for crawlers.
# NOTE: the path values given to Disallow/Allow are case-sensitive!
# Use * to match any sequence of characters within a path.
# Use $ to anchor the match to the end of a URL string,
# as when disallowing or allowing files of a particular extension.
# (* and $ are extensions honored by major crawlers; they were not part
# of the original specification, so older bots may treat them literally.)
#
# See http://www.robotstxt.org/wc/norobots.html for full specifications
#
# Formatting rule for this file: one directive per line, and NO blank
# lines inside the record below. The original specification treats a blank
# line as the end of a record, so '#' lines are used as visual separators
# to keep every directive attached to the User-agent line.
#
User-agent: *
Crawl-delay: 300
#
# Standard disallows for Scarab Media hosted websites
Disallow: /cgi-bin/
Disallow: /log/
Disallow: /controlpanel/
Disallow: /data/
Disallow: /*.mdb$
Disallow: /*_setup*.*
Disallow: /*_install*.*
Disallow: /*_admin*.*
Disallow: /*admin*/
Disallow: /media/
Disallow: /*.flv$
Disallow: /*.swf$
Disallow: /*.rm$
Disallow: /*.wmv$
Disallow: /*.mov$
Disallow: /*.mp3$
Disallow: /*.wma$
Disallow: /*.ram$
Disallow: /*.zip$
Disallow: /*.rar$
#
# Disallow to log and trap bots that don't abide by these directives.
# The robot-trap utility must be set up for this to function properly;
# otherwise leave it commented out.
#Disallow: /robot-trap/
#
# Extended Parameters
# Proposed extensions only; not universally recognized by crawlers.
#
# maximum rate of 1 page every 300 seconds
Request-rate: 1/300
#
# only visit between 9:00 AM and 2:00 PM UTC (1:00-6:00 AM PST)
Visit-time: 0900-1400