""" robotparser.py Copyright (C) 2000 Bastian Kleineidam You can choose between two licenses when using this package: 1) GNU GPLv2 2) PSF license for Python 2.2 The robots.txt Exclusion Protocol is implemented as specified in http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html """ import urlparse import urllib __all__ = ["RobotFileParser"] class RobotFileParser: """ This class provides a set of methods to read, parse and answer questions about a single robots.txt file. """ def __init__(self, url=''): self.entries = [] self.default_entry = None self.disallow_all = False self.allow_all = False self.set_url(url) self.last_checked = 0 def mtime(self): """Returns the time the robots.txt file was last fetched. This is useful for long-running web spiders that need to check for new robots.txt files periodically. """ return self.last_checked def modified(self): """Sets the time the robots.txt file was last fetched to the current time. """ import time self.last_checked = time.time() def set_url(self, url): """Sets the URL referring to a robots.txt file.""" self.url = url self.host, self.path = urlparse.urlparse(url)[1:3] def read(self): """Reads the robots.txt URL and feeds it to the parser.""" opener = URLopener() f = opener.open(self.url) lines = [line.strip() for line in f] f.close() self.errcode = opener.errcode if self.errcode in (401, 403): self.disallow_all = True elif self.errcode >= 400: self.allow_all = True elif self.errcode == 200 and lines: self.parse(lines) def _add_entry(self, entry): if "*" in entry.useragents: # the default entry is considered last if self.default_entry is None: # the first default entry wins self.default_entry = entry else: self.entries.append(entry) def parse(self, lines): """parse the input lines from a robots.txt file. 
We allow that a user-agent: line is not preceded by one or more blank lines.""" # states: # 0: start state # 1: saw user-agent line # 2: saw an allow or disallow line state = 0 linenumber = 0 entry = Entry() for line in lines: linenumber += 1 if not line: if state == 1: entry = Entry() state = 0 elif state == 2: self._add_entry(entry) entry = Entry() state = 0 # remove optional comment and strip line i = line.find('#') if i >= 0: line = line[:i] line = line.strip() if not line: continue line = line.split(':', 1) if len(line) == 2: line[0] = line[0].strip().lower() line[1] = urllib.unquote(line[1].strip()) if line[0] == "user-agent": if state == 2: self._add_entry(entry) entry = Entry() entry.useragents.append(line[1]) state = 1 elif line[0] == "disallow": if state != 0: entry.rulelines.append(RuleLine(line[1], False)) state = 2 elif line[0] == "allow": if state != 0: entry.rulelines.append(RuleLine(line[1], True)) state = 2 if state == 2: self._add_entry(entry) def can_fetch(self, useragent, url): """using the parsed robots.txt decide if useragent can fetch url""" if self.disallow_all: return False if self.allow_all: return True # search for given user agent matches # the first match counts parsed_url = urlparse.urlparse(urllib.unquote(url)) url = urlparse.urlunparse(('', '', parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment)) url = urllib.quote(url) if not url: url = "/" for entry in self.entries: if entry.applies_to(useragent): return entry.allowance(url) # try the default entry last if self.default_entry: return self.default_entry.allowance(url) # agent not found ==> access granted return True def __str__(self): return ''.join([str(entry) + "\n" for entry in self.entries]) class RuleLine: """A rule line is a single "Allow:" (allowance==True) or "Disallow:" (allowance==False) followed by a path.""" def __init__(self, path, allowance): if path == '' and not allowance: # an empty value means allow all allowance = True self.path = urllib.quote(path) self.allowance = allowance def applies_to(self, filename): return self.path == "*" or filename.startswith(self.path) def __str__(self): return (self.allowance and "Allow" or "Disallow") + ": " + self.path class Entry: """An entry has one or more user-agents and zero or more rulelines""" def __init__(self): self.useragents = [] self.rulelines = [] def __str__(self): ret = [] for agent in self.useragents: ret.extend(["User-agent: ", agent, "\n"]) for line in self.rulelines: ret.extend([str(line), "\n"]) return ''.join(ret) def applies_to(self, useragent): """check if this entry applies to the specified agent""" # split the name token and make it lower case useragent = useragent.split("/")[0].lower() for agent in self.useragents: if agent == '*': # we have the catch-all agent return True agent = agent.lower() if agent in useragent: return True return False def allowance(self, filename): """Preconditions: - our agent applies to this entry - filename is URL decoded""" for line in self.rulelines: if line.applies_to(filename): return line.allowance return True class URLopener(urllib.FancyURLopener): def __init__(self, *args): urllib.FancyURLopener.__init__(self, *args) self.errcode = 200 def prompt_user_passwd(self, host, realm): ## If robots.txt file is accessible only with a password, ## we act as if the file wasn't there. return None, None def http_error_default(self, url, fp, errcode, errmsg, headers): self.errcode = errcode return urllib.FancyURLopener.http_error_default(self, url, fp, errcode, errmsg, headers)