예제 #1
0
 def get_robots_parser(self, url: str) -> "RobotFileParser":
     """Return a ``RobotFileParser`` for the robots.txt found at *url*.

     The robots.txt body is read from ``self.store`` when an entry for
     *url* exists there.  Otherwise it is downloaded via ``download_page``
     and saved to the store so later calls reuse it.  On download, HTTP
     401/403 is recorded as ``self.DISALLOW_ALL`` and every other 4xx
     status (including 404) as ``self.ALLOW_ALL``; for any other status
     the downloaded body is stored unchanged.

     Args:
         url: Address of the robots.txt file.

     Returns:
         A parser with ``disallow_all`` or ``allow_all`` set when the
         (stripped) body equals the corresponding sentinel, otherwise a
         parser loaded with the rules parsed from the body.
     """
     if self.store.exists(url, 'txt'):
         body = self.store.load_url(url, 'txt')
     else:
         page, status_code = download_page(url, 'Robot')
         if status_code in (401, 403):
             # Access to robots.txt itself is denied: treat the whole
             # site as off limits.
             body = self.DISALLOW_ALL
         elif 400 <= status_code < 500:  # including status_code 404
             # No usable robots.txt: nothing is restricted.
             body = self.ALLOW_ALL
         else:
             # NOTE(review): 5xx responses land here too, so an error
             # page body gets cached as if it were robots.txt -- confirm
             # this is intended.
             body = page.body
         self.store.save_url(url, body, 'txt')

     rp = RobotFileParser()
     stripped = body.strip()
     if stripped == self.ALLOW_ALL:
         rp.allow_all = True
     elif stripped == self.DISALLOW_ALL:
         rp.disallow_all = True
     else:
         # 'utf-8-sig' drops a leading byte-order mark; left in place it
         # would glue itself to the first "User-agent" line and cause
         # that group's rules to be skipped.  Undecodable bytes are
         # ignored so one stray byte cannot abort parsing of the rest
         # of the file.
         text = body.decode('utf-8-sig', errors='ignore')
         rp.parse(text.splitlines())
     return rp