# Requires: from urllib.robotparser import RobotFileParser
def get_robots_parser(self, url: str) -> RobotFileParser:
    rp = RobotFileParser()
    if self.store.exists(url, 'txt'):
        # Reuse a previously cached robots.txt body.
        body = self.store.load_url(url, 'txt')
    else:
        page, status_code = download_page(url, 'Robot')
        body = page.body
        if status_code in (401, 403):
            # Access restricted: treat the entire site as disallowed.
            body = self.DISALLOW_ALL
        elif 400 <= status_code < 500:
            # Any other 4xx (including 404) means no robots.txt exists,
            # so everything is allowed.
            body = self.ALLOW_ALL
        self.store.save_url(url, body, 'txt')
    if body.strip() == self.ALLOW_ALL:
        rp.allow_all = True
    elif body.strip() == self.DISALLOW_ALL:
        rp.disallow_all = True
    else:
        # body is expected to be bytes; parse() takes an iterable of lines.
        rp.parse(body.decode('utf-8').splitlines())
    return rp
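
# A minimal usage sketch (an assumption, not part of the original class):
# given an instance `crawler` exposing this method, build the robots.txt URL
# at the site root and check a candidate page before fetching it. The agent
# name 'MyCrawler' and the example URL are placeholders.
#
#   from urllib.parse import urlsplit, urlunsplit
#
#   parts = urlsplit('https://example.com/some/page')
#   robots_url = urlunsplit((parts.scheme, parts.netloc, '/robots.txt', '', ''))
#   rp = crawler.get_robots_parser(robots_url)
#   if rp.can_fetch('MyCrawler', 'https://example.com/some/page'):
#       ...  # allowed by robots.txt; safe to download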