def from_robots(cls, robots_url):
    """Build sitemaps from the sitemap entries listed in a robots.txt file.

    Downloads ``robots_url``, extracts every match of
    ``cls.ROBOTS_SITEMAP_RE`` from the response body, and loads each
    distinct (whitespace-stripped) match through ``Sitemap.from_url``.

    Args:
        robots_url: URL of the robots.txt file to download.

    Returns:
        A list with one ``Sitemap.from_url`` result per unique sitemap
        entry, in the order the entries first appear in robots.txt.

    Raises:
        cls.RobotsLoadError: the server answered with an HTTP error status.
        cls.RobotsParseError: the body contains no sitemap entries.

    Other ``requests`` failures (connection errors, timeouts) are not
    caught here and propagate to the caller unchanged.
    """
    try:
        response = requests.get(robots_url, timeout=30)
        # requests.get() does not raise on 4xx/5xx by itself; without this
        # call the HTTPError handler below could never run and an error
        # page would be parsed as if it were robots.txt.
        response.raise_for_status()
    except requests.HTTPError as e:
        raise cls.RobotsLoadError(
            "robots.txt from {} return {} status code".format(
                robots_url, e.response.status_code
            )
        )

    # NOTE(review): response.content is the raw (bytes) body, so
    # ROBOTS_SITEMAP_RE has to be a pattern that accepts that type --
    # confirm against the pattern's definition.
    robots_txt = response.content
    sitemaps = cls.ROBOTS_SITEMAP_RE.findall(robots_txt)
    if not sitemaps:
        raise cls.RobotsParseError(
            "robots.txt from {} sitemaps not found".format(robots_url)
        )

    # Drop duplicates while keeping first-seen order, so the returned list
    # is deterministic (a plain set would yield an arbitrary order).
    seen = set()
    unique_urls = []
    for entry in sitemaps:
        sitemap_url = entry.strip()
        if sitemap_url not in seen:
            seen.add(sitemap_url)
            unique_urls.append(sitemap_url)

    return [Sitemap.from_url(url) for url in unique_urls]
def from_url(cls, url):
    """Load a sitemap from ``url`` by delegating to ``Sitemap.from_url``.

    ``cls`` is not used; the call always goes through ``Sitemap``.

    Args:
        url: Passed unchanged to ``Sitemap.from_url``.

    Returns:
        Whatever ``Sitemap.from_url(url)`` returns.
    """
    sitemap = Sitemap.from_url(url)
    return sitemap