Exemplo n.º 1
0
    def _parse_attachment_ids_request_query(self, page, since=None):
        """Extract attachment ids from the markup of a request query page.

        Args:
            page: HTML markup to scan for review links of the form
                ``attachment.cgi?id=<N>&action=review``.
            since: Optional value comparable with ``datetime``. When falsy,
                every matching link on the page is used. Otherwise only the
                table with class "requests" is scanned, and rows whose
                ``YYYY-MM-DD HH:MM`` timestamp is earlier than ``since`` are
                skipped; rows without such a timestamp are kept.

        Returns:
            A list of attachment ids as ints.
        """
        # Raw string so the regex escapes are not treated as (invalid) string
        # escapes; the dot is escaped so it only matches a literal ".".
        # The id is captured directly so that digits appearing earlier in the
        # href (e.g. in a host name or port) cannot be mistaken for it.
        attachment_href = re.compile(
            r"attachment\.cgi\?id=(\d+)&action=review")

        def attachment_id(tag):
            # Tags reaching here were selected with attachment_href, so the
            # search is expected to succeed.
            return int(attachment_href.search(tag["href"]).group(1))

        # If no date is given, return all ids.
        if not since:
            attachment_links = SoupStrainer("a", href=attachment_href)
            return [
                attachment_id(tag)
                for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
            ]

        # Parse the main table only.
        date_format = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
        mtab = SoupStrainer("table", {"class": "requests"})
        soup = BeautifulSoup(page, parseOnlyThese=mtab)
        patch_ids = []

        for row in soup.findAll("tr"):
            patch_tag = row.find("a", {"href": attachment_href})
            if not patch_tag:
                continue
            patch_id = attachment_id(patch_tag)
            date_tag = row.find("td", text=date_format)
            # Skip rows that are older than the requested cut-off.
            if date_tag and datetime.strptime(
                    date_format.search(date_tag).group(0),
                    "%Y-%m-%d %H:%M") < since:
                continue
            patch_ids.append(patch_id)
        return patch_ids
Exemplo n.º 2
0
 def _parse_attachment_ids_request_query(self, page):
     """Extract attachment ids from the markup of a request query page.

     Args:
         page: HTML markup to scan for review links of the form
             ``attachment.cgi?id=<N>&action=review``.

     Returns:
         A list of attachment ids as ints, one per matching link.
     """
     # Raw string so the regex escapes are not treated as (invalid) string
     # escapes; the dot is escaped so it only matches a literal ".".
     # The id is captured directly so that digits appearing earlier in the
     # href (e.g. in a host name or port) cannot be mistaken for it.
     attachment_href = re.compile(r"attachment\.cgi\?id=(\d+)&action=review")
     attachment_links = SoupStrainer("a", href=attachment_href)
     return [
         int(attachment_href.search(tag["href"]).group(1))
         for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
     ]