Exemplo n.º 1
0
 def parse_date(self, response):
     """Return the date found in the page title, with "." swapped for "-".

     The text selected by ``//title/text()`` is passed through ``tostr``
     and searched with ``date_re``; the ``date`` group supplies the value.

     Raises:
         DateParseError: carrying the title text, when ``date_re`` finds
             no match in it.
     """
     page_title = tostr(Selector(response).xpath("//title/text()"))
     found = date_re.search(page_title)
     if found:
         return found.group("date").replace(".", "-")
     raise DateParseError(page_title)
Exemplo n.º 2
0
    def request_from_link(self, response, link):
        """Build the follow-up request for a single link node.

        An href starting with "javascript" is handed over to
        ``request_minutes``. Any other href is resolved against
        ``response.url`` and requested with ``parse_list`` as callback.
        """
        href = tostr(link.xpath("./@href"))

        if not href.startswith("javascript"):
            return Request(
                url=urljoin(response.url, href),
                headers=self.headers,
                callback=self.parse_list,
            )

        return self.request_minutes(response, link)
Exemplo n.º 3
0
 def parse_date(self, response):
     """Pull the date out of the response's title text.

     ``date_re`` is searched against the title; the matched ``date``
     group is returned with every '.' replaced by '-'.

     Raises:
         DateParseError: with the title text, if ``date_re`` does not
             match.
     """
     title_nodes = Selector(response).xpath('//title/text()')
     title_text = tostr(title_nodes)

     hit = date_re.search(title_text)
     if not hit:
         raise DateParseError(title_text)

     return hit.group('date').replace('.', '-')
Exemplo n.º 4
0
    def request_from_link(self, response, link):
        """Turn a link node into the request that should follow it.

        Returns the result of ``request_minutes`` when the href begins
        with 'javascript'; otherwise a ``Request`` for the href joined
        onto ``response.url``, using ``parse_list`` as the callback.
        """
        target = tostr(link.xpath('./@href'))
        is_script_link = target.startswith('javascript')

        return (
            self.request_minutes(response, link)
            if is_script_link
            else Request(url=urljoin(response.url, target),
                         headers=self.headers,
                         callback=self.parse_list)
        )
Exemplo n.º 5
0
    def request_minutes(self, response, link):
        """Create the request for the minutes page referenced by a link.

        The link's ``onclick`` attribute is matched with ``minutes_re``.
        Its ``url`` group has "frame.php" replaced by "viewer.total.php"
        and is then resolved against ``response.url``. The request uses
        ``parse_minutes`` as its callback.

        Raises:
            UrlParseError: carrying the onclick text, when ``minutes_re``
                does not match it.
        """
        handler = tostr(link.xpath("./@onclick"))
        matched = minutes_re.match(handler)

        if matched:
            viewer_path = matched.group("url").replace(
                "frame.php", "viewer.total.php"
            )
            return Request(
                url=urljoin(response.url, viewer_path),
                headers=self.headers,
                callback=self.parse_minutes,
            )

        raise UrlParseError(handler)
Exemplo n.º 6
0
    def request_minutes(self, response, link):
        """Build the minutes request from a link's ``onclick`` attribute.

        ``minutes_re`` must match the onclick text. The captured ``url``
        group is rewritten from 'frame.php' to 'viewer.total.php', joined
        onto ``response.url`` and requested with ``parse_minutes`` as the
        callback.

        Raises:
            UrlParseError: with the onclick text, if ``minutes_re`` fails
                to match.
        """
        onclick_text = tostr(link.xpath('./@onclick'))

        found = minutes_re.match(onclick_text)
        if not found:
            raise UrlParseError(onclick_text)

        frame_path = found.group('url')
        viewer_path = frame_path.replace('frame.php', 'viewer.total.php')
        absolute_url = urljoin(response.url, viewer_path)

        return Request(url=absolute_url, headers=self.headers, callback=self.parse_minutes)