Пример #1
0
 def parse(self, response):
     """Yield one item per feed listed in the response body.

     The raw body is passed to ``listparser.parse``; every entry in the
     result's ``feeds`` attribute contributes its ``url`` as the ``link``
     field of a fresh ``PodsearchbotItem``.
     """
     parsed = listparser.parse(response.body)
     for entry in parsed.feeds:
         result = PodsearchbotItem()
         result['link'] = entry.url
         yield result
Пример #2
0
 def parse_podcast_page(self, response):
     """Yield an item whose ``link`` is the second href matched on the page.

     The hrefs are taken from the first anchor of each cell in the first
     row of any ``table`` with class ``entry``. If fewer than two hrefs
     match, nothing is yielded.
     """
     hxs = HtmlXPathSelector(response)
     podcast_url_xpath = "//table[@class='entry']//tr[1]/td/a[1]/@href"
     podcast_link = hxs.select(podcast_url_xpath).extract()
     # NOTE(review): index 1 picks the second match, not the first --
     # confirm this is intended for the target page layout.
     try:
         feed_url = podcast_link[1]
     except IndexError:
         # Builtin IndexError is used instead of ``exceptions.IndexError`` so
         # the handler does not depend on the Python 2-only ``exceptions``
         # module.
         return
     item = PodsearchbotItem()
     item['link'] = feed_url
     yield item
Пример #3
0
 def parse(self, response):
     """Yield one item per ``url`` attribute of the nested outline elements.

     Values starting with ``/`` are prefixed with ``self._baseUrl``; all
     other values are used unchanged.
     """
     selector = HtmlXPathSelector(response)
     # Relative lookup; the absolute "/opml/body/outline/outline/@url" form
     # is intentionally not used here.
     urls = selector.select("//outline/outline/@url").extract()
     for url in urls:
         target = self._baseUrl + url if url.startswith('/') else url
         entry = PodsearchbotItem()
         entry['link'] = target
         yield entry
Пример #4
0
    def parse_podcast_page(self, response):
        """Yield an item for the href of the third "chicklets" list entry.

        Nothing is yielded when the xpath matches nothing or when the first
        matched href is exactly ``"#"``.
        """
        feed_href_xpath = (
            "/html/body/div[@class='container']/div[@id='column']"
            "/div[@id='podcast']/div[@id='podcast_details']"
            "/div[@class='konafilter']"
            "/div[@class='pf_box_header right nomobile']"
            "/ul[@class='chicklets nomobile']/li[3]/a/@href"
        )
        matches = HtmlXPathSelector(response).select(feed_href_xpath).extract()

        # No match at all, or the href is just "#": emit nothing.
        if not matches or matches[0] == "#":
            return

        result = PodsearchbotItem()
        result['link'] = matches[0]
        yield result
Пример #5
0
    def parse_podcast_page(self, response):
        """Yield an item for the teaser-title link of a podcast page.

        A link starting with ``/`` is prefixed with ``self._baseUrl``. A link
        under ``self._baseUrl + '/podcast_url'`` is passed through
        ``self.getContentLocation``; if that raises ``KeyError`` the
        unresolved link is kept. Nothing is yielded when the page has no
        matching link.
        """
        hxs = HtmlXPathSelector(response)

        podcast_url_xpath = "/html/body/div[@class='container_20']/div[@id='teasertitle']/div[@class='teasertitle']/a/@href"
        matches = hxs.select(podcast_url_xpath).extract()
        if not matches:
            # No teaser link: return instead of failing with IndexError on [0].
            return

        link = matches[0]
        if link.startswith('/'):
            link = self._baseUrl + link
        if link.startswith(self._baseUrl + '/podcast_url'):
            try:
                link = self.getContentLocation(link)
            except KeyError:
                # Broken link: fall through and emit the unresolved URL.
                # Builtin KeyError avoids the Python 2-only ``exceptions``
                # module.
                pass

        item = PodsearchbotItem()
        item['link'] = link
        yield item
Пример #6
0
    def parse_podcast_page(self, response):
        """Yield an item for the feed-head link of a podcast page.

        A link starting with ``/`` is prefixed with ``self._baseUrl``. A link
        under ``self._baseUrl + '/feed_url'`` is passed through
        ``self.getContentLocation``; if that raises ``KeyError`` the
        unresolved link is kept. Nothing is yielded when the page has no
        matching link.
        """
        hxs = HtmlXPathSelector(response)

        podcast_url_xpath = "/html/body/div[@id='page']/div[@id='content_home']/div[@id='content_podcast_col']/table/tbody/tr/td/table/tbody/tr/td[@class='feed_headbox'][1]/h1[@class='feed_head']/a/@href"
        matches = hxs.select(podcast_url_xpath).extract()
        if not matches:
            # No link on the page. Returning here avoids assigning an unbound
            # ``link`` to the item, which previously raised UnboundLocalError.
            return

        link = matches[0]
        if link.startswith('/'):
            link = self._baseUrl + link
        if link.startswith(self._baseUrl + '/feed_url'):
            try:
                link = self.getContentLocation(link)
            except KeyError:
                # Broken link: fall through and emit the unresolved URL.
                # Builtin KeyError avoids the Python 2-only ``exceptions``
                # module.
                pass

        item = PodsearchbotItem()
        item['link'] = link
        yield item
Пример #7
0
    def parse_podcast_page(self, response):
        """Yield an item for the feed link found on a podcast page.

        Three xpaths are tried in a fixed order. For each one, the first
        matched href is stored as the item's ``link`` unless it starts with
        one of the rejected prefixes, so a later usable match overwrites an
        earlier one. If no xpath produced a usable link, a warning is printed
        and nothing is yielded.
        """
        hxs = HtmlXPathSelector(response)
        item = PodsearchbotItem()

        candidate_xpaths = (
            "//div[@id='content']//a[5]/@href",
            "//div[@id='content']//a[4]/@href",
            "//div[@id='content']//div[@class='boxcontent']/a[2]/@href",
        )
        rejected_prefixes = ('/community/map;show=', 'http://podster.de/view/')

        for xpath in candidate_xpaths:
            try:
                href = hxs.select(xpath).extract()[0]
            except IndexError:
                # This xpath matched nothing; move on to the next candidate.
                continue
            if not href.startswith(rejected_prefixes):
                item['link'] = href

        # Reading the field raises KeyError when no candidate set it.
        try:
            item['link']
        except KeyError:
            message = (
                'PodsterDe: WARNING: The page %s did not contain a link to a feed.'
                % response.url)
            print(message)
            return
        yield item