def parse(self, url, hxs, wd_rpc, thumbnail_url, access_denied_cookie=None):
        """Build a post item from a detail page and submit it via ``wd_rpc``.

        The title, image source, content and tags are read from ``hxs`` using
        the selectors stored in ``self.page_selector_dict``.

        Args:
            url: Address of the page; forwarded to ``WDPost.get_default``.
            hxs: Parsed page object; queried through the value helpers and
                through its ``xpath()`` method for the tags.
            wd_rpc: Client object whose ``post_to_wd`` receives the item.
            thumbnail_url: Forwarded unchanged to ``WDPost.get_default``.
            access_denied_cookie: Accepted but not used by this
                implementation.

        Returns:
            The item produced by ``WDPost.get_default``.
        """
        selectors = self.page_selector_dict

        title = self.get_value_response(hxs, selectors['title'])
        image_src = self.get_value_response(hxs, selectors['image'])
        content = self.get_all_value_response(hxs, selectors['content'])
        tags = hxs.xpath(selectors['tags']).extract()

        item = WDPost.get_default(url, self.url_from, title, image_src, thumbnail_url, content, tags)

        # The value returned by post_to_wd is not needed here, so it is not kept.
        wd_rpc.post_to_wd(item)

        return item
Пример #2
0
    def parse(self, url, hxs, wd_rpc, thumbnail_url, access_denied_cookie):
        """Build a post item from a detail page and submit it via ``wd_rpc``.

        The title, image source and content are read from ``hxs`` using the
        selectors stored in ``self.page_selector_dict``; the tag list is
        always empty.

        Args:
            url: Address of the page; forwarded to ``WDPost.get_default``.
            hxs: Parsed page object handed to the value helpers.
            wd_rpc: Client object whose ``post_to_wd`` receives the item.
            thumbnail_url: Forwarded unchanged to ``WDPost.get_default``.
            access_denied_cookie: Forwarded to ``WDPost.get_default`` as the
                keyword argument of the same name.

        Returns:
            The item produced by ``WDPost.get_default``.
        """
        selectors = self.page_selector_dict

        title = self.get_value_response(hxs, selectors['title'])
        image_src = self.get_value_response(hxs, selectors['image'])
        content = self.get_all_value_response(hxs, selectors['content'])

        # No tags were found on the detail page, so an empty list is sent.
        tags = []

        item = WDPost.get_default(url, self.url_from, title, image_src, thumbnail_url, content, tags,
                                  access_denied_cookie=access_denied_cookie)

        # The value returned by post_to_wd is not needed here, so it is not kept.
        wd_rpc.post_to_wd(item)

        return item
Пример #3
0
    def parse(self,
              url,
              hxs,
              wd_rpc,
              thumbnail_url,
              access_denied_cookie=None):
        """Build a post item from a detail page and submit it via ``wd_rpc``.

        The title, image source, content and tags are read from ``hxs`` using
        the selectors stored in ``self.page_selector_dict``.

        Args:
            url: Address of the page; forwarded to ``WDPost.get_default``.
            hxs: Parsed page object; queried through the value helpers and
                through its ``xpath()`` method for the tags.
            wd_rpc: Client object whose ``post_to_wd`` receives the item.
            thumbnail_url: Forwarded unchanged to ``WDPost.get_default``.
            access_denied_cookie: Accepted but not used by this
                implementation.

        Returns:
            The item produced by ``WDPost.get_default``.
        """
        selectors = self.page_selector_dict

        title = self.get_value_response(hxs, selectors['title'])
        image_src = self.get_value_response(hxs, selectors['image'])
        content = self.get_all_value_response(hxs, selectors['content'])
        tags = hxs.xpath(selectors['tags']).extract()

        item = WDPost.get_default(url, self.url_from, title, image_src,
                                  thumbnail_url, content, tags)

        # The value returned by post_to_wd is not needed here, so it is not
        # kept.
        wd_rpc.post_to_wd(item)

        return item
Пример #4
0
    def parse(self, url, hxs, wd_rpc, thumbnail_url, access_denied_cookie):
        """Build a post item from a detail page and submit it via ``wd_rpc``.

        The title, image source and content are read from ``hxs`` using the
        selectors stored in ``self.page_selector_dict``; the tag list is
        always empty.

        Args:
            url: Address of the page; forwarded to ``WDPost.get_default``.
            hxs: Parsed page object handed to the value helpers.
            wd_rpc: Client object whose ``post_to_wd`` receives the item.
            thumbnail_url: Forwarded unchanged to ``WDPost.get_default``.
            access_denied_cookie: Forwarded to ``WDPost.get_default`` as the
                keyword argument of the same name.

        Returns:
            The item produced by ``WDPost.get_default``.
        """
        selectors = self.page_selector_dict

        title = self.get_value_response(hxs, selectors['title'])
        image_src = self.get_value_response(hxs, selectors['image'])
        content = self.get_all_value_response(hxs, selectors['content'])

        # No tags were found on the detail page, so an empty list is sent.
        tags = []

        item = WDPost.get_default(url,
                                  self.url_from,
                                  title,
                                  image_src,
                                  thumbnail_url,
                                  content,
                                  tags,
                                  access_denied_cookie=access_denied_cookie)

        # The value returned by post_to_wd is not needed here, so it is not
        # kept.
        wd_rpc.post_to_wd(item)

        return item