Пример #1
0
    def start_requests(self):
        """
        Build the initial search request for one queued task.

        Pops a task id for this spider with ``pop_task``. When nothing is
        queued, a notice is printed and no request is produced. Otherwise the
        ``FetchTask`` record and a cookie set are loaded, and a single request
        to the weixin.sogou.com search URL is yielded, using the task's
        UTF-8 encoded ``follow_name`` as the query.

        :return: generator yielding at most one ``scrapy.Request``
        """
        pending_id = pop_task(self.name)
        if not pending_id:
            print('%s task is empty' % self.name)
            return

        task = get_item(FetchTask, pending_id)
        jar_id, jar_cookies = get_cookies(self.name)

        # NOTE(review): 'type': 1 presumably selects account search (the
        # callback is parse_account_search_list) -- confirm against the site.
        query_args = {
            'type': 1,
            'query': task.follow_name.encode('utf-8'),
        }
        search_url = get_update_url('http://weixin.sogou.com/weixin', query_args)

        # Task fields ride along in meta so the callback can read them;
        # 'cookiejar' carries the id returned by get_cookies.
        request_meta = {
            'cookiejar': jar_id,
            'task_id': task.id,
            'platform_id': task.platform_id,
            'channel_id': task.channel_id,
            'follow_id': task.follow_id,
            'follow_name': task.follow_name,
        }

        search_request = scrapy.Request(
            url=search_url,
            cookies=jar_cookies,
            callback=self.parse_account_search_list,
            meta=request_meta
        )
        yield search_request
Пример #2
0
    def start_requests(self):
        """
        Build the initial profile request for one queued task.

        Pops a task id for this spider with ``pop_task``. When nothing is
        queued, a notice is printed and no request is produced. Otherwise the
        task id is printed, the ``FetchTask`` record is loaded, and a single
        request for ``http://m.toutiao.com/profile/<follow_id>/`` (with the
        fixed query parameters below applied through ``get_update_url``) is
        yielded.

        :return: generator yielding at most one ``scrapy.Request``
        """
        pending_id = pop_task(self.name)
        if not pending_id:
            print('%s task is empty' % self.name)
            return
        print('%s task id: %s' % (self.name, pending_id))

        task = get_item(FetchTask, pending_id)

        # Fixed query parameters attached to the profile URL.
        share_params = {
            'version_code': '6.4.2',
            'version_name': '',
            'device_platform': 'iphone',
            'tt_from': 'weixin',
            'utm_source': 'weixin',
            'utm_medium': 'toutiao_ios',
            'utm_campaign': 'client_share',
            'wxshare_count': '1',
        }
        profile_url = get_update_url(
            'http://m.toutiao.com/profile/%s/' % task.follow_id,
            share_params
        )

        # Task fields ride along in meta so the callback can read them.
        request_meta = {
            'task_id': task.id,
            'platform_id': task.platform_id,
            'channel_id': task.channel_id,
            'follow_id': task.follow_id,
            'follow_name': task.follow_name,
        }

        profile_request = scrapy.Request(
            url=profile_url,
            callback=self.get_profile,
            meta=request_meta
        )
        yield profile_request
Пример #3
0
    def get_article_task(self):
        """
        Build the article-list request for one queued task.

        Pops a task id for this spider with ``pop_task``. When nothing is
        queued, a notice is printed and no request is produced. Otherwise the
        task id is printed, the ``FetchTask`` record is loaded, and a single
        request for ``https://weibo.com/p/<follow_id>/wenzhang`` is yielded
        with ``parse_article_list`` as its callback.

        :return: generator yielding at most one ``scrapy.Request``
        """
        pending_id = pop_task(self.name)
        if not pending_id:
            print('%s task is empty' % self.name)
            return
        print('%s task id: %s' % (self.name, pending_id))

        task = get_item(FetchTask, pending_id)

        # The task's follow_id is used as the page id in the list URL.
        list_url = 'https://weibo.com/p/%s/wenzhang' % task.follow_id

        # Task fields ride along in meta so the callback can read them.
        request_meta = {
            'task_id': task.id,
            'platform_id': task.platform_id,
            'channel_id': task.channel_id,
            'follow_id': task.follow_id,
            'follow_name': task.follow_name,
        }

        list_request = scrapy.Request(
            url=list_url,
            callback=self.parse_article_list,
            meta=request_meta
        )
        yield list_request