Exemplo n.º 1
0
def main(**kwargs):
    """Poll a fixed list of URLs forever, fetching each through a proxy.

    Keyword Args:
        sd (str): start date passed to ``util.specified_date`` (default '').
        ed (str): end date passed to ``util.specified_date`` (default '').
        interval (int): seconds to sleep between passes; a falsy value
            makes the loop run exactly once (default 60).

    Any extra keyword arguments are forwarded to ``fetch_data``.
    """
    start_date = kwargs.get('sd', '')
    end_date = kwargs.get('ed', '')
    interval = kwargs.get('interval', 60)
    date_list = util.specified_date(start_date, end_date)

    # Placeholder work list; each entry is expected to carry a 'url' key.
    data = [{'url': '1'}, {'url': '2'}]
    while True:
        # Refresh the proxy pool on every pass.
        proxy = util.get_prolist(10)
        for entry in data:
            url = entry.get('url', '')
            if not url:
                # Skip malformed entries that have no URL.
                continue
            fetch_data(url=url, proxy=proxy, headers=default_headers, **kwargs)

            # Control crawling according to the URL's date pattern
            # (placeholder — not yet implemented).
            for str_time in date_list:
                pass

        if not interval:
            break
        print('-------------- sleep %s sec -------------' % interval)
        time.sleep(interval)
Exemplo n.º 2
0
    def parse_resp(self, resp):
        '''
        First-level handler: gather category URLs, then schedule the
        next-level crawl (one request per date in the configured range).

        :param resp: response for the category listing page
        :return: yields ``scrapy.Request`` objects for per-date pages,
            each carrying the shared ``GoodsItem`` in ``meta['item']``
        '''
        item = GoodsItem()
        # NOTE(review): `category` is empty here, so the loop below never
        # executes as written — presumably it should be parsed from `resp`;
        # confirm against the original spider.
        category = []
        date_list = util.specified_date(self.start_date,
                                        end_date=self.end_date)
        for category_url in category:
            if self.abbreviation and self.abbreviation not in category_url:
                # Skip data not matching the requested abbreviation (used
                # when a specific lottery type has been selected).
                continue
            # The bare string below is a leftover placeholder meaning
            # "crawl rules"; it has no runtime effect.
            '''
            抓取规则
            '''
            today_url = ''
            # Determine the storage key: second '-'-separated segment of
            # the category URL. NOTE(review): raises IndexError if the URL
            # contains no '-'.
            result_key = category_url.split('-')[1]
            # NOTE(review): `demo_test` is never used after this lookup.
            demo_test = config.PKS_KEY_DICT.get(result_key, '')

            for history_date in date_list:
                # e.g. '2020-01-02' -> '20200102', used to swap the literal
                # 'today' marker in the URL for a concrete date.
                date_time = ''.join(history_date.split('-'))
                url = today_url.replace('today', date_time)
                yield scrapy.Request(url=url,
                                     headers=self.headers,
                                     callback=self.parse_product,
                                     meta={'item': item})