Example 1
0
def crawl_stock_quote(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE, crawl_tick=False):
    """Crawl k-line data for every security in [start_code, end_code].

    For each security: fetch daily k-data from 163, backfill any dates
    missing from sina (both 'bfq' and 'hfq' fuquan variants), and — when
    *crawl_tick* is True — fetch tick data for dates on/after
    settings.START_TICK_DATE that are not already available locally.
    """
    for _, item in get_security_list(start=start_code, end=end_code).iterrows():
        code = item['code']

        # --- daily k-line from 163 ---
        logger.info("{} get stock kdata start".format(code))
        print("{} get stock kdata start".format(code))

        kdata_start = get_latest_download_trading_date(item, source='163')
        kdata_end = pd.Timestamp.today()
        if kdata_start > kdata_end:
            # Nothing new to download for this security.
            logger.info("{} stock kdata is ok".format(code))
            print("{} stock kdata is ok".format(code))
        else:
            process_crawl(StockKdataSpider163, {"security_item": item,
                                                "start_date": kdata_start,
                                                "end_date": kdata_end})

        logger.info("{} get stock kdata from 163 end".format(code))
        print("{} get stock kdata from 163 end".format(code))

        # --- backfill sina k-data for dates 163 has but sina lacks ---
        dates_163 = set(get_trading_dates(item, source='163'))
        for fuquan in ('bfq', 'hfq'):
            missing = dates_163 - set(get_trading_dates(item, source='sina', fuquan=fuquan))
            if not missing:
                logger.info("{} {} kdata from sina is ok".format(code, fuquan))
                print("{} {} kdata from sina is ok".format(code, fuquan))
                continue
            logger.info("{} get {} kdata from sina start".format(code, fuquan))
            print("{} get {} kdata from sina start".format(code, fuquan))
            process_crawl(StockKDataSpider, {"security_item": item,
                                             "trading_dates": missing,
                                             "fuquan": fuquan})
            logger.info("{} get {} kdata from sina end".format(code, fuquan))
            print("{} get {} kdata from sina end".format(code, fuquan))

        # --- tick data (opt-in) ---
        if crawl_tick:
            wanted = {d for d in dates_163 if d >= settings.START_TICK_DATE}
            missing_ticks = wanted - set(get_available_tick_dates(item))

            if missing_ticks:
                logger.info("{} get tick start".format(code))
                process_crawl(StockTickSpider, {"security_item": item,
                                                "trading_dates": missing_ticks})
                logger.info("{} get tick end".format(code))
            else:
                logger.info("{} tick is ok".format(code))
Example 2
0
    def yield_request(self, item, trading_dates=None):
        """Yield a download Request for each trading date whose tick file is missing.

        Falls back to all trading dates for *item* when *trading_dates* is
        empty or None. Dates earlier than both settings.START_TICK_DATE and
        settings.AVAILABLE_TICK_DATE are skipped, as are dates whose tick
        file already exists on disk.
        """
        dates = trading_dates if trading_dates else get_trading_dates(item)

        # A date is eligible only if it is on/after both configured cutoffs,
        # i.e. on/after the later of the two.
        cutoff = max(get_datetime(settings.START_TICK_DATE),
                     get_datetime(settings.AVAILABLE_TICK_DATE))

        for trading_date in dates:
            if get_datetime(trading_date) < cutoff:
                continue

            tick_path = get_tick_path(item, trading_date)
            if os.path.exists(tick_path):
                # Already downloaded — skip.
                continue

            yield Request(url=self.get_tick_url(trading_date, item['exchange'] + item['code']),
                          meta={'proxy': None,
                                'path': tick_path,
                                'trading_date': trading_date,
                                'item': item},
                          headers=DEFAULT_TICK_HEADER,
                          callback=self.download_tick)