def crawl_stock_quote(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE, crawl_tick=False):
    """Crawl daily k-line data, and optionally tick data, for a range of stocks.

    For every row produced by ``get_security_list(start=start_code,
    end=end_code).iterrows()`` the function:

    1. Runs ``StockKdataSpider163`` from the date returned by
       ``get_latest_download_trading_date(..., source='163')`` up to
       ``pd.Timestamp.today()``, unless that start date is already later
       than today.
    2. For each ``fuquan`` value in ``('bfq', 'hfq')``, runs
       ``StockKDataSpider`` for the trading dates that are present in the
       '163' data but absent from the 'sina' data.
    3. When ``crawl_tick`` is true, runs ``StockTickSpider`` for the '163'
       trading dates that are ``>= settings.START_TICK_DATE`` and not in
       ``get_available_tick_dates(security_item)``.

    Args:
        start_code: Passed to ``get_security_list`` as ``start``.
        end_code: Passed to ``get_security_list`` as ``end``.
        crawl_tick: When true, tick data is crawled after the k-line data.
    """

    def announce(message):
        # K-line progress messages go to both the logger and stdout.
        logger.info(message)
        print(message)

    securities = get_security_list(start=start_code, end=end_code)
    for _, security_item in securities.iterrows():
        code = security_item['code']

        # Crawl the daily k-line from 163.
        announce("{} get stock kdata start".format(code))
        start_date = get_latest_download_trading_date(security_item, source='163')
        end_date = pd.Timestamp.today()
        if start_date > end_date:
            announce("{} stock kdata is ok".format(code))
        else:
            kdata_163_args = {
                "security_item": security_item,
                "start_date": start_date,
                "end_date": end_date,
            }
            process_crawl(StockKdataSpider163, kdata_163_args)
        announce("{} get stock kdata from 163 end".format(code))

        # The 163 trading dates are the reference set for sina and tick data.
        base_dates = set(get_trading_dates(security_item, source='163'))
        for fuquan in ('bfq', 'hfq'):
            sina_dates = set(get_trading_dates(security_item, source='sina', fuquan=fuquan))
            missing_dates = base_dates - sina_dates
            if not missing_dates:
                announce("{} {} kdata from sina is ok".format(code, fuquan))
                continue

            announce("{} get {} kdata from sina start".format(code, fuquan))
            sina_args = {
                "security_item": security_item,
                "trading_dates": missing_dates,
                "fuquan": fuquan,
            }
            process_crawl(StockKDataSpider, sina_args)
            announce("{} get {} kdata from sina end".format(code, fuquan))

        # Crawl tick data (logged only, not printed).
        if not crawl_tick:
            continue

        tick_dates = {day for day in base_dates if day >= settings.START_TICK_DATE}
        missing_ticks = tick_dates - set(get_available_tick_dates(security_item))
        if missing_ticks:
            logger.info("{} get tick start".format(code))
            tick_args = {
                "security_item": security_item,
                "trading_dates": missing_ticks,
            }
            process_crawl(StockTickSpider, tick_args)
            logger.info("{} get tick end".format(code))
        else:
            logger.info("{} tick is ok".format(code))
def yield_request(self, item, trading_dates=None):
    """Yield a tick-download ``Request`` for each trading date still to fetch.

    A trading date is skipped when it is earlier than either
    ``settings.START_TICK_DATE`` or ``settings.AVAILABLE_TICK_DATE`` (both
    compared after conversion with ``get_datetime``), or when the file at
    ``get_tick_path(item, trading_date)`` already exists.

    Args:
        item: Security record; ``item['exchange']`` and ``item['code']`` are
            concatenated to build the tick URL.
        trading_dates: Iterable of dates to consider. When falsy (``None``
            or empty), ``get_trading_dates(item)`` is used instead.

    Yields:
        ``Request`` objects with ``self.download_tick`` as callback; ``meta``
        carries the destination ``path``, the ``trading_date`` and the
        ``item``, with ``proxy`` set to ``None``.
    """
    if not trading_dates:
        trading_dates = get_trading_dates(item)

    # The two cutoffs do not depend on the loop variable: convert them once
    # instead of on every iteration.
    start_cutoff = get_datetime(settings.START_TICK_DATE)
    available_cutoff = get_datetime(settings.AVAILABLE_TICK_DATE)
    security_id = item['exchange'] + item['code']

    for trading_date in trading_dates:
        # Convert each date once; both comparisons are kept so the skip
        # condition is exactly the original one.
        current = get_datetime(trading_date)
        if current < start_cutoff or current < available_cutoff:
            continue

        path = get_tick_path(item, trading_date)
        if os.path.exists(path):
            # Already downloaded; nothing to request.
            continue

        yield Request(
            url=self.get_tick_url(trading_date, security_id),
            meta={
                'proxy': None,
                'path': path,
                'trading_date': trading_date,
                'item': item,
            },
            headers=DEFAULT_TICK_HEADER,
            callback=self.download_tick,
        )