def crawl_index_quote():
    """Crawl daily kdata for every index, then crawl market-summary data
    for the main boards (Shanghai, Shenzhen, SME board, ChiNext) on the
    dates where summary columns are still missing."""
    for _, security_item in get_security_list(security_type='index').iterrows():
        # Daily kline from 163.
        logger.info("{} get index kdata start".format(security_item['code']))

        start_date = get_latest_download_trading_date(security_item, source='163')
        end_date = pd.Timestamp.today()
        if start_date > end_date:
            logger.info("{} kdata is ok".format(security_item['code']))
        else:
            process_crawl(StockKdataSpider163, {"security_item": security_item,
                                                "start_date": start_date,
                                                "end_date": end_date})

        logger.info("{} get index kdata from 163 end".format(security_item['code']))

        # Market-summary data for: Shanghai, Shenzhen, SME board, ChiNext.
        summary_index_ids = ('index_sh_000001', 'index_sz_399106',
                             'index_sz_399005', 'index_sz_399006')
        if security_item['id'] in summary_index_ids:
            df = get_kdata(security_item=security_item)
            # Only dates where any of the summary columns is still missing.
            missing_mask = (df['turnoverRate'].isna() | df['tCap'].isna()
                            | df['mCap'].isna() | df['pe'].isna())
            df = df[missing_mask]

            if not df.empty:
                dates = df.index.strftime('%Y-%m-%d').tolist()
                if dates:
                    process_crawl(StockSummarySpider, {"security_item": security_item,
                                                       "the_dates": dates})
def fetch_kdata(exchange_str='bitstamp'):
    """Fetch daily OHLCV ("kdata") for the crypto pairs listed on *exchange_str*.

    For every cryptocurrency security on the exchange whose name is in
    CRYPTOCURRENCY_PAIR, fetch its daily candles via ccxt, append any new
    rows to the locally stored kdata DataFrame and persist it.  Today's
    (still-running) candle is skipped.

    :param exchange_str: ccxt exchange id, e.g. ``'bitstamp'``
    """
    # getattr is the safe, direct equivalent of eval("ccxt.<name>()").
    ccxt_exchange = getattr(ccxt, exchange_str)()

    if not ccxt_exchange.has['fetchOHLCV']:
        logger.warning("exchange:{} not support fetchOHLCV".format(exchange_str))
        return

    for _, security_item in get_security_list(security_type='cryptocurrency',
                                              exchanges=[exchange_str]).iterrows():
        try:
            if security_item['name'] not in CRYPTOCURRENCY_PAIR:
                continue

            start_date, df = get_latest_download_trading_date(security_item)
            # Daily kline is only persisted up to yesterday; today's candle
            # is incomplete.
            end_date = pd.Timestamp.today() - pd.DateOffset(1)

            if start_date and (start_date > end_date):
                logger.info("{} kdata is ok".format(security_item['code']))
                continue

            try:
                kdatas = ccxt_exchange.fetch_ohlcv(security_item['name'], timeframe='1d')
                # Respect the exchange rate limit between requests.
                time.sleep(5)
            except Exception:
                # logger.exception already records the traceback; passing the
                # exception as a %-format arg (as before) was a logging error.
                logger.exception("fetch_kdata for {} {} failed".format(exchange_str,
                                                                       security_item['name']))
                continue

            rows = []
            for kdata in kdatas:
                timestamp = pd.Timestamp.fromtimestamp(int(kdata[0] / 1000))
                # Skip today's still-running candle.
                if is_same_date(timestamp, pd.Timestamp.today()):
                    continue
                rows.append({
                    'timestamp': to_time_str(timestamp),
                    'code': security_item['code'],
                    'name': security_item['name'],
                    'open': kdata[1],
                    'high': kdata[2],
                    'low': kdata[3],
                    'close': kdata[4],
                    'volume': kdata[5],
                    'securityId': security_item['id'],
                    'preClose': None,
                    'change': None,
                    'changePct': None
                })

            if rows:
                # DataFrame.append was removed in pandas>=2.0 and was O(n^2);
                # build the new rows once and concat a single time.
                df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

            if not df.empty:
                df = df.loc[:, KDATA_COMMON_COL]
                kdata_df_save(df, get_kdata_path(security_item), calculate_change=True)
                logger.info(
                    "fetch_kdata for exchange:{} security:{} success".format(
                        exchange_str, security_item['name']))
        except Exception:
            # The original format string had two placeholders but three args,
            # silently dropping the exception; log it with the traceback.
            logger.exception(
                "fetch_kdata for exchange:{} security:{} failed".format(
                    exchange_str, security_item['name']))
def crawl_stock_quote(start_code=STOCK_START_CODE, end_code=STOCK_END_CODE, crawl_tick=False):
    """Crawl stock kdata for codes in [start_code, end_code]:

    - daily kline from 163,
    - bfq/hfq kline backfill from sina for dates 163 has but sina lacks,
    - optionally tick data for dates on/after settings.START_TICK_DATE.

    Progress is both logged and printed so interactive runs show feedback.
    """
    for _, item in get_security_list(start=start_code, end=end_code).iterrows():
        code = item['code']

        # --- daily kline from 163 ---
        logger.info("{} get stock kdata start".format(code))
        print("{} get stock kdata start".format(code))

        start_date = get_latest_download_trading_date(item, source='163')
        end_date = pd.Timestamp.today()
        if start_date > end_date:
            logger.info("{} stock kdata is ok".format(code))
            print("{} stock kdata is ok".format(code))
        else:
            process_crawl(StockKdataSpider163, {"security_item": item,
                                                "start_date": start_date,
                                                "end_date": end_date})

        logger.info("{} get stock kdata from 163 end".format(code))
        print("{} get stock kdata from 163 end".format(code))

        # --- sina backfill, both unadjusted (bfq) and adjusted (hfq) ---
        base_dates = set(get_trading_dates(item, source='163'))
        for fuquan in ('bfq', 'hfq'):
            sina_dates = set(get_trading_dates(item, source='sina', fuquan=fuquan))
            diff_dates = base_dates - sina_dates
            if diff_dates:
                logger.info("{} get {} kdata from sina start".format(code, fuquan))
                print("{} get {} kdata from sina start".format(code, fuquan))
                process_crawl(StockKDataSpider, {"security_item": item,
                                                 "trading_dates": diff_dates,
                                                 "fuquan": fuquan})
                logger.info("{} get {} kdata from sina end".format(code, fuquan))
                print("{} get {} kdata from sina end".format(code, fuquan))
            else:
                logger.info("{} {} kdata from sina is ok".format(code, fuquan))
                print("{} {} kdata from sina is ok".format(code, fuquan))

        # --- tick data (optional) ---
        if crawl_tick:
            tick_dates = {d for d in base_dates if d >= settings.START_TICK_DATE}
            diff_dates = tick_dates - set(get_available_tick_dates(item))
            if diff_dates:
                logger.info("{} get tick start".format(code))
                process_crawl(StockTickSpider, {"security_item": item,
                                                "trading_dates": diff_dates})
                logger.info("{} get tick end".format(code))
            else:
                logger.info("{} tick is ok".format(code))