def add_factor_to_163(security_item):
    """
    copy the 'factor' column from the sina hfq kdata file into the 163 bfq kdata file.

    The 163 file is left untouched when it already has a 'factor' column
    without any missing value. Otherwise the whole 'factor' column is
    (re)assigned from the sina file and the 163 file is rewritten in place.

    Parameters
    ----------
    security_item : SecurityItem
        the security item; its 'code' entry is used in the log message

    Returns
    -------
    None

    """
    path_163 = get_kdata_path(security_item, source='163', fuquan='bfq')
    df_163 = pd_read_csv(path_163)

    if 'factor' in df_163.columns:
        missing_factor = df_163[df_163['factor'].isna()]

        if missing_factor.empty:
            # Format eagerly: a stdlib logger interpolates extra args with
            # %-style, so a '{}' placeholder plus a positional argument fails
            # at emit time instead of producing the message.
            logger.info("{} 163 factor is ok".format(security_item['code']))
            return

    path_sina = get_kdata_path(security_item, source='sina', fuquan='hfq')
    df_sina = pd_read_csv(path_sina)

    # Column assignment aligns on the index, which must therefore be unique
    # on the sina side: keep only the first row of each duplicated label.
    df_sina = df_sina[~df_sina.index.duplicated(keep='first')]
    df_163['factor'] = df_sina['factor']

    # NOTE(review): the index is not written back (index=False); presumably it
    # is also available as a regular column of the file - confirm.
    df_163.to_csv(path_163, index=False)
def get_event(security_item, event_type='finance_forecast', start_date=None, end_date=None, index='timestamp'):
    """
    load the stored events of a security, optionally limited to a date range.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    event_type : str
        {'finance_forecast','finance_report'}
    start_date : Timestamp str or Timestamp
        the start date for the event
    end_date : Timestamp str or Timestamp
        the end date for the event
    index : str
        forwarded to pd_utils.pd_read_csv as its `index` argument,default:'timestamp'

    Returns
    -------
    DataFrame
        the events read from the event file,or an empty DataFrame when the
        file does not exist

    """
    item = to_security_item(security_item)
    event_path = get_event_path(item, event_type)

    # Nothing stored for this security/event type: hand back an empty frame.
    if not os.path.exists(event_path):
        return pd.DataFrame()

    events = pd_utils.pd_read_csv(event_path, index=index, generate_id=True)
    return df_for_date_range(events, start_date=start_date, end_date=end_date)
def get_kdata(security_item, exchange=None, the_date=None, start_date=None, end_date=None, fuquan='bfq', source=None,
              level='day', generate_id=False):
    """
    get kdata.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code
    exchange : str
        the exchange,set this for cryptocurrency
    the_date : TimeStamp str or TimeStamp
        get the kdata for the exact date
    start_date : TimeStamp str or TimeStamp
        start date
    end_date : TimeStamp str or TimeStamp
        end date
    fuquan : str
        {"qfq","hfq","bfq"},default:"bfq"
    source : str
        the data source,{'163','sina','exchange'},just used for internal merge
    level : str or int
        the kdata level,{1,5,15,30,60,'day','week','month'},default : 'day'
        NOTE(review): accepted but not used by this function - confirm
        whether it should be forwarded to the path lookup.
    generate_id : bool
        forwarded to pd_utils.pd_read_csv,default:False

    Returns
    -------
    DataFrame or None
        the kdata; an empty DataFrame when the kdata file does not exist;
        None when `the_date` is given but is not in the index of the data.
        For stock data from source '163' that has a 'factor' column, the
        columns hfqClose/hfqOpen/hfqHigh/hfqLow are added, and
        qfqClose/qfqOpen/qfqHigh/qfqLow as well when a latest factor exists.

    """
    # There are too many cryptocurrency exchanges, so the exchange must be specified.
    security_item = to_security_item(security_item, exchange)

    source = adjust_source(security_item, source)

    # 163 data is already merged and carries the adjustment factor; everything
    # is stored under the 'bfq' directory, so read from that single place and
    # convert afterwards.
    path_fuquan = 'bfq' if source == '163' else fuquan
    the_path = files_contract.get_kdata_path(security_item, source=source, fuquan=path_fuquan)

    if not os.path.isfile(the_path):
        return pd.DataFrame()

    df = pd_utils.pd_read_csv(the_path, generate_id=generate_id)

    # Take the latest factor from the full history before any date filtering;
    # it stays None when no usable factor is available.
    latest_factor = None
    if 'factor' in df.columns and source == '163' and security_item['type'] == 'stock':
        rows_with_factor = df[df['factor'].notna()]
        if not rows_with_factor.empty:
            latest_factor = rows_with_factor['factor'].iat[-1]

    if the_date:
        if the_date not in df.index:
            return None
        # Copy the slice so the columns added below are set on an independent
        # frame (avoids pandas' SettingWithCopyWarning).
        df = df.loc[the_date:the_date, :].copy()
    elif start_date or end_date:
        df = df_for_date_range(df, start_date=start_date, end_date=end_date)

    # Price adjustment (fuquan).
    if source == '163' and security_item['type'] == 'stock' and 'factor' in df.columns:
        price_columns = ('close', 'open', 'high', 'low')

        # Backward-adjusted (hfq) prices never change once computed.
        for name in price_columns:
            df['hfq' + name.capitalize()] = df[name] * df['factor']

        # Forward-adjusted (qfq) prices are computed back from the latest
        # factor, so that the current price stays unchanged.
        if latest_factor:
            for name in price_columns:
                df['qfq' + name.capitalize()] = df['hfq' + name.capitalize()] / latest_factor
        else:
            # No exception is being handled here, so log a plain error rather
            # than logger.exception (which would append an empty traceback).
            logger.error("missing latest factor for {}".format(security_item['id']))

    return df