def test_remote_data_service_industry_df():
    """Cross-check the daily industry table against raw industry records.

    The raw industry records of one security ('000008.SZ', type 'ZZ') are
    fetched, then the level-1 daily industry table is requested for every
    member returned by ``get_index_comp`` for '000300.SH', starting at the
    earliest ``in_date`` found in the raw records.

    For each raw record, the daily table's value for the security must equal
    the record's ``industry1_code`` on ``in_date``; when ``in_date`` is not
    in the daily table's index, the date returned by
    ``Calendar.get_next_trade_date(in_date)`` is used instead.
    """
    from jaqs.data.dataservice import Calendar

    cal = Calendar()

    ds = RemoteDataService()
    ds.init_from_config()

    members = ds.get_index_comp(index='000300.SH',
                                start_date=20130101, end_date=20170505)
    all_symbols = ','.join(members)

    # presumably '000008.SZ' is among the fetched members, otherwise the
    # column lookup below would fail -- verify against the data source
    sec = '000008.SZ'
    industry_type = 'ZZ'
    raw = ds.get_industry_raw(symbol=sec, type_=industry_type)
    earliest_in_date = raw['in_date'].min()
    daily = ds.get_industry_daily(symbol=all_symbols,
                                  start_date=earliest_in_date,
                                  end_date=20170505,
                                  type_=industry_type, level=1)

    for _, record in raw.iterrows():
        in_date = record['in_date']
        expected = record['industry1_code']
        # Fall back to the next trade date only when in_date itself is
        # missing from the daily index.
        if in_date not in daily.index:
            lookup_date = cal.get_next_trade_date(in_date)
        else:
            lookup_date = in_date
        assert daily.loc[lookup_date, sec] == expected
def test_remote_data_service_industry():
    """Smoke-test ``align`` on raw industry data of the '000300.SH' members.

    Raw industry records (type 'ZZ') are fetched for every member returned
    by ``get_index_comp``, split per symbol, and passed to ``align`` twice:

    1. once with wide frames built by concatenating each symbol's
       ``industry1_code`` / ``in_date`` column side by side;
    2. once per symbol (first 10 symbols only), with the per-symbol results
       concatenated column-wise afterwards.

    The test makes no assertions on the results; it only checks that these
    calls complete without raising.
    """
    from jaqs.data.align import align
    from jaqs.data.dataview import DataView
    import pandas as pd

    # NOTE(review): unlike test_remote_data_service_industry_df, this test
    # does not call ds.init_from_config(); presumably the service is already
    # configured elsewhere -- confirm.
    ds = RemoteDataService()
    members = ds.get_index_comp(index='000300.SH',
                                start_date=20130101, end_date=20170505)
    df = ds.get_industry_raw(symbol=','.join(members), type_='ZZ')
    df = df.astype(dtype={'in_date': int})

    # presumably maps symbol -> that symbol's rows; verify against DataView
    dic_sec = DataView._group_df_to_dict(df, by='symbol')
    # .items() instead of the Python-2-only .viewitems(); loop variables are
    # named so that they do not shadow the outer ``df``.
    dic_sec = {symbol: sec_df.reset_index()
               for symbol, sec_df in dic_sec.items()}

    ann_wide = pd.concat(
        [sec_df.loc[:, 'in_date'].rename(symbol)
         for symbol, sec_df in dic_sec.items()],
        axis=1)
    value_wide = pd.concat(
        [sec_df.loc[:, 'industry1_code'].rename(symbol)
         for symbol, sec_df in dic_sec.items()],
        axis=1)

    dates_arr = ds.get_trade_date(20140101, 20170505)

    # Smoke check on the wide frames: the result is not inspected.
    align(value_wide, ann_wide, dates_arr)

    def align_single_df(df_one_sec):
        """Run ``align`` on the value / in_date columns of one symbol."""
        sec_value = df_one_sec.loc[:, ['industry1_code']]
        sec_ann = df_one_sec.loc[:, ['in_date']]
        return align(sec_value, sec_ann, dates_arr)

    # Only the first 10 symbols are aligned individually (the original left
    # the all-symbols variant disabled). A dict view cannot be sliced on
    # Python 3, so it is materialised into a list first.
    first_secs = list(dic_sec.items())[:10]
    res_list = [align_single_df(sec_df) for _, sec_df in first_secs]

    # Smoke check: the per-symbol results must be concatenable column-wise.
    pd.concat(res_list, axis=1)