def test_remote_data_service_industry():
    """
    Smoke test: align raw 'ZZ' industry records of 000300.SH members to trade dates.

    Nothing is asserted; the test passes as long as every call below runs
    without raising.
    """
    from jaqs.data.align import align
    import pandas as pd

    members = ds.get_index_comp(index='000300.SH', start_date=20130101, end_date=20170505)
    df_raw = ds.get_industry_raw(symbol=','.join(members), type_='ZZ')

    from jaqs.data import DataView

    # One frame per symbol, each given a fresh positional index.
    by_symbol = {}
    for sec, df_sec in jutil.group_df_to_dict(df_raw, by='symbol').items():
        by_symbol[sec] = df_sec.reset_index()

    def columns_of(field):
        # Put `field` of every symbol side by side, one column per symbol.
        series = [df_sec.loc[:, field].rename(sec) for sec, df_sec in by_symbol.items()]
        return pd.concat(series, axis=1)

    df_ann = columns_of('in_date')
    df_value = columns_of('industry1_code')

    dates_arr = ds.get_trade_date_range(20140101, 20170505)

    # Align all symbols in a single call ...
    aligned_all = align(df_value, df_ann, dates_arr)

    # ... and then one symbol at a time, for the first ten symbols only.
    def align_single_df(df_one_sec):
        return align(df_one_sec.loc[:, ['industry1_code']],
                     df_one_sec.loc[:, ['in_date']],
                     dates_arr)

    first_ten = list(by_symbol.values())[:10]
    res_list = [align_single_df(df_sec) for df_sec in first_ten]
    res = pd.concat(res_list, axis=1)
def gen_report(self, source_dir, template_fn, out_folder='.', selected=None):
    """
    Generate the HTML report of the trade analysis and dump returns to CSV.

    Besides rendering 'report.html' through Report, this writes
    self.returns to 'returns.csv' inside out_folder.

    Parameters
    ----------
    source_dir : str
        path of directory where HTML template and css files are stored.
    template_fn : str
        File name of HTML template.
    out_folder : str
        Output folder of report.
    selected : list of str or None
        List of symbols whose detailed PnL curve and position will be plotted.
        # TODO: this parameter should not belong to function

    """
    # we do not want to show username / password in report
    props = {k: v for k, v in self.configs.items()
             if 'username' not in k and 'password' not in k}

    dic = dict()
    dic['html_title'] = "Alpha Strategy Backtest Result"
    dic['selected_securities'] = selected
    dic['props'] = props
    dic['metrics'] = self.metrics
    dic['position_change'] = self.position_change
    dic['account'] = self.account
    dic['df_daily'] = jutil.group_df_to_dict(self.daily, by='symbol')
    self.report_dic.update(dic)

    self.returns.to_csv(os.path.join(out_folder, 'returns.csv'))

    r = Report(self.report_dic, source_dir=source_dir, template_fn=template_fn, out_folder=out_folder)
    r.generate_html()
    r.output_html('report.html')
def get_industry_daily(self, symbol, start_date, end_date, type_='SW', level=1):
    """
    Get the industry code of each symbol on each trade date between start_date and end_date.

    Parameters
    ----------
    symbol : str
        separated by ','
    start_date : int
    end_date : int
    type_ : {'SW', 'ZZ'}
        Passed through to get_industry_raw.
    level : int
        Selects the column 'industry<level>_code' of the raw data; also
        passed through to get_industry_raw.

    Returns
    -------
    res : pd.DataFrame
        index dates, columns symbols
        values are industry code, converted to str

    """
    df_raw = self.get_industry_raw(symbol, type_=type_, level=level)

    # One frame per symbol, ordered by in_date; reset_index() makes the row
    # labels positional (0..k-1) so different symbols share the same index.
    # NOTE: .items() instead of the Python-2-only .viewitems().
    dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
    dic_sec = {sec: df.sort_values(by='in_date', axis=0).reset_index()
               for sec, df in dic_sec.items()}

    value_col = 'industry{:d}_code'.format(level)
    df_ann_tmp = pd.concat({sec: df.loc[:, 'in_date'] for sec, df in dic_sec.items()}, axis=1)
    df_value_tmp = pd.concat({sec: df.loc[:, value_col] for sec, df in dic_sec.items()}, axis=1)

    # Build NaN-filled frames covering every requested symbol, so symbols
    # without raw records still get a column.
    idx = np.unique(np.concatenate([df.index.values for df in dic_sec.values()]))
    symbol_arr = np.sort(symbol.split(','))
    df_ann = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
    df_ann.loc[df_ann_tmp.index, df_ann_tmp.columns] = df_ann_tmp
    df_value = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
    df_value.loc[df_value_tmp.index, df_value_tmp.columns] = df_value_tmp

    dates_arr = self.get_trade_date_range(start_date, end_date)
    df_industry = align.align(df_value, df_ann, dates_arr)

    # TODO before industry classification is available, we assume they belong to their first group.
    df_industry = df_industry.bfill()
    # Any value still missing after the back-fill becomes the string 'nan'.
    df_industry = df_industry.astype(str)

    return df_industry
def query_adj_factor_daily(self, symbol, start_date, end_date, div=False):
    """
    Get the adjust factor of each symbol on each trade date.

    Data is queried from 'lb.secAdjFactor'; if that query raises, the result
    of query_adj_factor_daily_2 is returned instead.

    Parameters
    ----------
    symbol : str
        separated by ','
    start_date : int
    end_date : int
    div : bool
        False for normal adjust factor, True for diff
        (each row divided by the previous row, first row set to 1.0).

    Returns
    -------
    res : pd.DataFrame
        index dates (cast to int), columns symbols
        values are adjust factors

    """
    _flt = 'symbol=%s&start_date=%s&end_date=%s' % (symbol, start_date, end_date)

    # noinspection PyBroadException
    try:
        df_raw, _msg = self.query('lb.secAdjFactor', _flt, '')
    except Exception:
        print('query adjust_factor from Stock_D')
        # NOTE(review): `div` is not forwarded to the fallback -- confirm
        # whether query_adj_factor_daily_2 should honour it.
        return self.query_adj_factor_daily_2(symbol, start_date, end_date)

    # One adjust_factor series per symbol, indexed by trade_date.
    dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
    dic_sec = {sec: df.set_index('trade_date').loc[:, 'adjust_factor']
               for sec, df in dic_sec.items()}

    # TODO: duplicate codes with dataview.py: line 512
    res = pd.concat(dic_sec, axis=1)  # TODO: fillna ?

    # NaN-filled frame covering every requested symbol, so symbols without
    # raw records still get a column.
    idx = np.unique(np.concatenate([ser.index.values for ser in dic_sec.values()]))
    symbol_arr = np.sort(symbol.split(','))
    res_final = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
    res_final.loc[res.index, res.columns] = res

    # align to every trade date
    trade_dates = df_raw.loc[:, 'trade_date']
    s, e = trade_dates.min(), trade_dates.max()
    dates_arr = self.query_trade_dates(s, e)
    if len(dates_arr) != len(res_final.index):
        res_final = res_final.reindex(dates_arr)
        # Forward-fill first, then back-fill whatever is still missing at the top.
        res_final = res_final.ffill().bfill()

    if div:
        res_final = res_final.div(res_final.shift(1, axis=0)).fillna(1.0)

    # res = res.loc[start_date: end_date, :]
    res_final.index = res_final.index.astype(int)

    return res_final
def gen_report(self, source_dir, template_fn, out_folder='.', selected=None):
    """
    Render the trade analysis into 'report.html' through Report.

    Parameters
    ----------
    source_dir : str
        Directory holding the HTML template and css files.
    template_fn : str
        File name of the HTML template.
    out_folder : str
        Folder the report is written to.
    selected : list of str or None
        Symbols whose detailed PnL curve and position will be plotted.
        # TODO: this parameter should not belong to function

    """
    # we do not want to show username / password in report
    visible_props = {}
    for key, value in self.configs.items():
        if 'username' in key or 'password' in key:
            continue
        visible_props[key] = value

    payload = {
        'html_title': "Alpha Strategy Backtest Result",
        'selected_securities': selected,
        'props': visible_props,
        'performance_metrics_report': self.performance_metrics_report,
        'risk_metrics_report': self.risk_metrics_report,
        'position_change': self.position_change,
        'account': self.account,
        'df_daily': jutil.group_df_to_dict(self.daily, by='symbol'),
        'daily_position': None,  # self.daily_position
        'rebalance_positions': self.rebalance_positions,
    }
    self.report_dic.update(payload)

    report = Report(self.report_dic,
                    source_dir=source_dir,
                    template_fn=template_fn,
                    out_folder=out_folder)
    report.generate_html()
    report.output_html('report.html')
def query_adj_factor_daily(self, symbol, start_date, end_date, div=False):
    """
    Get the adjust factor of each symbol on each trade date.

    Parameters
    ----------
    symbol : str
        separated by ','
    start_date : int
    end_date : int
    div : bool
        False for normal adjust factor, True for diff
        (each row divided by the previous row, first row set to 1.0).

    Returns
    -------
    res : pd.DataFrame
        index dates, columns symbols
        values are adjust factors

    """
    df_raw = self.query_adj_factor_raw(symbol, start_date=start_date, end_date=end_date)

    # One adjust_factor series per symbol, indexed by trade_date.
    dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
    dic_sec = {sec: df.set_index('trade_date').loc[:, 'adjust_factor']
               for sec, df in dic_sec.items()}

    # TODO: duplicate codes with dataview.py: line 512
    res = pd.concat(dic_sec, axis=1)  # TODO: fillna ?

    # NaN-filled frame covering every requested symbol, so symbols without
    # raw records still get a column.
    idx = np.unique(np.concatenate([ser.index.values for ser in dic_sec.values()]))
    symbol_arr = np.sort(symbol.split(','))
    res_final = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
    res_final.loc[res.index, res.columns] = res

    # align to every trade date
    trade_dates = df_raw.loc[:, 'trade_date']
    s, e = trade_dates.min(), trade_dates.max()
    dates_arr = self.query_trade_dates(s, e)
    if len(dates_arr) != len(res_final.index):
        res_final = res_final.reindex(dates_arr)
        # Forward-fill first, then back-fill whatever is still missing at the top.
        res_final = res_final.ffill().bfill()

    if div:
        res_final = res_final.div(res_final.shift(1, axis=0)).fillna(1.0)

    # res = res.loc[start_date: end_date, :]

    return res_final
def query_industry_daily(self, symbol, start_date, end_date, type_='SW', level=1):
    """
    Get the industry code of each symbol on each trade date between start_date and end_date.

    Parameters
    ----------
    symbol : str
        separated by ','
    start_date : int
    end_date : int
    type_ : {'SW', 'ZZ'}
        Passed through to query_industry_raw.
    level : int
        Selects the column 'industry<level>_code' of the raw data; also
        passed through to query_industry_raw.

    Returns
    -------
    res : pd.DataFrame
        index dates, columns symbols
        values are industry code, converted to str

    """
    df_raw = self.query_industry_raw(symbol, type_=type_, level=level)

    # One frame per symbol, ordered by in_date; reset_index() makes the row
    # labels positional (0..k-1) so different symbols share the same index.
    dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
    dic_sec = {sec: df.sort_values(by='in_date', axis=0).reset_index()
               for sec, df in dic_sec.items()}

    value_col = 'industry{:d}_code'.format(level)
    df_ann_tmp = pd.concat({sec: df.loc[:, 'in_date'] for sec, df in dic_sec.items()}, axis=1)
    df_value_tmp = pd.concat({sec: df.loc[:, value_col] for sec, df in dic_sec.items()}, axis=1)

    # Build NaN-filled frames covering every requested symbol, so symbols
    # without raw records still get a column.
    idx = np.unique(np.concatenate([df.index.values for df in dic_sec.values()]))
    symbol_arr = np.sort(symbol.split(','))
    df_ann = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
    df_ann.loc[df_ann_tmp.index, df_ann_tmp.columns] = df_ann_tmp
    df_value = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
    df_value.loc[df_value_tmp.index, df_value_tmp.columns] = df_value_tmp

    dates_arr = self.query_trade_dates(start_date, end_date)
    df_industry = align.align(df_value, df_ann, dates_arr)

    # TODO before industry classification is available, we assume they belong to their first group.
    df_industry = df_industry.bfill()
    # Any value still missing after the back-fill becomes the string 'nan'.
    df_industry = df_industry.astype(str)

    return df_industry
def test_pdutil():
    """
    Exercise jutil.fillinf, jutil.to_quantile and jutil.group_df_to_dict.

    Checks that fillinf turns the two infinite cells into nulls (3 -> 5) and
    that grouping by a column yields one key per distinct value.
    """
    def count_nulls(frame):
        return frame.isnull().sum().sum()

    df = pd.DataFrame(np.random.rand(4, 20))

    # Three NaN cells.
    for row, col in ((1, 2), (3, 4), (1, 4)):
        df.iloc[row, col] = np.nan
    assert count_nulls(df) == 3

    # Infinities are not counted as null by isnull().
    df.iloc[2, 11] = np.inf
    df.iloc[2, 12] = -np.inf
    assert count_nulls(df) == 3

    without_inf = jutil.fillinf(df)
    assert count_nulls(without_inf) == 5

    # Only checked for running without error; the result is not inspected.
    res_q = jutil.to_quantile(df, 5, axis=1)

    grouped_input = df.copy()
    grouped_input['group'] = ['a', 'a', 'b', 'a']
    groups = jutil.group_df_to_dict(grouped_input, by='group')
    assert set(groups.keys()) == {'a', 'b'}
def test_remote_data_service_industry():
    """
    Test raw industry queries for 000300.SH members and their alignment to trade dates.

    Invalid `level` / `type_` arguments must make query_industry_raw raise
    ValueError; the remaining steps are a smoke test with no assertions.
    """
    from jaqs.data.align import align
    import pandas as pd

    arr = ds.query_index_member(index='000300.SH', start_date=20130101, end_date=20170505)
    symbols = ','.join(arr)

    df = ds.query_industry_raw(symbol=symbols, type_='SW')
    df = ds.query_industry_raw(symbol=symbols, type_='ZZ')

    # errors
    def assert_rejected(**kwargs):
        # A bare `except ValueError: pass` would also pass when nothing is
        # raised, so fail explicitly in that case.
        try:
            ds.query_industry_raw(symbol=symbols, **kwargs)
        except ValueError:
            return
        raise AssertionError(
            'query_industry_raw did not raise ValueError for {!r}'.format(kwargs))

    assert_rejected(type_='ZZ', level=5)
    assert_rejected(type_='blabla')

    # Not used below; kept so the import behaviour of the test is unchanged.
    from jaqs.data import DataView

    # One frame per symbol, each given a fresh positional index.
    dic_sec = jutil.group_df_to_dict(df, by='symbol')
    dic_sec = {sec: df_sec.reset_index() for sec, df_sec in dic_sec.items()}

    df_ann = pd.concat([df_sec.loc[:, 'in_date'].rename(sec)
                        for sec, df_sec in dic_sec.items()], axis=1)
    df_value = pd.concat([df_sec.loc[:, 'industry1_code'].rename(sec)
                          for sec, df_sec in dic_sec.items()], axis=1)

    dates_arr = ds.query_trade_dates(20140101, 20170505)

    # Align all symbols in a single call ...
    res = align(df_value, df_ann, dates_arr)

    # ... and then one symbol at a time, for the first ten symbols only.
    def align_single_df(df_one_sec):
        df_value = df_one_sec.loc[:, ['industry1_code']]
        df_ann = df_one_sec.loc[:, ['in_date']]
        return align(df_value, df_ann, dates_arr)

    res_list = [align_single_df(df_sec) for df_sec in list(dic_sec.values())[:10]]
    res = pd.concat(res_list, axis=1)
def gen_report(self, source_dir, template_fn, out_folder='.', selected=None):
    """
    Render the trade analysis into 'report.html' through Report.

    Parameters
    ----------
    source_dir : str
        Directory holding the HTML template and css files.
    template_fn : str
        File name of the HTML template.
    out_folder : str
        Folder the report is written to.
    selected : list of str or None
        Symbols whose detailed PnL curve and position will be plotted.
        # TODO: this parameter should not belong to function

    """
    def _is_credential(key):
        return 'username' in key or 'password' in key

    # we do not want to show username / password in report
    shown_props = {key: value
                   for key, value in self.configs.items()
                   if not _is_credential(key)}

    report_fields = dict(
        html_title="Alpha Strategy Backtest Result",
        selected_securities=selected,
        props=shown_props,
        performance_metrics=self.performance_metrics,
        risk_metrics=self.risk_metrics,
        position_change=self.position_change,
        account=self.account,
        df_daily=jutil.group_df_to_dict(self.daily, by='symbol'),
        daily_position=None,  # self.daily_position
        rebalance_positions=self.rebalance_positions,
    )
    self.report_dic.update(report_fields)

    renderer = Report(self.report_dic,
                      source_dir=source_dir,
                      template_fn=template_fn,
                      out_folder=out_folder)
    renderer.generate_html()
    renderer.output_html('report.html')