def save_dataview():
    """Download data for the 000905.SH universe, add five formulas and save.

    Logs in with ``data_config``, prepares a daily DataView covering
    20150101-20170930, registers ``TO``, ``BP``, ``REVS20``,
    ``float_mv_factor`` and ``NextRet`` as daily fields, then stores the
    result in ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view = DataView()
    view_config = dict(
        start_date=20150101,
        end_date=20170930,
        universe='000905.SH',
        fields='turnover,float_mv,close_adj,pe,pb',
        freq=1,
    )
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    # (field name, expression) pairs, registered in exactly this order.
    daily_formulas = (
        ('TO', 'Cutoff(Standardize(turnover / 10000 / float_mv), 2)'),
        ('BP', 'Cutoff(Standardize(1/pb), 2)'),
        ('REVS20', 'Cutoff(Standardize(Return(close_adj, 20)), 2)'),
        ('float_mv_factor', 'Cutoff(Standardize(Log(float_mv)), 2)'),
        ('NextRet', 'Delay(Return(close_adj, 1), -1)'),
    )
    for field_name, expression in daily_formulas:
        view.add_formula(field_name, expression, is_quarterly=False)

    view.save_dataview(folder_path=dataview_dir_path)
def analyze_event():
    """Create a binary event report for the ``in_`` signal.

    Loads the dataview from ``dataview_folder``, ORs the three stored mask
    fields together, builds the ``in_`` formula from ``index_weight`` and
    passes the one-row-shifted signal to ``SignalDigger``.
    """
    # Step 1: load the stored dataview.
    view = DataView()
    view.load_dataview(dataview_folder)

    # Step 2: a data point is masked when any of the three masks is set.
    limit_mask = view.get_ts('mask_limit_reached')
    member_mask = view.get_ts('mask_index_member')
    suspended_mask = view.get_ts('mask_sus')
    member_or_limit = np.logical_or(member_mask, limit_mask)
    combined_mask = np.logical_or(suspended_mask, member_or_limit)

    # Step 3: price, benchmark and signal.
    close_adj = view.get_ts('close_adj')
    benchmark = view.data_benchmark
    view.add_formula(
        'in_',
        '(Delay(index_weight, 1) == 0) && (index_weight > 0)',
        is_quarterly=False,
    )
    raw_event = view.get_ts('in_')
    event = raw_event.shift(1, axis=0)  # shift one row to avoid look-ahead bias

    # Step 4: run the analysis.
    digger = SignalDigger(output_folder='../../output', output_format='pdf')
    digger.create_binary_event_report(
        event,
        close_adj,
        combined_mask,
        benchmark,
        periods=[20, 60, 121, 242],
        group_by=None,
    )
def save_dataview():
    """Prepare a 000300.SH dataview with three helper fields and save it.

    Appends a ``suspended`` frame (``trade_status == '停牌'``), then adds
    the ``not_index_member`` and ``limit_reached`` formulas before writing
    everything to ``dataview_folder``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view = DataView()
    view_config = dict(
        start_date=20150101,
        end_date=20171001,
        universe='000300.SH',
        fields='volume,turnover,float_mv,pb,total_mv',
        freq=1,
    )
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    # Boolean frame marking rows whose trade status equals the '停牌' label.
    is_suspended = view.get_ts('trade_status') == '停牌'
    view.append_df(is_suspended, 'suspended', is_quarterly=False)

    view.add_formula('not_index_member', '!index_member', is_quarterly=False)
    view.add_formula(
        'limit_reached',
        'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
        is_quarterly=False,
    )

    view.save_dataview(dataview_folder)
def save_data(factor_name='kdj_k'):
    """Fetch data from the remote server and store it locally for back-testing.

    Args:
        factor_name: Field name under which the K-style formula is stored in
            the dataview. The original code never passed a name, so the
            default here is a placeholder.
            NOTE(review): confirm the name downstream code expects.
    """
    dataview_props = {
        # Start and end date of the back-test.
        'start_date': 20170101,
        'end_date': 20171030,
        # Investment universe and performance benchmark.
        'universe': UNIVERSE,
        'benchmark': '000905.SH',
        # Data fields that we need.
        'fields': 'high,low,close',
        # freq = 1 means we use daily data. Please do not change this.
        'freq': 1,
    }

    # RemoteDataService communicates with a remote server to fetch data.
    ds = RemoteDataService()
    # Use the username and password in data_config to log in.
    ds.init_from_config(data_config)

    # DataView uses RemoteDataService to fetch various data and store them.
    dv = DataView()
    dv.init_from_config(dataview_props, ds)
    dv.prepare_data()

    # Taking a 9-day KD line as the example: first compute the RSV
    # (raw stochastic value) of the latest 9 days:
    #     9-day RSV = (C - L9) / (H9 - L9) * 100
    # where C is the close of day 9, L9 the lowest low and H9 the highest
    # high within those 9 days.
    #     K = 2/3 * previous K + 1/3 * today's RSV
    #     D = 2/3 * previous K + 1/3 * today's RSV   (as written in the original note)
    #     J = 3D - 2K
    #
    # The formula below uses 5-day windows. The "* 100" used to sit inside
    # the denominator, "(H - L * 100)", which contradicts the RSV definition
    # above; it now multiplies the whole ratio.
    # NOTE(review): the 1/3 weight on RSV from the K recurrence is not
    # applied here - confirm whether that is intended.
    factor_formula = ('2/3*50 + (close-Ts_Min(low,5))'
                      '/(Ts_Max(high,5)-Ts_Min(low,5))*100')
    # The original call passed no arguments at all, so the formula was never
    # registered and the call itself could not succeed.
    dv.add_formula(factor_name, factor_formula, is_quarterly=False)

    dv.save_dataview(folder_path=dataview_store_folder)
def analyze_event():
    """Run a binary event study on the ``in_`` formula.

    The stored masks ``mask_sus``, ``mask_index_member`` and
    ``mask_limit_reached`` are combined with logical OR; the signal is the
    ``in_`` field shifted by one row along the time axis.
    """
    # Step 1: load the dataview.
    data_view = DataView()
    data_view.load_dataview(dataview_folder)

    # Step 2: build the combined mask for ill data points.
    at_limit = data_view.get_ts('mask_limit_reached')
    outside_index = data_view.get_ts('mask_index_member')
    halted = data_view.get_ts('mask_sus')
    excluded = np.logical_or(halted, np.logical_or(outside_index, at_limit))

    # Step 3: signal, benchmark and price data.
    prices = data_view.get_ts('close_adj')
    bench_prices = data_view.data_benchmark
    entry_formula = '(Delay(index_weight, 1) == 0) && (index_weight > 0)'
    data_view.add_formula('in_', entry_formula, is_quarterly=False)
    # Shift by one row to avoid look-ahead bias.
    shifted_signal = data_view.get_ts('in_').shift(1, axis=0)

    # Step 4: analyze.
    report = SignalDigger(output_folder='../../output', output_format='pdf')
    report.create_binary_event_report(shifted_signal, prices, excluded,
                                      bench_prices,
                                      periods=[20, 60, 121, 242],
                                      group_by=None)
def save_dataview():
    """Prepare the 000905.SH dataview, attach five daily formulas and save.

    The formulas are stored as ``TO``, ``BP``, ``REVS20``,
    ``float_mv_factor`` and ``NextRet``; the dataview is written to
    ``dataview_dir_path``.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    settings = {
        'start_date': 20150101,
        'end_date': 20170930,
        'universe': '000905.SH',
        'fields': 'turnover,float_mv,close_adj,pe,pb',
        'freq': 1,
    }
    data_view = DataView()
    data_view.init_from_config(settings, remote)
    data_view.prepare_data()

    # Ordered list of (name, expression); every entry is a daily field.
    formulas = [
        ('TO', 'Cutoff(Standardize(turnover / 10000 / float_mv), 2)'),
        ('BP', 'Cutoff(Standardize(1/pb), 2)'),
        ('REVS20', 'Cutoff(Standardize(Return(close_adj, 20)), 2)'),
        ('float_mv_factor', 'Cutoff(Standardize(Log(float_mv)), 2)'),
        ('NextRet', 'Delay(Return(close_adj, 1), -1)'),
    ]
    for name, expression in formulas:
        data_view.add_formula(name, expression, is_quarterly=False)

    data_view.save_dataview(folder_path=dataview_dir_path)
def save_dataview():
    """Prepare a 000300.SH dataview with ``new_high`` and profit growth.

    ``new_high`` is added as a daily field and ``total_profit_growth`` as a
    quarterly field; the dataview is then saved to ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    # Joined with commas this is the same field string the original built by
    # concatenating two literals.
    requested_fields = ','.join([
        'open', 'high', 'low', 'close', 'vwap', 'volume', 'turnover', 'sw1',
        'eps_basic', 'total_mv', 'tot_profit', 'int_income',
    ])
    view_config = {
        'start_date': 20170101,
        'end_date': 20171030,
        'universe': '000300.SH',
        'fields': requested_fields,
        'freq': 1,
    }

    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    # Close at or above the previous row's 20-row maximum close.
    view.add_formula('new_high', 'close >= Delay(Ts_Max(close, 20), 1)',
                     is_quarterly=False)
    view.add_formula('total_profit_growth', formula='Return(tot_profit, 4)',
                     is_quarterly=True)

    view.save_dataview(folder_path=dataview_dir_path)
def analyze_event():
    """Create a single-signal report for 600519.SH on the ``in_`` formula.

    The signal is ``open_adj / Delay(close_adj, 1)`` for the target symbol,
    shifted by one row, and is reported with three quantile-based buy
    conditions.
    """
    # Step 1: load the dataview.
    view = DataView()
    view.load_dataview(dataview_folder)

    # Step 3: price and signal for the single target symbol.
    symbol = '600519.SH'
    close_adj = view.get_ts('close_adj', symbol=symbol)
    view.add_formula('in_', 'open_adj / Delay(close_adj, 1)',
                     is_quarterly=False)
    # Shift by one row to avoid look-ahead bias.
    jump_signal = view.get_ts('in_', symbol=symbol).shift(1, axis=0)

    # Each condition filters on the 'quantile' column and sets a 'hold' value.
    conditions = {
        'cond1': {'column': 'quantile',
                  'filter': lambda x: x > 3,
                  'hold': 5},
        'cond2': {'column': 'quantile',
                  'filter': lambda x: x > 5,
                  'hold': 5},
        'cond3': {'column': 'quantile',
                  'filter': lambda x: x > 5,
                  'hold': 9},
    }

    # Step 4: analyze.
    digger = SignalDigger(output_folder='../../output', output_format='pdf')
    digger.create_single_signal_report(jump_signal, close_adj,
                                       [1, 5, 9, 21], 6,
                                       mask=None,
                                       buy_condition=conditions)
def simple_test_signal():
    """Add the ``open_jump`` formula to the stored dataview and analyze it.

    The analysis is delegated to ``analyze_signal`` with ``'pdf'`` as the
    third argument; a completion message is printed afterwards.
    """
    view = DataView()
    view.load_dataview(dataview_folder)

    signal_name = 'open_jump'
    view.add_formula(signal_name, 'open_adj / Delay(close_adj, 1)',
                     is_quarterly=False)

    analyze_signal(view, signal_name, 'pdf')

    print("Signal return & IC test finished.")
def add(formula, name):
    """Add one daily formula to the dataview stored in ``dataview_dir_path``.

    Args:
        formula: Expression text, parsed with lower-case function names.
        name: Field name under which the result is stored.

    The dataview is loaded from and written back to the same folder.
    """
    stored_view = DataView()
    stored_view.load_dataview(folder_path=dataview_dir_path)
    stored_view.add_formula(
        name,
        formula,
        is_quarterly=False,
        formula_func_name_style='lower',
    )
    stored_view.save_dataview(folder_path=dataview_dir_path)
def test_DIY_signal():
    """Exercise both ways of adding custom signals to a DataView.

    1. ``add_formula``: define a factor by an expression over existing fields.
    2. ``append_df``: build a ``pandas.DataFrame`` and attach it directly.

    Finally a moving-average ``Cross`` event is defined from two appended
    frames, ``MA5`` and ``MA10``.
    """
    import pandas as pd
    import talib as ta
    from jaqs_fxdayu.data import signal_function_mod as sfm
    from jaqs_fxdayu.research.signaldigger import process

    # Step 1: load the dataview.
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Method 1: add_formula - expression over fields already in the dataview.
    dv.add_formula("momentum", "Return(close_adj, 20)",
                   is_quarterly=False, add_data=True)

    # Method 2: append_df - build a factor table and append it directly.
    close = dv.get_ts("close_adj").dropna(how='all', axis=1)
    # ``items`` replaces ``iteritems``, which newer pandas releases no longer
    # provide; both yield (column label, Series) pairs.
    slope_df = pd.DataFrame(
        {
            sec_symbol: -ta.LINEARREG_SLOPE(value.values, 10)
            for sec_symbol, value in close.items()
        },
        index=close.index)
    dv.append_df(slope_df, 'slope')
    dv.get_ts("slope")

    # Define the event.
    Open = dv.get_ts("open_adj")
    High = dv.get_ts("high_adj")
    Low = dv.get_ts("low_adj")
    Close = dv.get_ts("close_adj")
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status != 1

    # Mask out suspended periods before computing the indicators.
    open_masked = process._mask_df(Open, mask=mask_sus)
    high_masked = process._mask_df(High, mask=mask_sus)
    low_masked = process._mask_df(Low, mask=mask_sus)
    close_masked = process._mask_df(Close, mask=mask_sus)

    # MA5 was previously computed with timeperiod=10, which made it the same
    # series as MA10, so the "Cross" condition below could never be true.
    MA5 = sfm.ta(ta_method='MA',
                 ta_column=0,
                 Open=open_masked,
                 High=high_masked,
                 Low=low_masked,
                 Close=close_masked,
                 Volume=None,
                 timeperiod=5)
    MA10 = sfm.ta('MA', Close=close_masked, timeperiod=10)
    dv.append_df(MA5, 'MA5')
    dv.append_df(MA10, 'MA10')

    # MA5 is at or above MA10 now and was below it on the previous row.
    dv.add_formula("Cross", "(MA5>=MA10)&&(Delay(MA5<MA10, 1))",
                   is_quarterly=False, add_data=True)
def analyze_signal():
    """Run the full SignalDigger report for the ``divert`` signal.

    Builds a mask from suspension status, index membership and a
    ``limit_reached`` formula, then analyzes the one-row-shifted ``divert``
    signal with 5 quantiles and a period of 5.
    """
    # Step 1: load the dataview.
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Step 2: calculate the mask (to mask those ill data points).
    trade_status = dv.get_ts('trade_status')
    # The original compared only against the UTF-8 *bytes* of the label,
    # which never equals a text value on Python 3 and silently leaves the
    # suspension mask all False. Accept both the text and the encoded form.
    suspended_label = u'停牌'
    mask_sus = ((trade_status == suspended_label)
                | (trade_status == suspended_label.encode('utf-8')))

    df_index_member = dv.get_ts('index_member')
    mask_index_member = ~(df_index_member > 0)

    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    df_limit_reached = dv.get_ts('limit_reached')
    mask_limit_reached = df_limit_reached > 0

    mask_all = np.logical_or(
        mask_sus, np.logical_or(mask_index_member, mask_limit_reached))

    # Step 3: signal, benchmark and price data.
    dv.add_formula('divert', '- Correlation(vwap_adj, volume, 10)',
                   is_quarterly=False)
    signal = dv.get_ts('divert').shift(1, axis=0)  # avoid look-ahead bias
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    # Step 4: analyze.
    my_period = 5
    obj = SignalDigger(output_folder='../../output/test_signal',
                       output_format='pdf')
    obj.process_signal_before_analysis(
        signal,
        price=price,
        mask=mask_all,
        n_quantiles=5,
        period=my_period,
        benchmark_price=price_bench,
    )
    res = obj.create_full_report()
def test_add_formula():
    """Check that each added formula grows ``data_d`` by one column per security."""
    view = DataView()
    view.load_dataview(folder_path=daily_path)

    base_rows, base_cols = view.data_d.shape
    security_count = len(view.data_d.columns.levels[0])

    # The second formula references the field created by the first one.
    additions = (
        ('myvar1', 'Delta(high - close, 1)'),
        ('myvar2', 'myvar1 - close'),
    )
    for added, (field_name, expression) in enumerate(additions, start=1):
        view.add_formula(field_name, expression, is_quarterly=False)
        expected_shape = (base_rows, base_cols + added * security_count)
        assert view.data_d.shape == expected_shape
def test_add_formula():
    """Verify the shape of ``data_d`` after adding two daily formulas.

    Every new field is expected to add one column for each security (the
    first level of the column index).
    """
    data_view = DataView()
    data_view.load_dataview(folder_path=daily_path)

    rows_before, cols_before = data_view.data_d.shape
    per_field = len(data_view.data_d.columns.levels[0])

    data_view.add_formula('myvar1', 'Delta(high - close, 1)',
                          is_quarterly=False)
    after_first = (rows_before, cols_before + 1 * per_field)
    assert data_view.data_d.shape == after_first

    # Uses the field created just above.
    data_view.add_formula('myvar2', 'myvar1 - close', is_quarterly=False)
    after_second = (rows_before, cols_before + 2 * per_field)
    assert data_view.data_d.shape == after_second
def test_add_formula_directly():
    """Add a formula right after ``prepare_data`` and check the data shape.

    Uses three explicit symbols over 20160601-20170601 and expects
    ``data_d`` to have shape ``(281, 39)`` once ``myfactor`` is added.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    symbols = '600030.SH,000063.SZ,000001.SZ'
    view_config = dict(
        start_date=20160601,
        end_date=20170601,
        symbol=symbols,
        fields='open,close',
        freq=1,
    )

    view = DataView()
    view.init_from_config(view_config, data_api=data_service)
    view.prepare_data()

    view.add_formula("myfactor", 'close / open', is_quarterly=False)

    expected_shape = (281, 39)
    assert view.data_d.shape == expected_shape
def test_q_add_formula():
    """Check that formulas mixing quarterly and daily fields yield data.

    Loads the dataview from ``quarterly_path`` and adds two formulas as
    daily fields (the second one references the first), asserting that
    each resulting time series is non-empty.

    The unused locals of the original (``folder_path``, which was never
    passed to ``load_dataview``, plus ``nrows``, ``ncols`` and
    ``n_securities``) have been removed.
    """
    dv = DataView()
    dv.load_dataview(folder_path=quarterly_path)

    dv.add_formula('myvar1', 'total_oper_rev / close', is_quarterly=False)
    df1 = dv.get_ts('myvar1')
    assert not df1.empty

    # Builds on 'myvar1' created above.
    dv.add_formula('myvar2', 'Delta(oper_exp * myvar1 - open, 3)',
                   is_quarterly=False)
    df2 = dv.get_ts('myvar2')
    assert not df2.empty
def test_save_dataview():
    """Prepare a 000300.SH dataview, add the ``gq30`` formula and save it.

    ``gq30`` is ``GroupQuantile(float_mv, sw1, 10)`` stored as a daily
    field; the dataview is written to ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20170201,
        end_date=20171001,
        universe='000300.SH',
        fields='float_mv,sw2,sw1',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    view.add_formula('gq30', 'GroupQuantile(float_mv, sw1, 10)',
                     is_quarterly=False)

    view.save_dataview(folder_path=dataview_dir_path)
def test_save_dataview():
    """Prepare the ``BENCHMARK`` universe with ``ret`` and ``rank_ret`` and save.

    ``rank_ret`` is defined on top of ``ret``, so the two formulas are added
    in that order.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20170901,
        end_date=20171129,
        universe=BENCHMARK,
        fields='close,volume,sw1',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    for field_name, expression in (('ret', 'Return(close_adj, 20)'),
                                   ('rank_ret', 'Rank(ret)')):
        view.add_formula(field_name, expression, is_quarterly=False)

    view.save_dataview(folder_path=dataview_dir_path)
def test_q_add_formula():
    """Verify that daily formulas built on quarterly fields produce data.

    The dataview comes from ``quarterly_path``. Two formulas are added as
    daily fields and each resulting frame must be non-empty.

    Dead locals of the original were dropped: ``folder_path`` was assigned
    but never used (``quarterly_path`` is what gets loaded), and ``nrows``,
    ``ncols`` and ``n_securities`` were never read.
    """
    dv = DataView()
    dv.load_dataview(folder_path=quarterly_path)

    formula = 'total_oper_rev / close'
    dv.add_formula('myvar1', formula, is_quarterly=False)
    assert not dv.get_ts('myvar1').empty

    # The second formula refers to 'myvar1', so it must be added afterwards.
    formula2 = 'Delta(oper_exp * myvar1 - open, 3)'
    dv.add_formula('myvar2', formula2, is_quarterly=False)
    assert not dv.get_ts('myvar2').empty
def test_save_dataview():
    """Prepare a 000300.SH dataview with a quarterly growth field and save it.

    ``net_profit_growth`` is ``Return(net_profit_incl_min_int_inc, 4)``
    registered with ``is_quarterly=True``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20170101,
        end_date=20171001,
        universe='000300.SH',
        fields='pe_ttm,net_profit_incl_min_int_inc',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    view.add_formula('net_profit_growth',
                     'Return(net_profit_incl_min_int_inc, 4)',
                     is_quarterly=True)

    view.save_dataview(folder_path=dataview_dir_path)
def test_add_formula_directly():
    """Check the ``data_d`` shape after adding ``myfactor`` to fresh data.

    The dataview is built from an explicit symbol list (not a universe) and
    is expected to have shape ``(281, 39)`` after the formula is added.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    settings = {
        'start_date': 20160601,
        'end_date': 20170601,
        'symbol': '600030.SH,000063.SZ,000001.SZ',
        'fields': 'open,close',
        'freq': 1,
    }

    data_view = DataView()
    data_view.init_from_config(settings, data_api=remote)
    data_view.prepare_data()
    data_view.add_formula("myfactor", 'close / open', is_quarterly=False)

    n_rows, n_cols = 281, 39
    assert data_view.data_d.shape == (n_rows, n_cols)
def test_analyze_signal():
    """Run the ``divert`` signal through SignalDigger with entry/exit limits.

    The mask and the can-enter / can-exit frames come from the helpers
    ``mask_index_member`` and ``limit_up_down``. After pre-processing, the
    grouped IC is computed and plotted and the full report is created.
    """
    # Step 1: load the dataview.
    view = DataView()
    view.load_dataview(dataview_folder)

    index_mask = mask_index_member(view)
    enter_ok, exit_ok = limit_up_down(view)

    # Step 3: signal, benchmark and price data.
    view.add_formula('divert', '- Correlation(vwap_adj, volume, 10)',
                     is_quarterly=False, add_data=True)
    divert = view.get_ts('divert')
    close_adj = view.get_ts('close_adj')
    benchmark = view.data_benchmark

    # Step 4: analyze.
    holding_period = 5
    digger = SignalDigger(output_folder='../output/test_signal',
                          output_format='pdf')
    prepare_kwargs = dict(
        signal=divert,
        price=close_adj,
        high=view.get_ts("high_adj"),  # may be omitted
        low=view.get_ts("low_adj"),  # may be omitted
        group=view.get_ts("sw1"),
        n_quantiles=5,  # number of quantile buckets
        mask=index_mask,  # filter condition
        can_enter=enter_ok,  # whether a position can be entered
        can_exit=exit_ok,  # whether a position can be exited
        period=holding_period,  # holding period
        # Benchmark price; optional - without it the holding-period return
        # is computed as an absolute return.
        benchmark_price=benchmark,
        commission=0.0008,
    )
    digger.process_signal_before_analysis(**prepare_kwargs)

    processed = digger.signal_data
    result = analysis(processed, is_event=False, period=holding_period)

    grouped_ic = pfm.calc_signal_ic(processed, by_group=True)
    mean_grouped_ic = pfm.mean_information_coefficient(grouped_ic,
                                                       by_group=True)
    plotting.plot_ic_by_group(mean_grouped_ic)

    res = digger.create_full_report()
def test_save_dataview(sub_folder='test_dataview'):
    """Prepare a 000905.SH dataview with two quantile ranks and save it.

    Args:
        sub_folder: Accepted but not used by this function.

    ``rank_mv`` and ``rank_pb`` are added as daily fields and the dataview
    is written to ``dataview_dir_path``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20150101,
        end_date=20170930,
        universe='000905.SH',
        fields='float_mv,tot_shrhldr_eqy_excl_min_int,deferred_tax_assets,sw2',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    rank_formulas = (
        ('rank_mv', 'Quantile(-float_mv,5)'),
        ('rank_pb',
         'Quantile(float_mv/(tot_shrhldr_eqy_excl_min_int+deferred_tax_assets), 5)'),
    )
    for field_name, expression in rank_formulas:
        view.add_formula(field_name, expression, is_quarterly=False)

    view.save_dataview(folder_path=dataview_dir_path)
def test_ttm():
    """Add a quarterly ``TTM`` formula to a freshly prepared 000016.SH dataview.

    ``DataView`` is imported locally from ``jaqs.data``. The function only
    checks that the formula can be added; it asserts nothing further.
    """
    from jaqs.data import DataView

    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20120101,
        end_date=20170601,
        universe='000016.SH',
        fields='net_profit_incl_min_int_inc',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    view.add_formula('single', 'TTM(net_profit_incl_min_int_inc)',
                     is_quarterly=True)
def test_save_dataview():
    """Build and save a dataview for ``BENCHMARK`` with return and rank fields.

    Adds ``ret`` first and then ``rank_ret``, which ranks ``ret``; both are
    daily fields. The dataview is saved to ``dataview_dir_path``.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    settings = {
        'start_date': 20170901,
        'end_date': 20171129,
        'universe': BENCHMARK,
        'fields': 'close,volume,sw1',
        'freq': 1,
    }
    data_view = DataView()
    data_view.init_from_config(settings, remote)
    data_view.prepare_data()

    return_formula = 'Return(close_adj, 20)'
    data_view.add_formula('ret', return_formula, is_quarterly=False)
    rank_formula = 'Rank(ret)'
    data_view.add_formula('rank_ret', rank_formula, is_quarterly=False)

    data_view.save_dataview(folder_path=dataview_dir_path)
def save_dataview():
    """Build the 000300.SH dataview with suspension, membership and limit fields.

    The ``suspended`` frame is appended from a comparison of
    ``trade_status`` with ``'停牌'``; ``not_index_member`` and
    ``limit_reached`` are added as formulas. Output goes to
    ``dataview_folder``.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    settings = {
        'start_date': 20150101,
        'end_date': 20171001,
        'universe': '000300.SH',
        'fields': 'volume,turnover,float_mv,pb,total_mv',
        'freq': 1,
    }
    data_view = DataView()
    data_view.init_from_config(settings, remote)
    data_view.prepare_data()

    status = data_view.get_ts('trade_status')
    suspended_flags = status == '停牌'
    data_view.append_df(suspended_flags, 'suspended', is_quarterly=False)

    helper_formulas = [
        ('not_index_member', '!index_member'),
        ('limit_reached',
         'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095'),
    ]
    for name, expression in helper_formulas:
        data_view.add_formula(name, expression, is_quarterly=False)

    data_view.save_dataview(dataview_folder)
def save_dataview():
    """Prepare a 000300.SH dataview with three daily formulas and save it.

    Adds ``limit_reached``, ``random`` and ``momentum`` and stores the
    dataview in ``dataview_folder``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20150101,
        end_date=20171001,
        universe='000300.SH',
        fields='volume,turnover,float_mv,pb,total_mv',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    daily_formulas = (
        # Kept for convenience when checking which stocks hit the limit.
        ('limit_reached',
         'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095'),
        ('random', 'StdDev(volume, 20)'),
        ('momentum', 'Return(close_adj, 20)'),
    )
    for field_name, expression in daily_formulas:
        view.add_formula(field_name, expression, is_quarterly=False)

    view.save_dataview(dataview_folder)
def save_dataview():
    """Prepare a 000300.SH dataview with the three mask fields and save it.

    ``mask_limit_reached`` and ``mask_index_member`` are formulas;
    ``mask_sus`` is appended from a comparison of ``trade_status`` with
    ``u'停牌'``. The dataview is written to ``dataview_folder``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20160101,
        end_date=20171001,
        universe='000300.SH',
        fields='volume,turnover',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    mask_formulas = (
        # Kept for convenience when checking which stocks hit the limit.
        ('limit_reached',
         'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095'),
        ('mask_limit_reached', 'limit_reached > 0'),
        ('mask_index_member', '!(index_member > 0)'),
    )
    for field_name, expression in mask_formulas:
        view.add_formula(field_name, expression, is_quarterly=False)

    suspended = view.get_ts('trade_status') == u'停牌'
    view.append_df(suspended, 'mask_sus', is_quarterly=False)

    view.save_dataview(dataview_folder)
def save_dataview():
    """Build and save the mask dataview for the 000300.SH universe.

    Fields created, in order: ``limit_reached``, ``mask_limit_reached``,
    ``mask_index_member`` (formulas) and ``mask_sus`` (appended frame).
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    settings = {
        'start_date': 20160101,
        'end_date': 20171001,
        'universe': '000300.SH',
        'fields': 'volume,turnover',
        'freq': 1,
    }
    data_view = DataView()
    data_view.init_from_config(settings, remote)
    data_view.prepare_data()

    # Handy for checking which stocks reached the limit.
    limit_formula = 'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095'
    data_view.add_formula('limit_reached', limit_formula, is_quarterly=False)
    data_view.add_formula('mask_limit_reached', 'limit_reached > 0',
                          is_quarterly=False)
    data_view.add_formula('mask_index_member', '!(index_member > 0)',
                          is_quarterly=False)

    status = data_view.get_ts('trade_status')
    is_halted = status == u'停牌'
    data_view.append_df(is_halted, 'mask_sus', is_quarterly=False)

    data_view.save_dataview(dataview_folder)
def test_save_dataview():
    """Prepare a 000300.SH dataview with three ``GroupQuantile`` ranks and save.

    ``rank_mv``, ``rank_pb`` and ``rank_pe`` are grouped by ``sw2`` into 10
    buckets and stored as daily fields.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20170101,
        end_date=20171001,
        universe='000300.SH',
        fields='float_mv,pb,pe_ttm,sw2',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    rank_formulas = (
        ('rank_mv', 'GroupQuantile(-float_mv, sw2, 10)'),
        ('rank_pb', 'GroupQuantile(If(pb >= 0.2, pb, 100), sw2, 10)'),
        ('rank_pe',
         'GroupQuantile(If(pe_ttm >= 3, pe_ttm, 9999.0), sw2, 10)'),
    )
    for field_name, expression in rank_formulas:
        view.add_formula(field_name, expression, is_quarterly=False)

    view.save_dataview(folder_path=dataview_dir_path)
def analyze_event():
    """Create a binary event report for a "new 300-row high" signal.

    ``sig`` is true where ``new_high`` is set and ``new_high_delay`` is not.
    The mask combines suspension status, index membership and the
    ``limit_reached`` formula.
    """
    # Step 1: load the dataview.
    dv = DataView()
    dv.load_dataview(dataview_folder)

    # Step 2: calculate the mask (to mask those ill data points).
    trade_status = dv.get_ts('trade_status')
    # The original compared only against the UTF-8 *bytes* of the label,
    # which never equals a text value on Python 3 and silently leaves the
    # suspension mask all False. Accept both the text and the encoded form.
    suspended_label = u'停牌'
    mask_sus = ((trade_status == suspended_label)
                | (trade_status == suspended_label.encode('utf-8')))

    df_index_member = dv.get_ts('index_member')
    mask_index_member = ~(df_index_member > 0)

    dv.add_formula('limit_reached',
                   'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                   is_quarterly=False)
    df_limit_reached = dv.get_ts('limit_reached')
    mask_limit_reached = df_limit_reached > 0

    mask_all = np.logical_or(
        mask_sus, np.logical_or(mask_index_member, mask_limit_reached))

    # Step 3: signal, benchmark and price data.
    dv.add_formula('new_high', 'close_adj >= Ts_Max(close_adj, 300)',
                   is_quarterly=False)
    dv.add_formula('new_high_delay', 'Delay(Ts_Max(new_high, 300), 1)',
                   is_quarterly=False)
    dv.add_formula('sig', 'new_high && (! new_high_delay)',
                   is_quarterly=False)
    # NOTE(review): shift(0) leaves the signal unshifted although the
    # original comment said "avoid look-ahead bias"; sibling analyses use
    # shift(1). Kept unchanged - confirm the intended lag.
    signal = dv.get_ts('sig').shift(0, axis=0)
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    # Step 4: analyze.
    obj = SignalDigger(output_folder=jutil.join_relative_path('../output'),
                       output_format='pdf')
    # NOTE(review): the extra positional 5 is not passed by the other
    # create_binary_event_report calls in this file; verify it against the
    # signature of the installed SignalDigger version.
    obj.create_binary_event_report(signal, price, mask_all, 5, price_bench,
                                   periods=[5, 20, 40])
def test_analyze_signal():
    """Run the full report for the one-row-shifted ``divert`` signal.

    The mask is the OR of suspension status, non-membership of the index
    and the ``limit_reached`` formula. Analysis uses 5 quantiles and a
    period of 5.
    """
    # Step 1: load the dataview.
    view = DataView()
    view.load_dataview(dataview_folder)

    # Step 2: calculate the mask (to mask those ill data points).
    halted = view.get_ts('trade_status') == u'停牌'

    membership = view.get_ts('index_member')
    not_member = ~(membership > 0)

    view.add_formula('limit_reached',
                     'Abs((open - Delay(close, 1)) / Delay(close, 1)) > 0.095',
                     is_quarterly=False)
    at_limit = view.get_ts('limit_reached') > 0

    member_or_limit = np.logical_or(not_member, at_limit)
    excluded = np.logical_or(halted, member_or_limit)

    # Step 3: signal, benchmark and price data.
    view.add_formula('divert', '- Correlation(vwap_adj, volume, 10)',
                     is_quarterly=False)
    # Shift by one row to avoid look-ahead bias.
    divert = view.get_ts('divert').shift(1, axis=0)
    close_adj = view.get_ts('close_adj')
    benchmark = view.data_benchmark

    # Step 4: analyze.
    holding_period = 5
    digger = SignalDigger(output_folder='../output/test_signal',
                          output_format='pdf')
    digger.process_signal_before_analysis(divert,
                                          price=close_adj,
                                          mask=excluded,
                                          n_quantiles=5,
                                          period=holding_period,
                                          benchmark_price=benchmark)
    res = digger.create_full_report()
def test_save_dataview(sub_folder='test_dataview'):
    """Build the 000905.SH dataview with ``rank_mv`` and ``rank_pb`` and save.

    Args:
        sub_folder: Present in the signature but not read by the body.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    settings = {
        'start_date': 20150101,
        'end_date': 20170930,
        'universe': '000905.SH',
        'fields': 'float_mv,tot_shrhldr_eqy_excl_min_int,deferred_tax_assets,sw2',
        'freq': 1,
    }
    data_view = DataView()
    data_view.init_from_config(settings, remote)
    data_view.prepare_data()

    mv_rank = 'Quantile(-float_mv,5)'
    data_view.add_formula('rank_mv', mv_rank, is_quarterly=False)

    pb_rank = ('Quantile(float_mv/'
               '(tot_shrhldr_eqy_excl_min_int+deferred_tax_assets), 5)')
    data_view.add_formula('rank_pb', pb_rank, is_quarterly=False)

    data_view.save_dataview(folder_path=dataview_dir_path)
def test_save_dataview():
    """Prepare a two-universe dataview, add the ``alpha`` factor and save it.

    The factor expression uses lower-case function names, so it is parsed
    with ``formula_func_name_style='lower'``.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    # Dataview configuration.
    view_config = {
        'start_date': 20080527,
        'end_date': 20180807,
        'universe': '000002.SH,399107.SZ',
        "benchmark": "000905.SH,000905.SH",
        'fields': 'open,close,volume,vwap,high,low,turnover',
        'freq': 1,
    }
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    # Factor definition.
    alpha_expression = (
        'rank(volume)*(ts_sum(close, 5)/5)*(vwap-close)/(high-low)')
    view.add_formula('alpha', alpha_expression,
                     is_quarterly=False,
                     formula_func_name_style='lower')

    view.save_dataview(folder_path=dataview_dir_path)
def download_data():
    """Download 000905.SH data, add four ``future_return_N`` fields and save.

    For each N in 2..5 the field ``future_return_N`` is defined as
    ``Delay(Return(close_adj, N, 0), -N)`` with ``is_factor=False``. The
    dataview is stored in ``dataview_store_folder``.
    """
    view_config = {
        'start_date': 20120101,
        'end_date': 20181231,
        'universe': '000905.SH',
        'fields': 'open,close,high,low,close_adj,volume',
        'freq': 1,
    }

    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    # DataView uses RemoteDataService to fetch various data and store them.
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    # The generated names and expressions are identical to spelling out the
    # four cases by hand.
    for horizon in (2, 3, 4, 5):
        field_name = 'future_return_{0}'.format(horizon)
        expression = 'Delay(Return(close_adj, {0}, 0), -{0})'.format(horizon)
        view.add_formula(field_name, expression,
                         is_quarterly=False, is_factor=False)

    view.save_dataview(folder_path=dataview_store_folder)
def save_dataview():
    """Prepare a dataview with quarterly ROE/ROA conditions and save it.

    Date range and universe come from the module-level ``start_date``,
    ``end_date`` and ``universe``. ``cond`` combines ``roe_cond`` and
    ``roa_cond``, so it is added last.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=start_date,
        end_date=end_date,
        universe=universe,
        fields='roe_yearly,roa_yearly',
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    quarterly_conditions = (
        ('roe_cond', 'roe_yearly >= 20'),
        ('roa_cond', 'roa_yearly >= 5'),
        ('cond', 'roe_cond && roa_cond'),
    )
    for field_name, expression in quarterly_conditions:
        view.add_formula(field_name, expression, is_quarterly=True)

    view.save_dataview(folder_path=dataview_folder)
def test_save_dataview():
    """Build and save a 000300.SH dataview with size, PB and PE group ranks.

    All three ranks use ``GroupQuantile`` over ``sw2`` with 10 buckets and
    are registered as daily fields.
    """
    remote = RemoteDataService()
    remote.init_from_config(data_config)

    settings = {
        'start_date': 20170101,
        'end_date': 20171001,
        'universe': '000300.SH',
        'fields': 'float_mv,pb,pe_ttm,sw2',
        'freq': 1,
    }
    data_view = DataView()
    data_view.init_from_config(settings, remote)
    data_view.prepare_data()

    size_rank = 'GroupQuantile(-float_mv, sw2, 10)'
    data_view.add_formula('rank_mv', size_rank, is_quarterly=False)

    # pb below 0.2 is replaced by 100 before ranking.
    pb_rank = 'GroupQuantile(If(pb >= 0.2, pb, 100), sw2, 10)'
    data_view.add_formula('rank_pb', pb_rank, is_quarterly=False)

    # pe_ttm below 3 is replaced by 9999.0 before ranking.
    pe_rank = 'GroupQuantile(If(pe_ttm >= 3, pe_ttm, 9999.0), sw2, 10)'
    data_view.add_formula('rank_pe', pe_rank, is_quarterly=False)

    data_view.save_dataview(folder_path=dataview_dir_path)
def test_save_dataview():
    """Prepare a 000905.SH dataview with seven screening fields and save it.

    Two conditions on ``pe`` / ``pb`` and the ``mv_rank`` field are daily;
    the four conditions built from financial-statement fields are
    quarterly.
    """
    data_service = RemoteDataService()
    data_service.init_from_config(data_config)

    view_config = dict(
        start_date=20150101,
        end_date=20170930,
        universe='000905.SH',
        fields=('tot_cur_assets,tot_cur_liab,inventories,pre_pay,deferred_exp,'
                'eps_basic,ebit,pe,pb,float_mv,sw1'),
        freq=1,
    )
    view = DataView()
    view.init_from_config(view_config, data_service)
    view.prepare_data()

    # (field name, expression, is_quarterly) in registration order.
    screening_fields = (
        ('pe_condition', 'pe < 30', False),
        ('pb_condition', 'pb < 3', False),
        ('eps_condition', 'Return(eps_basic, 4) > 0', True),
        ('ebit_condition', 'Return(ebit, 4) > 0', True),
        ('current_condition', 'tot_cur_assets/tot_cur_liab > 2', True),
        ('quick_condition',
         '(tot_cur_assets - inventories - pre_pay - deferred_exp)/tot_cur_liab > 1',
         True),
        ('mv_rank', 'Rank(float_mv)', False),
    )
    for field_name, expression, quarterly in screening_fields:
        view.add_formula(field_name, expression, is_quarterly=quarterly)

    view.save_dataview(folder_path=dataview_dir_path)
# Load the second stored dataview and show which fields it holds.
dv.load_dataview(dataview_folder2)
dv.fields

# In[6]:

# Map the sw1 industry codes to their industry names.
sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行', '430000': '房地产', '460000': '休闲服务',
    '640000': '机械设备', '240000': '有色金属', '510000': '综合',
    '410000': '公用事业', '450000': '商业贸易', '730000': '通信',
    '330000': '家用电器', '720000': '传媒', '630000': '电气设备',
    '270000': '电子', '490000': '非银金融', '370000': '医药生物',
    '710000': '计算机', '280000': '汽车', '340000': '食品饮料',
    '220000': '化工', '210000': '采掘', '230000': '钢铁',
    '650000': '国防军工', '110000': '农林牧渔', '420000': '交通运输',
    '620000': '建筑装饰', '350000': '纺织服装', '610000': '建筑材料',
    '360000': '轻工制造',
}
sw1_name = sw1.replace(dict_classify)

# In[76]:

# Formula-based factors; add_data=True is passed on every call.
pm = dv.add_formula('pm', 'tot_profit/float_mv', is_quarterly=False, add_data=True)
ETOP = dv.add_formula('ETOP', 'tot_profit/total_mv', is_quarterly=False, add_data=True)
roa = dv.add_formula('roa', 'roa', is_quarterly=True, add_data=True)
roe = dv.add_formula('roe', 'roe', is_quarterly=True, add_data=True)

import alpha32_
import alpha194
import alpha195
import alpha42_
import alpha62_
import alpha64_
import alpha197
import alpha211
import alpha56_

# The original imported alpha194 / alpha195 / alpha197 / alpha211 but then
# called alpha194_ / alpha195_ / alpha197_ / alpha211_, names that were never
# bound (NameError). The calls now use the imported module names; the
# dataview field names are unchanged.
# NOTE(review): if the modules on disk are really named with a trailing
# underscore, fix the imports instead.
dv.append_df(alpha32_.run_formula(dv), 'alpha32_')
dv.append_df(alpha194.run_formula(dv), 'alpha194_')
dv.append_df(alpha195.run_formula(dv), 'alpha195_')
dv.append_df(alpha42_.run_formula(dv), 'alpha42_')
dv.append_df(alpha62_.run_formula(dv), 'alpha62_')
dv.append_df(alpha64_.run_formula(dv), 'alpha64_')
dv.append_df(alpha197.run_formula(dv), 'alpha197_')
dv.append_df(alpha211.run_formula(dv), 'alpha211_')
dv.append_df(alpha56_.run_formula(dv), 'alpha56_')
def test_multi_factor():
    """Exercise the multi-factor helpers: IC tables, preprocessing, combining.

    Steps:
      1. Compute factor IC tables for holding periods of 5 and 15.
      2. Pre-process each factor (sign flip, winsorize, rank-standardize,
         z-score, neutralize).
      3. Orthogonalize the processed factors.
      4. Combine the processed factors with several weighting methods.
    """
    from jaqs_fxdayu.research.signaldigger import multi_factor, process

    dv = DataView()
    dv.load_dataview(dataview_folder)
    dv.add_formula("momentum", "Return(close_adj, 20)",
                   is_quarterly=False, add_data=True)

    mask = mask_index_member(dv)
    can_enter, can_exit = limit_up_down(dv)

    factor_names = ["pb", "pe", "ps", "momentum"]

    # Step 1: IC table per holding period.
    ic = dict()
    factors_dict = {signal: dv.get_ts(signal) for signal in factor_names}
    for period in [5, 15]:
        ic[period] = multi_factor.get_factors_ic_df(
            factors_dict,
            price=dv.get_ts("close_adj"),
            high=dv.get_ts("high_adj"),  # may be omitted
            low=dv.get_ts("low_adj"),  # may be omitted
            n_quantiles=5,  # number of quantile buckets
            mask=mask,  # filter condition
            can_enter=can_enter,  # whether a position can be entered
            can_exit=can_exit,  # whether a position can be exited
            period=period,  # holding period
            # Benchmark price; optional - without it the holding-period
            # return is computed as an absolute return.
            benchmark_price=dv.data_benchmark,
            commission=0.0008,
        )

    # Step 2: pre-process every factor.
    factor_dict = dict()
    index_member = dv.get_ts("index_member")
    for name in factor_names:
        signal = -1 * dv.get_ts(name)  # flip the sign
        # Winsorize. The original discarded this call's result, unlike every
        # other process.* step below, so the winsorized frame was never used.
        # NOTE(review): assumes process.winsorize returns the processed
        # frame, like its sibling functions - confirm.
        signal = process.winsorize(factor_df=signal, alpha=0.05,
                                   index_member=index_member)
        # Cross-sectional rank, normalized to 0-1 (keeps ordering only).
        signal = process.rank_standardize(signal, index_member)
        # z-score standardization (keeps ordering and distribution info).
        signal = process.standardize(signal, index_member)
        # Industry and market-cap neutralization.
        signal = process.neutralize(
            signal,
            group=dv.get_ts("sw1"),
            float_mv=dv.get_ts("float_mv"),
            index_member=index_member,  # only consider index members
        )
        factor_dict[name] = signal

    # Step 3: when factors are strongly homogeneous, orthogonalize them
    # (Schmidt method) and use the orthogonalized residuals as factors.
    new_factors = multi_factor.orthogonalize(
        factors_dict=factor_dict,
        # Standardization of the inputs: "rank" or "z_score".
        standardize_type="rank",
        winsorization=False,  # whether to winsorize the input factors
        index_member=index_member)  # only process index members

    # Step 4: factor combination - configuration for dynamic weighting.
    props = {
        'price': dv.get_ts("close_adj"),
        'high': dv.get_ts("high_adj"),  # may be omitted
        'low': dv.get_ts("low_adj"),  # may be omitted
        # Other options are upside_ret / downside_ret, which make the
        # combined factor target potential upside / downside instead.
        'ret_type': 'return',
        # Empty gives absolute returns, otherwise relative returns.
        'benchmark_price': dv.data_benchmark,
        'period': 30,  # 30-day holding period
        'mask': mask,
        'can_enter': can_enter,
        'can_exit': can_exit,
        'forward': True,
        'commission': 0.0008,
        # Covariance estimation method; "simple" is the alternative.
        "covariance_type": "shrink",
        "rollback_period": 120,  # rolling window in days
    }

    comb_factors = dict()
    for method in [
            "equal_weight", "ic_weight", "ir_weight", "max_IR", "max_IC",
            "factors_ret_weight"
    ]:
        comb_factors[method] = multi_factor.combine_factors(
            factor_dict,
            standardize_type="rank",
            winsorization=False,
            weighted_method=method,
            props=props)
import time
import pandas as pd
from jaqs.data import RemoteDataService
from jaqs.data import DataView
import jaqs.util as jutil
from config_path import DATA_CONFIG_PATH, TRADE_CONFIG_PATH

# Connection settings are read from the JSON files named in config_path.
data_config = jutil.read_json(DATA_CONFIG_PATH)
trade_config = jutil.read_json(TRADE_CONFIG_PATH)

# Folder the prepared dataview is written to.
dataview_dir_path = '../../output/canslim/dataview'

# No fields are requested up front ('fields' is an empty string).
props = dict(
    start_date=20170101,
    end_date=20180516,
    universe='000905.SH',
    fields="",
    freq=1,
)

ds = RemoteDataService()
ds.init_from_config(data_config)

dv = DataView()
dv.init_from_config(props, ds)
dv.prepare_data()

# Add the rank_mv field as a daily formula, then persist the dataview.
factor_formula = 'Quantile(-float_mv,5)'
dv.add_formula('rank_mv', factor_formula, is_quarterly=False)

dv.save_dataview(folder_path=dataview_dir_path)
{ name: value.dropna().rolling(120).std()**2 for name, value in pct_return.iteritems() }, index=pct_return.index).fillna(method='ffill') gainvariance120 = dv.append_df(temp, 'gainvariance120') #alpha46 def mean(df, day): return df.rolling(window=day, center=False).mean() alpha46 = dv.add_formula( 'alpha46', "(mean(close,3)+mean(close,6)+mean(close,12)+mean(close,24))/(4*close)", is_quarterly=False, add_data=True, register_funcs={"mean": mean}) #alpha48 def sum_fxdayu(df, day): return df.rolling(window=day, center=False).sum() alpha48 = dv.add_formula( 'alpha48', "(-1*((Rank(((Sign((close - Delay(close, 1))) + Sign((Delay(close, 1) - Delay(close, 2)))) +Sign((Delay(close, 2) - Delay(close, 3)))))) * SUM(volume, 5)) / SUM(volume, 20))", is_quarterly=False, add_data=True, register_funcs={"SUM": sum_fxdayu})