def __init__(self,
             universe,
             start_date,
             end_date,
             freq,
             benchmark=905,
             weights_bandwidth=0.02,
             industry_cat='sw_adj',
             industry_level=1,
             rebalance_method='risk_neutral',
             bounds=None,
             **kwargs):
    """Store the run configuration and pre-compute the rebalance dates.

    The date schedule is generated with ``makeSchedule`` on the
    ``'china.sse'`` calendar between ``start_date`` and ``end_date`` at
    frequency ``freq`` and kept as a list of ``'YYYY-MM-DD'`` strings in
    ``self.dates``.  ``self.horizon`` is whatever ``map_freq(freq)``
    returns, and a fresh ``NaiveExecutor`` is attached as
    ``self.executor``.  Any extra keyword arguments are kept untouched
    in ``self.more_opts``.
    """
    self.universe = universe

    # Build the schedule once, then keep only its string representation.
    schedule = makeSchedule(start_date, end_date, freq, 'china.sse')
    self.dates = [day.strftime('%Y-%m-%d') for day in schedule]

    self.benchmark = benchmark
    self.weights_bandwidth = weights_bandwidth

    self.freq = freq
    self.horizon = map_freq(freq)

    self.executor = NaiveExecutor()

    self.industry_cat = industry_cat
    self.industry_level = industry_level

    self.rebalance_method = rebalance_method
    self.bounds = bounds
    self.more_opts = kwargs
def __init__(self,
             alpha_model,
             data_meta,
             universe,
             start_date,
             end_date,
             freq,
             benchmark=905,
             industry_cat='sw_adj',
             industry_level=1,
             dask_client=None):
    """Store the model/data configuration and initialise lazy state.

    ``self.dates`` holds the ``makeSchedule`` output for the
    ``'china.sse'`` calendar, formatted as ``'YYYY-MM-DD'`` strings.
    ``self.engine`` is a ``SqlEngine`` built from
    ``data_meta.data_source``.  The cached results (``total_data``,
    ``index_return``, ``risk_models``, ``alpha_models``) all start out as
    ``None``.
    """
    self.alpha_model = alpha_model
    self.data_meta = data_meta
    self.universe = universe
    self.benchmark = benchmark

    # Build the schedule once, then keep only its string representation.
    schedule = makeSchedule(start_date, end_date, freq, 'china.sse')
    self.dates = [day.strftime('%Y-%m-%d') for day in schedule]

    self.industry_cat = industry_cat
    self.industry_level = industry_level

    self.freq = freq
    self.horizon = map_freq(freq)

    self.engine = SqlEngine(self.data_meta.data_source)
    self.dask_client = dask_client

    # Nothing has been loaded or fitted yet.
    self.total_data = None
    self.index_return = None
    self.risk_models = None
    self.alpha_models = None
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0):
    """Load factor, return, industry and benchmark data for a date range.

    Parameters
    ----------
    engine : data access object providing the ``fetch_*_range`` methods.
    factors : a ``Transformer`` or something a ``Transformer`` can be
        built from.
    start_date, end_date : schedule bounds; ``start_date`` is moved back
        by ``warm_start`` periods of ``frequency`` on the ``'china.sse'``
        calendar when ``warm_start > 0``.
    frequency : schedule step, also passed to ``map_freq`` for the
        return horizon.
    universe : universe passed through to every ``engine`` call.
    benchmark : benchmark identifier passed to
        ``engine.fetch_benchmark_range``.
    warm_start : number of extra leading periods to load.

    Returns
    -------
    tuple
        ``(dates, return_frame, factor_frame)`` where ``dates`` is the
        list of ``'YYYY-MM-DD'`` strings, ``return_frame`` has columns
        ``trade_date, code, dx`` and ``factor_frame`` has
        ``trade_date, code, weight, isOpen, industry_code, industry`` plus
        the transformer's factor names.
    """
    if warm_start > 0:
        base_period = Period(frequency)
        back_shift = Period(length=-warm_start * base_period.length(),
                            units=base_period.units())
        shifted_start = advanceDateByCalendar('china.sse', start_date, back_shift)
        start_date = shifted_start.strftime('%Y-%m-%d')

    schedule = makeSchedule(start_date,
                            end_date,
                            frequency,
                            calendar='china.sse',
                            dateRule=BizDayConventions.Following,
                            dateGenerationRule=DateGeneration.Forward)
    dates = [day.strftime('%Y-%m-%d') for day in schedule]

    horizon = map_freq(frequency)

    transformer = factors if isinstance(factors, Transformer) else Transformer(factors)

    key_cols = ['trade_date', 'code']

    raw_factors = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
    factor_df = raw_factors.sort_values(key_cols)
    alpha_logger.info("factor data loading finished")

    return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
    alpha_logger.info("return data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")

    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    # Inner-join factors with returns and drop incomplete rows first; the
    # benchmark is left-joined afterwards so names outside the benchmark
    # survive and simply get a zero weight.
    merged = pd.merge(factor_df, return_df, on=key_cols).dropna()
    merged = pd.merge(merged, benchmark_df, on=key_cols, how='left')
    merged = pd.merge(merged, industry_df, on=key_cols)
    merged['weight'] = merged['weight'].fillna(0.)

    return_cols = key_cols + ['dx']
    factor_cols = key_cols + ['weight', 'isOpen', 'industry_code', 'industry'] + transformer.names
    return dates, merged[return_cols], merged[factor_cols]
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0,
                 fit_target: Union[Transformer, object] = None):
    """Load factor, target, industry and benchmark data for a date range.

    Parameters
    ----------
    engine : data access object providing the ``fetch_*`` methods used
        below.
    factors : a ``Transformer`` or something a ``Transformer`` can be
        built from.
    start_date, end_date : schedule bounds; ``start_date`` is moved back
        by ``warm_start`` periods of ``frequency`` on the ``'china.sse'``
        calendar when ``warm_start > 0``.
    frequency : schedule step, also passed to ``map_freq`` for the
        return horizon.
    universe : universe passed through to every ``engine`` call.
    benchmark : benchmark identifier passed to
        ``engine.fetch_benchmark_range``.
    warm_start : number of extra leading periods to load.
    fit_target : when ``None`` the target is the forward return from
        ``engine.fetch_dx_return_range``; otherwise it is fetched with
        ``engine.fetch_factor_range_forward`` and forward-filled per code.
        The resulting frame must expose a ``dx`` column, since that is
        what is returned as the target.

    Returns
    -------
    tuple
        ``(dates, target_frame, factor_frame)`` where ``dates`` is the
        list of ``'YYYY-MM-DD'`` strings, ``target_frame`` has columns
        ``trade_date, code, dx`` and ``factor_frame`` has
        ``trade_date, code, weight, industry_code, industry`` plus the
        transformer's factor names.
    """
    if warm_start > 0:
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date, p).strftime('%Y-%m-%d')

    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)
    dates = [d.strftime('%Y-%m-%d') for d in dates]

    horizon = map_freq(frequency)

    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
    else:
        # One extra date is requested for the forward fetch, then filtered
        # out again so only the scheduled dates remain.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)].copy()
        # Forward-fill within each code.  ``GroupBy.ffill`` replaces the
        # deprecated ``fillna(method='pad')`` inside ``groupby.apply`` and
        # keeps the original index, so ``code`` never becomes an index
        # level that would make the merge below ambiguous.
        value_cols = [c for c in target_df.columns if c != 'code']
        target_df[value_cols] = target_df.groupby('code')[value_cols].ffill()
    alpha_logger.info("fit target data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")

    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    # The benchmark is left-joined so names outside the benchmark survive
    # and get a zero weight.
    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)
    df.dropna(inplace=True)

    return dates, df[['trade_date', 'code', 'dx']], df[
        ['trade_date', 'code', 'weight', 'industry_code', 'industry'] + transformer.names]
def fetch_predict_phase(engine,
                        alpha_factors: Union[Transformer, Iterable[object]],
                        ref_date,
                        frequency,
                        universe,
                        batch=1,
                        neutralized_risk: Iterable[str] = None,
                        risk_model: str = 'short',
                        pre_process: Iterable[object] = None,
                        post_process: Iterable[object] = None,
                        warm_start: int = 0,
                        fillna: str = None,
                        fit_target: Union[Transformer, object] = None):
    """Build the processed feature/target slice used to predict at ``ref_date``.

    Parameters
    ----------
    engine : data access object providing the ``fetch_*`` methods used
        below.
    alpha_factors : a ``Transformer`` or something a ``Transformer`` can
        be built from.
    ref_date : prediction date as a ``'%Y-%m-%d'`` string (it is parsed
        with that format below).
    frequency : schedule step; also passed to ``map_freq`` for the
        return horizon.
    universe : universe passed through to every ``engine`` call.
    batch : number of most recent dates processed together before the
        ``ref_date`` rows are cut out.
    neutralized_risk : risk factor names whose exposures are passed to
        ``factor_processing`` as ``risk_factors``; falsy disables it.
    risk_model : forwarded to ``engine.fetch_risk_model_range``.
    pre_process, post_process : forwarded to ``factor_processing``.
    warm_start : extra leading periods to load.
    fillna : when truthy, missing factor values are filled with the
        per-``trade_date`` median before dropping remaining NaNs.
    fit_target : when ``None`` the target is the forward return from
        ``engine.fetch_dx_return_range``; otherwise it is fetched with
        ``engine.fetch_factor_range_forward`` and forward-filled per code.

    Returns
    -------
    dict
        ``{'x_names': [...], 'predict': {'x': DataFrame, 'code': ..., 'y': ...}}``.
        When no data is available for ``ref_date``, ``x`` is an empty
        frame and ``code`` / ``y`` are ``None``.
    """
    if isinstance(alpha_factors, Transformer):
        transformer = alpha_factors
    else:
        transformer = Transformer(alpha_factors)

    p = Period(frequency)
    p = Period(length=-(warm_start + batch - 1) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p, BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)

    if fillna:
        factor_df = factor_df.groupby('trade_date').apply(
            lambda x: x.fillna(x.median())).reset_index(drop=True).dropna()
    else:
        factor_df = factor_df.dropna()

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
    else:
        # One extra date is requested for the forward fetch, then filtered
        # out again so only the scheduled dates remain.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)].copy()
        # Forward-fill within each code.  ``GroupBy.ffill`` replaces the
        # deprecated ``fillna(method='pad')`` inside ``groupby.apply`` and
        # keeps the original index, so ``code`` never becomes an index
        # level that would make the merges below ambiguous.
        value_cols = [c for c in target_df.columns if c != 'code']
        target_df[value_cols] = target_df.groupby('code')[value_cols].ffill()

    names = transformer.names

    if neutralized_risk:
        risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
        # Risk columns already present as factors are not fetched twice.
        used_neutralized_risk = list(set(neutralized_risk).difference(names))
        risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        train_x = pd.merge(train_x, target_df, on=['trade_date', 'code'], how='left')
        risk_exp = train_x[neutralized_risk].values.astype(float)
    else:
        train_x = pd.merge(factor_df, target_df, on=['trade_date', 'code'], how='left')
        risk_exp = None

    # The last column is excluded from the NaN filter: the target was
    # left-joined and may legitimately be missing at prediction time.
    train_x.dropna(inplace=True, subset=train_x.columns[:-1])
    x_values = train_x[names].values.astype(float)
    y_values = train_x[['dx']].values.astype(float)

    date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
    label_dates = np.unique(date_label)

    ne_x = None
    ne_y = None
    codes = None

    # Guard against an empty frame before looking at the last date.
    if len(label_dates) > 0 and label_dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        end = label_dates[-1]
        start = label_dates[-batch] if batch <= len(label_dates) else label_dates[0]

        # NOTE(review): the bisect lookups assume rows are ordered by
        # trade_date - confirm against what the engine returns.
        left_index = bisect.bisect_left(date_label, start)
        right_index = bisect.bisect_right(date_label, end)
        this_raw_x = x_values[left_index:right_index]
        this_raw_y = y_values[left_index:right_index]
        sub_dates = date_label[left_index:right_index]

        if risk_exp is not None:
            this_risk_exp = risk_exp[left_index:right_index]
        else:
            this_risk_exp = None

        # Process the whole batch window, then keep only the ref_date rows.
        ne_x = factor_processing(this_raw_x,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        ne_y = factor_processing(this_raw_y,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        inner_left_index = bisect.bisect_left(sub_dates, end)
        inner_right_index = bisect.bisect_right(sub_dates, end)

        ne_x = ne_x[inner_left_index:inner_right_index]
        ne_y = ne_y[inner_left_index:inner_right_index]

        left_index = bisect.bisect_left(date_label, end)
        right_index = bisect.bisect_right(date_label, end)

        codes = train_x.code.values[left_index:right_index]

    ret = dict()
    ret['x_names'] = transformer.names
    ret['predict'] = {
        'x': pd.DataFrame(ne_x, columns=transformer.names),
        'code': codes,
        # ne_y stays None when ref_date has no data; flattening it
        # unconditionally raised AttributeError.
        'y': ne_y.flatten() if ne_y is not None else None
    }

    return ret
def fetch_train_phase(engine,
                      alpha_factors: Union[Transformer, Iterable[object]],
                      ref_date,
                      frequency,
                      universe,
                      batch=1,
                      neutralized_risk: Iterable[str] = None,
                      risk_model: str = 'short',
                      pre_process: Iterable[object] = None,
                      post_process: Iterable[object] = None,
                      warm_start: int = 0,
                      fit_target: Union[Transformer, object] = None) -> dict:
    """Build the processed training set available as of ``ref_date``.

    Parameters
    ----------
    engine : data access object providing the ``fetch_*`` methods used
        below.
    alpha_factors : a ``Transformer`` or something a ``Transformer`` can
        be built from.
    ref_date : reference date as a ``'%Y-%m-%d'`` string (it is parsed
        with that format below).
    frequency : schedule step; also passed to ``map_freq`` for the
        return horizon.
    universe : universe passed through to every ``engine`` call.
    batch : number of dates included in the training window.
    neutralized_risk, risk_model : forwarded to ``_merge_df``.
    pre_process, post_process : forwarded to ``factor_processing``.
    warm_start : extra leading periods to load.
    fit_target : when ``None`` the target is the forward return from
        ``engine.fetch_dx_return_range``; otherwise it is fetched with
        ``engine.fetch_factor_range_forward`` and forward-filled per code.

    Returns
    -------
    dict
        ``{'x_names': [...], 'train': {'x': DataFrame, 'y': ..., 'code': ...}}``.

    Raises
    ------
    ValueError
        Via ``pyFinAssert`` when ``ref_date`` is the only date with data,
        i.e. there is nothing earlier to train on.
    """
    if isinstance(alpha_factors, Transformer):
        transformer = alpha_factors
    else:
        transformer = Transformer(alpha_factors)

    p = Period(frequency)
    p = Period(length=-(warm_start + batch) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p, BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
    else:
        # One extra date is requested for the forward fetch, then filtered
        # out again so only the scheduled dates remain.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)].copy()
        # Forward-fill within each code.  ``GroupBy.ffill`` replaces the
        # deprecated ``fillna(method='pad')`` inside ``groupby.apply`` and
        # keeps the original index, so ``code`` never becomes an index
        # level that would make the merge below ambiguous.
        value_cols = [c for c in target_df.columns if c != 'code']
        target_df[value_cols] = target_df.groupby('code')[value_cols].ffill()

    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()

    target_df, factor_df = df[['trade_date', 'code', 'dx']], df[
        ['trade_date', 'code'] + transformer.names]

    target_df, dates, date_label, risk_exp, x_values, y_values, _, _, codes = \
        _merge_df(engine, transformer.names, factor_df, target_df, universe, dates, risk_model,
                  neutralized_risk)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        # ref_date itself is excluded from the window, so the training
        # window ends on the date just before it.
        pyFinAssert(len(dates) >= 2, ValueError,
                    "No previous data for training for the date {0}".format(ref_date))
        end = dates[-2]
        start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
    else:
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

    index = (date_label >= start) & (date_label <= end)
    this_raw_x = x_values[index]
    this_raw_y = y_values[index]
    this_code = codes[index]
    if risk_exp is not None:
        this_risk_exp = risk_exp[index]
    else:
        this_risk_exp = None

    ne_x = factor_processing(this_raw_x,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ne_y = factor_processing(this_raw_y,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ret = dict()
    ret['x_names'] = transformer.names
    ret['train'] = {
        'x': pd.DataFrame(ne_x, columns=transformer.names),
        'y': ne_y,
        'code': this_code
    }

    return ret