def _filter_sec_on_tiaocang_date(self, tiaocang_date, sec_id):
    """Flag securities that should be excluded on a rebalancing date.

    Prices are fetched for the rebalancing date and the two preceding
    China.SSE business days, then three exclusion rules are evaluated per
    security and combined.

    :param tiaocang_date: rebalancing date; the first 10 characters of its
        string form are parsed as the date, and it must also be a column
        label of the transposed price table
    :param sec_id: security ids forwarded to ``get_sec_price``
    :return: the ``filters`` column indexed by ``secID``; the arithmetic
        yields 1 when at least one rule fires and 0 otherwise
    """
    calendar = Calendar('China.SSE')
    ref_date = Date.strptime(str(tiaocang_date)[:10])
    one_day_back = calendar.advanceDate(ref_date, Period('-1b')).toDateTime()
    two_days_back = calendar.advanceDate(ref_date, Period('-2b')).toDateTime()

    prices = get_sec_price(start_date=two_days_back,
                           end_date=tiaocang_date,
                           sec_ids=sec_id,
                           data_source=self._data_source,
                           csv_path=self._csv_path).transpose()
    prices.index.name = 'secID'

    px_now = prices[tiaocang_date]
    px_prev = prices[one_day_back]
    px_prev2 = prices[two_days_back]

    # Drop securities whose gain is so large they probably cannot be bought.
    threshold = 1 + self._filter_return_on_tiaocang_date
    prices['returnFilter'] = px_now / px_prev > threshold

    # Drop securities with any NaN price in the window (new listings).
    prices['ipoFilter'] = pd.isnull(px_now * px_prev * px_prev2)

    # Drop suspended securities; the criterion is an identical close price
    # on three consecutive days.
    unchanged_today = px_now == px_prev
    unchanged_yesterday = px_prev == px_prev2
    prices['tingpaiFilter'] = unchanged_today & unchanged_yesterday

    # 1 - prod(1 - flag) is the arithmetic form of a logical OR of the flags.
    keep_return = 1 - prices['returnFilter']
    keep_ipo = 1 - prices['ipoFilter']
    keep_tingpai = 1 - prices['tingpaiFilter']
    prices['filters'] = 1 - keep_return * keep_ipo * keep_tingpai
    return prices['filters']
def testDailySchedule(self):
    """Daily schedules must skip non-business days rather than repeat dates."""
    # China.SSE week starting 2012-01-01:
    # Jan 2 and Jan 3 are skipped as New Year holiday,
    # Jan 7 is skipped as weekend,
    # Jan 8 is adjusted to Jan 9 with following convention.
    sse_start = Date(2012, 1, 1)
    sse_schedule = Schedule(sse_start,
                            sse_start + 7,
                            Period(length=1, units=TimeUnits.Days),
                            Calendar("China.SSE"),
                            BizDayConventions.Preceding)
    self.checkDates(sse_schedule, [
        Date(2011, 12, 30),
        Date(2012, 1, 4),
        Date(2012, 1, 5),
        Date(2012, 1, 6),
        Date(2012, 1, 9),
    ])

    # The schedule should skip Saturday 21st and Sunday 22nd.
    # Previously, it would adjust them to Friday 20th, resulting
    # in three copies of the same date.
    target_start = Date(2012, 1, 17)
    target_schedule = Schedule(target_start,
                               target_start + 7,
                               Period(length=1, units=TimeUnits.Days),
                               Calendar("Target"),
                               BizDayConventions.Preceding)
    self.checkDates(target_schedule, [
        Date(2012, 1, 17),
        Date(2012, 1, 18),
        Date(2012, 1, 19),
        Date(2012, 1, 20),
        Date(2012, 1, 23),
        Date(2012, 1, 24),
    ])
def forward_date(date, tenor, date_format='%Y-%m-%d'):
    """Step ``tenor`` back from ``date`` and return the next business day.

    The date is moved back by ``tenor`` on the China.SSE calendar (with the
    end-of-month flag set), then advanced by one business day.

    :param date: str, reference date formatted according to ``date_format``
    :param tenor: str, unsigned period string; a ``-`` is prepended here
    :param date_format: str, optional, format used to parse ``date``
    :return: str representation of the resulting date, or ``None`` when a
        ``NameError`` is raised while computing it
    """
    try:
        # use pyfin instead to get more accurate and flexible date math
        anchor = Date.strptime(date, date_format)
        calendar = Calendar('China.SSE')
        backward = Period('-' + tenor)
        previous_period_end = calendar.advanceDate(anchor,
                                                   backward,
                                                   endOfMonth=True)
        # This is the end date of the previous period; move one more
        # business day forward so consecutive ranges do not overlap.
        shifted = calendar.advanceDate(previous_period_end, Period('1b'))
        return str(shifted)
    except NameError:
        # NOTE(review): presumably covers the date library not being
        # importable; callers receive None in that case.
        return None
def _map_horizon(frequency: str) -> int:
    """Convert a frequency string into a horizon expressed in days minus one.

    Days and business days count as 1 day per unit, weeks as 5 and months
    as 22.

    :param frequency: period string understood by ``Period``
    :return: ``days_per_unit * length - 1``
    :raises ValueError: if the period unit is none of the above
    """
    period = Period(frequency)
    unit, count = period.units(), period.length()

    if unit in (TimeUnits.BDays, TimeUnits.Days):
        days_per_unit = 1
    elif unit == TimeUnits.Weeks:
        days_per_unit = 5
    elif unit == TimeUnits.Months:
        days_per_unit = 22
    else:
        raise ValueError(
            '{0} is an unrecognized frequency rule'.format(frequency))

    return days_per_unit * count - 1
def testWeeksDaysAlgebra(self):
    """Check division, in-place addition and normalization of week/day periods."""
    twoWeeks = Period(2, TimeUnits.Weeks)
    oneWeek = Period(1, TimeUnits.Weeks)
    threeDays = Period(3, TimeUnits.Days)
    oneDay = Period(1, TimeUnits.Days)

    n = 2
    flag = twoWeeks / n == oneWeek
    self.assertTrue(
        flag,
        "division error: {0} / {1:d} not equal to {2}".format(
            twoWeeks, n, oneWeek))

    n = 7
    # Divide by n (not a repeated literal) so the message matches the check.
    flag = oneWeek / n == oneDay
    self.assertTrue(
        flag,
        "division error: {0} / {1:d} not equal to {2}".format(
            oneWeek, n, oneDay))

    # Named `total` to avoid shadowing the builtin `sum`.
    total = threeDays
    total += oneDay
    flag = total == Period(4, TimeUnits.Days)
    self.assertTrue(
        flag,
        "sum error: {0} + {1} != {2}".format(
            threeDays, oneDay, Period(4, TimeUnits.Days)))

    total += oneWeek
    flag = total == Period(11, TimeUnits.Days)
    self.assertTrue(
        flag,
        "sum error: {0} + {1} + {2} != {3}".format(
            threeDays, oneDay, oneWeek, Period(11, TimeUnits.Days)))

    # A freshly built 7-day period keeps its length and units ...
    sevenDays = Period(7, TimeUnits.Days)
    flag = sevenDays.length == 7
    self.assertTrue(
        flag,
        "normalization error: sevenDays.length is {0:d} instead of 7".format(
            sevenDays.length))

    flag = sevenDays.units == TimeUnits.Days
    self.assertTrue(
        flag,
        "normalization error: sevenDays.units is {0:d} instead of {1:d}".format(
            sevenDays.units, TimeUnits.Days))

    # ... and becomes one week once normalized.
    normalizedSevenDays = sevenDays.normalize()
    flag = normalizedSevenDays.length == 1
    self.assertTrue(
        flag,
        "normalization error: normalizedSevenDays.length"
        " is {0:d} instead of 1".format(normalizedSevenDays.length))

    flag = normalizedSevenDays.units == TimeUnits.Weeks
    self.assertTrue(
        flag,
        "normalization error: normalizedSevenDays.units"
        " is {0:d} instead of {1:d}".format(normalizedSevenDays.units,
                                            TimeUnits.Weeks))
def makeSchedule(firstDate, endDate, tenor):
    """Build a schedule on the null calendar and return it as datetimes.

    :param firstDate: schedule start, converted with ``check_date``
    :param endDate: schedule end, converted with ``check_date``
    :param tenor: value accepted by the ``Period`` constructor
    :return: list with ``toDateTime()`` of every date in the schedule
    """
    null_calendar = Calendar('NullCalendar')
    start = check_date(firstDate)
    end = check_date(endDate)
    step = Period(tenor)

    result = []
    for schedule_date in Schedule(start, end, step, null_calendar):
        result.append(schedule_date.toDateTime())
    return result
def advanceDateByCalendar(holidayCenter,
                          referenceDate,
                          period,
                          convention=BizDayConventions.Following):
    """Advance a date by a period on the named calendar.

    :param holidayCenter: calendar name passed to ``Calendar``
    :param referenceDate: start date, converted with ``check_date``
    :param period: value accepted by the ``Period`` constructor
    :param convention: business day convention passed to ``advanceDate``
    :return: ``toDateTime()`` of the advanced date
    """
    calendar = Calendar(holidayCenter)
    start = check_date(referenceDate)
    step = Period(period)
    advanced = calendar.advanceDate(start, step, convention)
    return advanced.toDateTime()
def testScheduleDeepCopy(self):
    """A deep copy of a Schedule must compare equal to the original."""
    first = Date(2013, 3, 31)
    last = Date(2013, 6, 30)
    original = Schedule(first, last, Period('1m'), Calendar('NullCalendar'))

    duplicate = copy.deepcopy(original)

    self.assertEqual(original, duplicate)
def testScheduleInitializeWithYearly(self):
    """A yearly schedule from a leap day produces the expected dates."""
    startDate = Date(2012, 2, 29)
    endDate = Date(2013, 3, 1)
    tenor = Period('1y')
    cal = Calendar('NullCalendar')
    sch = Schedule(startDate, endDate, tenor, cal)

    expected = [Date(2012, 2, 29), Date(2013, 2, 28), Date(2013, 3, 1)]

    # Check the length first: looping over sch.size() alone would let a
    # too-short (or empty) schedule pass without comparing anything.
    self.assertEqual(len(expected), sch.size())
    for i, expected_date in enumerate(expected):
        self.assertEqual(expected_date, sch[i])
def get_pos_adj_date(start_date,
                     end_date,
                     formats="%Y-%m-%d",
                     calendar='China.SSE',
                     freq='m',
                     return_biz_day=False):
    """
    Compute the position adjustment dates between two dates.

    A schedule with one period per step is generated from the start date to
    the end date; every schedule date except the last is then mapped to the
    end of its period (next Friday for weekly, calendar month end for
    monthly, Dec 31 for yearly). Dates after ``end_date`` are dropped.

    :param start_date: str/datetime.datetime, start date of strategy
    :param end_date: str/datetime.datetime, end date of strategy
    :param formats: optional, formats of the string date
    :param calendar: str, optional, name of the calendar to use in dates math
    :param freq: str, optional, the frequency of data (key of ``_freqDict``)
    :param return_biz_day: bool, optional, if the return dates are biz days
    :return: list of datetime.datetime, pos adjust dates
    :raises TypeError: if a date is neither str nor datetime.datetime
    """

    def _to_date(value):
        # Convert each endpoint on its own so that a str/datetime mix works
        # and unsupported types fail with a clear message instead of an
        # UnboundLocalError further down.
        if isinstance(value, str):
            return Date.strptime(value, formats)
        if isinstance(value, datetime.datetime):
            return Date.fromDateTime(value)
        raise TypeError(
            "start_date/end_date must be str or datetime.datetime, "
            "got {0}".format(type(value).__name__))

    d_start_date = _to_date(start_date)
    d_end_date = _to_date(end_date)

    cal = Calendar(calendar)
    unit = _freqDict[freq]
    pos_adjust_date = Schedule(d_start_date, d_end_date,
                               Period(length=1, units=unit), cal,
                               BizDayConventions.Unadjusted)

    # It fails if the start date is set to the first adjustment date and the
    # Schedule is used to compute the others, so first compute the list of
    # dates in each period and then map each one to the last day of its
    # period: that last day is the position adjustment date.
    if unit == TimeUnits.Weeks:
        pos_adjust_date = [
            Date.nextWeekday(date, Weekdays.Friday)
            for date in pos_adjust_date[:-1]
        ]
    elif unit == TimeUnits.Months:
        pos_adjust_date = [
            cal.endOfMonth(date) for date in pos_adjust_date[:-1]
        ]
    elif unit == TimeUnits.Years:
        pos_adjust_date = [
            Date(date.year(), 12, 31) for date in pos_adjust_date[:-1]
        ]

    if return_biz_day:
        pos_adjust_date = [
            cal.adjustDate(date, BizDayConventions.Preceding)
            for date in pos_adjust_date
        ]

    # Period ends may fall after the requested end date; drop those.
    end_datetime = d_end_date.toDateTime()
    pos_adjust_date = [date.toDateTime() for date in pos_adjust_date]
    return [date for date in pos_adjust_date if date <= end_datetime]
def testPeriodPickle(self):
    """A Period must survive a pickle round trip through a file on disk."""
    p1 = Period('36m')

    # delete=False lets the file be reopened by name after it is closed;
    # the finally clause removes it even when dump, load or the assertion
    # fails, so no temp file is left behind.
    f = tempfile.NamedTemporaryFile('w+b', delete=False)
    try:
        with f:
            pickle.dump(p1, f)

        with open(f.name, 'rb') as f2:
            pickled_period = pickle.load(f2)

        self.assertEqual(p1, pickled_period)
    finally:
        os.unlink(f.name)
def makeSchedule(firstDate,
                 endDate,
                 tenor,
                 calendar='NullCalendar',
                 dateRule=BizDayConventions.Following):
    """Build a schedule on the named calendar and return it as datetimes.

    :param firstDate: schedule start, converted with ``check_date``
    :param endDate: schedule end, converted with ``check_date``
    :param tenor: value accepted by the ``Period`` constructor
    :param calendar: calendar name passed to ``Calendar``
    :param dateRule: business day convention passed to ``Schedule``
    :return: list with ``toDateTime()`` of every date in the schedule
    """
    holiday_calendar = Calendar(calendar)
    start = check_date(firstDate)
    end = check_date(endDate)
    step = Period(tenor)

    generated = Schedule(start, end, step, holiday_calendar,
                         convention=dateRule)

    result = []
    for schedule_date in generated:
        result.append(schedule_date.toDateTime())
    return result
def testScheduleInitialize(self):
    """A monthly schedule between two month ends produces month-end dates."""
    startDate = Date(2013, 3, 31)
    endDate = Date(2013, 6, 30)
    tenor = Period('1m')
    cal = Calendar('NullCalendar')
    sch = Schedule(startDate, endDate, tenor, cal)

    expected = [
        Date(2013, 3, 31),
        Date(2013, 4, 30),
        Date(2013, 5, 31),
        Date(2013, 6, 30),
    ]

    # Check the length first: looping over sch.size() alone would let a
    # too-short (or empty) schedule pass without comparing anything.
    self.assertEqual(len(expected), sch.size())
    for i, expected_date in enumerate(expected):
        self.assertEqual(expected_date, sch[i])
def testSchedulePickle(self):
    """A Schedule must survive a pickle round trip through a file on disk."""
    startDate = Date(2013, 3, 31)
    endDate = Date(2013, 6, 30)
    tenor = Period('1m')
    cal = Calendar('NullCalendar')
    sch = Schedule(startDate, endDate, tenor, cal)

    # delete=False lets the file be reopened by name after it is closed;
    # the finally clause removes it even when dump, load or the assertion
    # fails, so no temp file is left behind.
    f = tempfile.NamedTemporaryFile('w+b', delete=False)
    try:
        with f:
            pickle.dump(sch, f)

        with open(f.name, 'rb') as f2:
            pickled_sch = pickle.load(f2)

        self.assertEqual(sch, pickled_sch)
    finally:
        os.unlink(f.name)
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0):
    """Load factor, return, industry and benchmark data on a date schedule.

    :param engine: data engine providing the ``fetch_*_range`` methods
    :param factors: a ``Transformer`` or the expressions to wrap in one
    :param start_date: first date of the schedule, ``%Y-%m-%d``
    :param end_date: last date of the schedule
    :param frequency: period string giving the schedule step
    :param universe: universe passed through to the engine
    :param benchmark: benchmark code passed through to the engine
    :param warm_start: number of extra periods to move the start date back
    :return: tuple of (schedule dates as strings, frame with ``dx``,
        frame with weights, industry columns and the factor columns)
    """
    if warm_start > 0:
        # Move the start back by warm_start periods of the given frequency.
        base = Period(frequency)
        lookback = Period(length=-warm_start * base.length(),
                          units=base.units())
        shifted = advanceDateByCalendar('china.sse', start_date, lookback)
        start_date = shifted.strftime('%Y-%m-%d')

    schedule = makeSchedule(start_date,
                            end_date,
                            frequency,
                            calendar='china.sse',
                            dateRule=BizDayConventions.Following,
                            dateGenerationRule=DateGeneration.Forward)
    dates = [d.strftime('%Y-%m-%d') for d in schedule]

    horizon = map_freq(frequency)

    transformer = factors if isinstance(factors, Transformer) \
        else Transformer(factors)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    factor_df = factor_df.sort_values(['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")

    return_df = engine.fetch_dx_return_range(universe,
                                             dates=dates,
                                             horizon=horizon)
    alpha_logger.info("return data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")

    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    merged = pd.merge(factor_df, return_df, on=['trade_date', 'code'])
    merged = merged.dropna()
    # Left join: rows without a benchmark entry are kept with zero weight.
    merged = pd.merge(merged,
                      benchmark_df,
                      on=['trade_date', 'code'],
                      how='left')
    merged = pd.merge(merged, industry_df, on=['trade_date', 'code'])
    merged['weight'] = merged['weight'].fillna(0.)

    target_columns = ['trade_date', 'code', 'dx']
    feature_columns = [
        'trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'
    ] + transformer.names
    return dates, merged[target_columns], merged[feature_columns]
def testAdvanceDate(self):
    """Advancing 2014-01-31 on the SSE and IB calendars gives known dates."""
    anchor = Date(2014, 1, 31)
    sse = Calendar('China.SSE')
    ib = Calendar('China.IB')
    following = BizDayConventions.Following

    # (calendar, period string, expected date) under the Following rule.
    # The SSE/IB difference for '2b' is caused by Feb 8 being an SSE
    # holiday but a working day for the IB market.
    cases = [
        (sse, '0b', Date(2014, 2, 7)),  # null period
        (sse, '-5b', Date(2014, 1, 24)),  # negative period
        (sse, '2b', Date(2014, 2, 10)),
        (sse, '2d', Date(2014, 2, 7)),
        (ib, '2b', Date(2014, 2, 8)),
        (ib, '2d', Date(2014, 2, 7)),
    ]
    for calendar, tenor, expected in cases:
        advanced = calendar.advanceDate(anchor, Period(tenor), following)
        self.assertEqual(advanced, expected)

    # May 31, 2014 is a holiday
    modified_following = BizDayConventions.ModifiedFollowing
    advanced = sse.advanceDate(anchor, Period('4m'), modified_following, True)
    self.assertEqual(advanced, Date(2014, 5, 30))
def fetch_train_phase(engine,
                      alpha_factors: Union[Transformer, Iterable[object]],
                      ref_date,
                      frequency,
                      universe,
                      batch=1,
                      neutralized_risk: Iterable[str] = None,
                      risk_model: str = 'short',
                      pre_process: Iterable[object] = None,
                      post_process: Iterable[object] = None,
                      warm_start: int = 0,
                      fit_target: Union[Transformer, object] = None) -> dict:
    """Assemble the processed training set for a reference date.

    :param engine: data engine providing the ``fetch_*`` methods
    :param alpha_factors: a ``Transformer`` or the expressions to wrap in one
    :param ref_date: reference date as a ``%Y-%m-%d`` string
    :param frequency: period string giving the schedule step
    :param universe: universe passed through to the engine
    :param batch: number of schedule dates in the training window
    :param neutralized_risk: risk factor names forwarded to ``_merge_df``
    :param risk_model: risk model name forwarded to ``_merge_df``
    :param pre_process: pre-processing steps for ``factor_processing``
    :param post_process: post-processing steps for ``factor_processing``
    :param warm_start: extra periods of history to load before the window
    :param fit_target: optional target expression; when ``None`` the forward
        return from ``fetch_dx_return_range`` is used as the target
    :return: dict with ``x_names`` and ``train`` (keys ``x``, ``y``, ``code``)
    :raises ValueError: via ``pyFinAssert`` when the last date equals
        ``ref_date`` and there is no earlier date to train on
    """
    if isinstance(alpha_factors, Transformer):
        transformer = alpha_factors
    else:
        transformer = Transformer(alpha_factors)

    # Go back (warm_start + batch) periods from the reference date.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Fetch one extra date after the schedule, then keep only the rows
        # on the schedule dates.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # Forward-fill within each code. ffill() is the supported spelling
        # of the deprecated fillna(method='pad').
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())

    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()

    target_df = df[['trade_date', 'code', 'dx']]
    factor_df = df[['trade_date', 'code'] + transformer.names]

    target_df, dates, date_label, risk_exp, x_values, y_values, _, _, codes = \
        _merge_df(engine, transformer.names, factor_df, target_df, universe,
                  dates, risk_model, neutralized_risk)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        # The reference date itself is excluded from the training window.
        pyFinAssert(
            len(dates) >= 2, ValueError,
            "No previous data for training for the date {0}".format(ref_date))
        end = dates[-2]
        start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
    else:
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

    index = (date_label >= start) & (date_label <= end)
    this_raw_x = x_values[index]
    this_raw_y = y_values[index]
    this_code = codes[index]
    if risk_exp is not None:
        this_risk_exp = risk_exp[index]
    else:
        this_risk_exp = None

    ne_x = factor_processing(this_raw_x,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ne_y = factor_processing(this_raw_y,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ret = dict()
    ret['x_names'] = transformer.names
    ret['train'] = {
        'x': pd.DataFrame(ne_x, columns=transformer.names),
        'y': ne_y,
        'code': this_code
    }

    return ret
def testPeriodDeepCopy(self):
    """A deep copy of a Period must compare equal to the original."""
    original = Period('36m')
    duplicate = copy.deepcopy(original)
    self.assertEqual(original, duplicate)
def fetch_train_phase(engine,
                      alpha_factors: Iterable[object],
                      ref_date,
                      frequency,
                      universe,
                      batch,
                      neutralized_risk: Iterable[str] = None,
                      risk_model: str = 'short',
                      pre_process: Iterable[object] = None,
                      post_process: Iterable[object] = None,
                      warm_start: int = 0) -> dict:
    """Assemble the processed training set for a reference date.

    :param engine: data engine providing the ``fetch_*`` methods
    :param alpha_factors: factor expressions wrapped in a ``Transformer``
    :param ref_date: reference date as a ``%Y-%m-%d`` string
    :param frequency: period string giving the schedule step
    :param universe: universe passed through to the engine
    :param batch: number of schedule dates in the training window
    :param neutralized_risk: risk factor names forwarded to ``_merge_df``
    :param risk_model: risk model name forwarded to ``_merge_df``
    :param pre_process: pre-processing steps for ``factor_processing``
    :param post_process: post-processing steps for ``factor_processing``
    :param warm_start: extra periods of history to load before the window
    :return: dict with ``x_names`` and ``train`` (keys ``x`` and ``y``)
    :raises ValueError: when the last available date equals ``ref_date``
        and there is no earlier date to train on
    """
    transformer = Transformer(alpha_factors)

    # Go back (warm_start + batch + 1) periods from the reference date.
    p = Period(frequency)
    p = Period(length=-(warm_start + batch + 1) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = _map_horizon(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)
    return_df = engine.fetch_dx_return_range(universe,
                                             dates=dates,
                                             horizon=horizon)

    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()

    return_df = df[['trade_date', 'code', 'dx']]
    factor_df = df[['trade_date', 'code', 'isOpen'] + transformer.names]

    return_df, dates, date_label, risk_exp, x_values, y_values, _, _ = \
        _merge_df(engine, transformer.names, factor_df, return_df, universe,
                  dates, risk_model, neutralized_risk)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        # The reference date itself is excluded from the training window,
        # so at least one earlier date is required.
        if len(dates) < 2:
            raise ValueError(
                "No previous data for training for the date {0}".format(
                    ref_date))
        end = dates[-2]
        # Clamp to the oldest available date instead of raising IndexError
        # when fewer than `batch` earlier dates survived the merges.
        start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
    else:
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

    index = (date_label >= start) & (date_label <= end)
    this_raw_x = x_values[index]
    this_raw_y = y_values[index]
    if risk_exp is not None:
        this_risk_exp = risk_exp[index]
    else:
        this_risk_exp = None

    ne_x = factor_processing(this_raw_x,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ne_y = factor_processing(this_raw_y,
                             pre_process=pre_process,
                             risk_factors=this_risk_exp,
                             post_process=post_process)

    ret = dict()
    ret['x_names'] = transformer.names
    ret['train'] = {'x': ne_x, 'y': ne_y}

    return ret
def testComparingOperators(self):
    """Check <, != and > between periods of equal and different units."""

    def make(length, units):
        return Period(length=length, units=units)

    # (left, right, whether ``left < right`` is expected to hold)
    ordering_cases = [
        (make(0, TimeUnits.Days), make(1, TimeUnits.Days), True),
        (make(13, TimeUnits.Months), make(1, TimeUnits.Years), False),
        (make(1, TimeUnits.Years), make(13, TimeUnits.Months), True),
        (make(13, TimeUnits.Days), make(2, TimeUnits.Weeks), True),
        (make(2, TimeUnits.Weeks), make(13, TimeUnits.Days), False),
        (make(1, TimeUnits.Years), make(56, TimeUnits.Weeks), True),
        (make(56, TimeUnits.Weeks), make(1, TimeUnits.Years), False),
    ]
    for left, right, is_less in ordering_cases:
        if is_less:
            self.assertTrue(left < right)
        else:
            self.assertTrue(not left < right)

    # Comparing these units against months is expected to raise.
    five_months = make(5, TimeUnits.Months)
    for left in (make(21, TimeUnits.Weeks), make(21, TimeUnits.BDays)):
        with self.assertRaises(ValueError):
            _ = left < five_months

    # test not equal operator
    one_day = make(1, TimeUnits.Days)
    self.assertTrue(not one_day != make(1, TimeUnits.Days))
    self.assertTrue(one_day != make(1, TimeUnits.Years))

    # test greater than operator
    shorter = make(1, TimeUnits.Days)
    longer = make(2, TimeUnits.Days)
    self.assertEqual(shorter < longer, not shorter > longer)
def dcam_strat_main(factor_loader_params, analyzer_params, selector_params,
                    portfolio_params, update_params):
    """Run the DCAM strategy: load factors, score, select and evaluate.

    Each stage is either recomputed or loaded from a module-level path,
    depending on the flags in ``update_params``.

    :param factor_loader_params: dict with required keys ``start_date``,
        ``end_date``, ``factor_norm_dict`` and optional ``na_handler``
    :param analyzer_params: dict of optional DCAM analyzer settings
    :param selector_params: dict of optional selector settings
    :param portfolio_params: dict of optional portfolio settings
    :param update_params: dict with optional flags ``update_factor``,
        ``update_sec_score`` and ``update_sec_select``
    :return: None; ends by calling ``evaluate_ptf_return`` on the portfolio
    """
    # FactorLoader params
    start_date = factor_loader_params['start_date']
    end_date = factor_loader_params['end_date']
    factor_norm_dict = factor_loader_params['factor_norm_dict']
    na_handler = factor_loader_params.get('na_handler',
                                          FactorNAHandler.Ignore)

    # dcam analyzer params
    factor_weight_type = analyzer_params.get('factor_weight_type',
                                             FactorWeightType.ICWeight)
    tiaocang_date_window_size = analyzer_params.get(
        'tiaocang_date_window_size', 12)
    save_sec_score = analyzer_params.get('save_sec_score', True)

    # selector params
    save_sec_selected = selector_params.get('save_sec_selected', True)
    nb_sec_selected_per_industry_min = selector_params.get(
        'nb_sec_selected_per_industry_min', 5)
    use_industry_name = selector_params.get('use_industry_name', True)
    nb_sec_selected_total = selector_params.get('nb_sec_selected_total', 100)
    ignore_zero_weight = selector_params.get('ignore_zero_weight', False)

    # portfolio params
    benchmark_sec_id = portfolio_params.get('benchmark_sec_id', '000905.SH')
    re_balance_freq = portfolio_params.get('re_balance_freq', FreqType.EOM)
    initial_capital = portfolio_params.get('initial_capital', 1000000000.0)
    filter_return_on_tiaocang_date = portfolio_params.get(
        'filter_return_on_tiaocang_date', 0.09)
    data_source = portfolio_params.get('data_source', DataSource.WIND)
    save_perf_file = portfolio_params.get('save_perf_file', False)
    risk_free = portfolio_params.get('risk_free', 0.0)

    # update flags
    update_factor = update_params.get('update_factor', False)
    update_sec_score = update_params.get('update_sec_score', False)
    update_sec_select = update_params.get('update_sec_select', False)

    # Stage 1: normalized factor data (recomputed and cached, or loaded).
    factor = FactorLoader(start_date=start_date,
                          end_date=end_date,
                          factor_norm_dict=factor_norm_dict,
                          na_handler=na_handler)
    if not update_factor:
        factor_data = pickle_load_data(_factor_pkl_path)
    else:
        factor_data = factor.get_norm_factor_data()
        pickle_dump_data(factor_data, _factor_pkl_path)

    # Stage 2: security scores.
    if not update_sec_score:
        sec_score = load_sec_score(_sec_score_path)
    else:
        layer_names = [
            name for name in factor_norm_dict.keys()
            if factor_norm_dict[name][1] == DCAMFactorType.layerFactor
        ]
        alpha_names = [
            name for name in factor_norm_dict.keys()
            if factor_norm_dict[name][1] == DCAMFactorType.alphaFactor
        ]
        layer_factor = [factor_data[name] for name in layer_names]
        alpha_factor = [factor_data[name] for name in alpha_names]
        # NOTE(review): the sign is read from factor_data[name][2], not
        # from factor_norm_dict[name][2] -- confirm this is intended.
        alpha_factor_sign = [factor_data[name][2] for name in alpha_names]

        analyzer = DCAMAnalyzer(
            layer_factor=layer_factor,
            alpha_factor=alpha_factor,
            sec_return=factor_data['RETURN'],
            tiaocang_date=factor.get_tiaocang_date(),
            tiaocang_date_window_size=tiaocang_date_window_size,
            save_sec_score=save_sec_score,
            factor_weight_type=factor_weight_type,
            alpha_factor_sign=alpha_factor_sign)
        sec_score = analyzer.calc_sec_score()

    # Stage 3: security selection.
    if not update_sec_select:
        sec_selected = load_sec_selected(_sec_selected_path)
    else:
        index_comp = IndexComp(industry_weight=factor_data['IND_WGT'])
        selector = Selector(
            sec_score=sec_score,
            industry=factor_data['INDUSTRY'],
            nb_sec_selected_per_industry_min=nb_sec_selected_per_industry_min,
            index_comp=index_comp,
            save_sec_selected=save_sec_selected,
            use_industry_name=use_industry_name,
            nb_sec_selected_total=nb_sec_selected_total,
            ignore_zero_weight=ignore_zero_weight)
        selector.industry_neutral = True
        selector.sec_selection()
        sec_selected = selector.sec_selected_full_info
        pprint(selector.sec_selected_full_info)

    # construct strategy ptf
    # Price data is needed up to the month end that follows the last
    # rebalancing date.
    sse_cal = Calendar('China.SSE')
    one_month_later = sse_cal.advanceDate(Date.strptime(end_date),
                                          Period('1m'))
    end_date_for_price_data = str(one_month_later)

    strategy = Portfolio(
        sec_selected=sec_selected,
        end_date=end_date_for_price_data,
        initial_capital=initial_capital,
        filter_return_on_tiaocang_date=filter_return_on_tiaocang_date,
        data_source=data_source,
        benchmark_sec_id=benchmark_sec_id,
        re_balance_freq=re_balance_freq,
        save_perf_file=save_perf_file,
        risk_free=risk_free)
    strategy.evaluate_ptf_return()
def testYearsMonthsAlgebra(self):
    """Check division, in-place addition and normalization of year/month periods."""
    oneYear = Period(1, TimeUnits.Years)
    sixMonths = Period(6, TimeUnits.Months)
    threeMonths = Period(3, TimeUnits.Months)

    n = 4
    flag = oneYear / n == threeMonths
    self.assertTrue(
        flag,
        "division error: {0} / {1:d} not equal to {2}".format(
            oneYear, n, threeMonths))

    n = 2
    flag = oneYear / n == sixMonths
    self.assertTrue(
        flag,
        "division error: {0} / {1:d} not equal to {2}".format(
            oneYear, n, sixMonths))

    # Named `total` to avoid shadowing the builtin `sum`.
    total = threeMonths
    total += sixMonths
    flag = total == Period(9, TimeUnits.Months)
    self.assertTrue(
        flag,
        "sum error: {0} + {1} != {2}".format(
            threeMonths, sixMonths, Period(9, TimeUnits.Months)))

    total += oneYear
    flag = total == Period(21, TimeUnits.Months)
    self.assertTrue(
        flag,
        "sum error: {0} + {1} + {2} != {3}".format(
            threeMonths, sixMonths, oneYear, Period(21, TimeUnits.Months)))

    # A freshly built 12-month period keeps its length and units ...
    twelveMonths = Period(12, TimeUnits.Months)
    flag = twelveMonths.length == 12
    self.assertTrue(
        flag,
        "normalization error: TwelveMonths.length"
        " is {0:d} instead of 12".format(twelveMonths.length))

    flag = twelveMonths.units == TimeUnits.Months
    self.assertTrue(
        flag,
        "normalization error: TwelveMonths.units"
        " is {0:d} instead of {1:d}".format(twelveMonths.units,
                                            TimeUnits.Months))

    # ... and becomes one year once normalized. The failure messages report
    # the normalized period (the object under test), not the raw one.
    normalizedTwelveMonths = Period(12, TimeUnits.Months).normalize()
    flag = normalizedTwelveMonths.length == 1
    self.assertTrue(
        flag,
        "normalization error: normalizedTwelveMonths.length"
        " is {0:d} instead of 1".format(normalizedTwelveMonths.length))

    flag = normalizedTwelveMonths.units == TimeUnits.Years
    self.assertTrue(
        flag,
        "normalization error: normalizedTwelveMonths.units"
        " is {0:d} instead of {1:d}".format(normalizedTwelveMonths.units,
                                            TimeUnits.Years))

    # Days and business days are expected to keep their units.
    thirtyDays = Period(30, TimeUnits.Days)
    normalizedThirtyDays = thirtyDays.normalize()
    flag = normalizedThirtyDays.units == TimeUnits.Days
    self.assertTrue(
        flag,
        "normalization error: ThirtyDays.units"
        " is {0:d} instead of {1:d}".format(normalizedThirtyDays.units,
                                            TimeUnits.Days))

    thirtyBDays = Period(30, TimeUnits.BDays)
    normalizedThirtyBDays = thirtyBDays.normalize()
    flag = normalizedThirtyBDays.units == TimeUnits.BDays
    self.assertTrue(
        flag,
        "normalization error: ThirtyBDays.units"
        " is {0:d} instead of {1:d}".format(normalizedThirtyBDays.units,
                                            TimeUnits.BDays))
def prepare_data(engine: SqlEngine,
                 factors: Union[Transformer, Iterable[object]],
                 start_date: str,
                 end_date: str,
                 frequency: str,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0,
                 fit_target: Union[Transformer, object] = None):
    """Load factor, target, industry and benchmark data on a date schedule.

    :param engine: data engine providing the ``fetch_*`` methods
    :param factors: a ``Transformer`` or the expressions to wrap in one
    :param start_date: first date of the schedule, ``%Y-%m-%d``
    :param end_date: last date of the schedule
    :param frequency: period string giving the schedule step
    :param universe: universe passed through to the engine
    :param benchmark: benchmark code passed through to the engine
    :param warm_start: number of extra periods to move the start date back
    :param fit_target: optional target expression; when ``None`` the forward
        return from ``fetch_dx_return_range`` is used as the target
    :return: tuple of (schedule dates as strings, frame with ``dx``,
        frame with weights, industry columns and the factor columns)
    """
    if warm_start > 0:
        # Move the start back by warm_start periods of the given frequency.
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
        start_date = advanceDateByCalendar('china.sse', start_date,
                                           p).strftime('%Y-%m-%d')

    dates = makeSchedule(start_date,
                         end_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Forward)

    dates = [d.strftime('%Y-%m-%d') for d in dates]

    horizon = map_freq(frequency)

    if isinstance(factors, Transformer):
        transformer = factors
    else:
        transformer = Transformer(factors)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).sort_values(
                                              ['trade_date', 'code'])
    alpha_logger.info("factor data loading finished")

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Fetch one extra date after the schedule, then keep only the rows
        # on the schedule dates.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1],
                                              frequency)
        target_df = engine.fetch_factor_range_forward(universe,
                                                      factors=fit_target,
                                                      dates=dates +
                                                      [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # Forward-fill within each code. ffill() is the supported spelling
        # of the deprecated fillna(method='pad').
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())
    alpha_logger.info("fit target data loading finished")

    industry_df = engine.fetch_industry_range(universe, dates=dates)
    alpha_logger.info("industry data loading finished")

    benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
    alpha_logger.info("benchmark data loading finished")

    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()
    # Left join: rows without a benchmark entry are kept with zero weight.
    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
    df = pd.merge(df, industry_df, on=['trade_date', 'code'])
    df['weight'] = df['weight'].fillna(0.)
    df.dropna(inplace=True)

    return dates, df[[
        'trade_date', 'code', 'dx'
    ]], df[['trade_date', 'code', 'weight', 'industry_code', 'industry'] +
           transformer.names]
def testBasicArithmic(self):
    """Exercise Period normalize, +, unary -, <, - and str()."""
    sum_message = "added value {0} should be equal to {1}"

    def check_sum(lhs, rhs, expected):
        # lhs + rhs must equal expected.
        calculated = lhs + rhs
        self.assertEqual(expected, calculated,
                         sum_message.format(calculated, expected))

    def check_rejected(lhs, unit_list):
        # Adding a period in any of these units must raise ValueError, and
        # so must adding a period whose units were overwritten with 10.
        for units in unit_list:
            rhs = Period(length=2, units=units)
            with self.assertRaises(ValueError):
                _ = lhs + rhs
        rhs._units = 10
        with self.assertRaises(ValueError):
            _ = lhs + rhs

    # test bad normalize
    broken = Period(length=1, units=TimeUnits.Years)
    broken._units = 10
    with self.assertRaises(TypeError):
        broken.normalize()

    # test plus method
    ten_months = Period(length=10, units=TimeUnits.Months)
    check_sum(Period(length=0, units=TimeUnits.Days), ten_months, ten_months)

    two_years = Period(length=2, units=TimeUnits.Years)
    check_sum(two_years, Period(length=13, units=TimeUnits.Months),
              Period(length=37, units=TimeUnits.Months))
    check_rejected(two_years,
                   (TimeUnits.Weeks, TimeUnits.BDays, TimeUnits.Days))

    thirteen_months = Period(length=13, units=TimeUnits.Months)
    check_sum(thirteen_months, Period(length=2, units=TimeUnits.Years),
              Period(length=37, units=TimeUnits.Months))
    check_rejected(thirteen_months,
                   (TimeUnits.Weeks, TimeUnits.BDays, TimeUnits.Days))

    two_weeks = Period(length=2, units=TimeUnits.Weeks)
    check_sum(two_weeks, Period(length=7, units=TimeUnits.Days),
              Period(length=21, units=TimeUnits.Days))
    check_rejected(two_weeks,
                   (TimeUnits.Months, TimeUnits.BDays, TimeUnits.Years))

    seven_days = Period(length=7, units=TimeUnits.Days)
    check_sum(seven_days, Period(length=2, units=TimeUnits.Weeks),
              Period(length=21, units=TimeUnits.Days))
    check_rejected(seven_days,
                   (TimeUnits.Months, TimeUnits.BDays, TimeUnits.Years))

    seven_bdays = Period(length=7, units=TimeUnits.BDays)
    check_rejected(seven_bdays, (TimeUnits.Months, TimeUnits.Days,
                                 TimeUnits.Weeks, TimeUnits.Years))
    self.assertEqual(seven_bdays + Period(length=2, units=TimeUnits.BDays),
                     Period('9B'))

    # test negative operator
    negated = -Period(length=-13, units=TimeUnits.Weeks)
    self.assertEqual(negated, Period(length=13, units=TimeUnits.Weeks))

    # test less operator
    zero_days = Period(length=0, units=TimeUnits.Days)
    minus_three_bdays = Period(length=-3, units=TimeUnits.BDays)
    self.assertTrue(minus_three_bdays < zero_days)

    # test sub operator
    zero_days = Period(length=0, units=TimeUnits.Days)
    minus_three_bdays = Period(length=-3, units=TimeUnits.BDays)
    self.assertEqual(zero_days - minus_three_bdays, Period('3b'))

    # test string representation
    twelve_months = Period(length=12, units=TimeUnits.Months)
    self.assertEqual("1Y", twelve_months.__str__())
def testWeeksDaysAlgebra(self):
    """Check division, in-place addition and normalization of week/day periods."""
    twoWeeks = Period(length=2, units=TimeUnits.Weeks)
    oneWeek = Period(length=1, units=TimeUnits.Weeks)
    threeDays = Period(length=3, units=TimeUnits.Days)
    oneDay = Period(length=1, units=TimeUnits.Days)

    n = 2
    flag = twoWeeks / n == oneWeek
    self.assertTrue(
        flag,
        "division error: {0} / {1:d} not equal to {2}".format(
            twoWeeks, n, oneWeek))

    n = 7
    # Divide by n (not a repeated literal) so the message matches the check.
    flag = oneWeek / n == oneDay
    self.assertTrue(
        flag,
        "division error: {0} / {1:d} not equal to {2}".format(
            oneWeek, n, oneDay))

    # Named `total` to avoid shadowing the builtin `sum`.
    total = threeDays
    total += oneDay
    flag = total == Period(length=4, units=TimeUnits.Days)
    self.assertTrue(
        flag,
        "sum error: {0} + {1} != {2}".format(
            threeDays, oneDay, Period(length=4, units=TimeUnits.Days)))

    total += oneWeek
    flag = total == Period(length=11, units=TimeUnits.Days)
    self.assertTrue(
        flag,
        "sum error: {0} + {1} + {2} != {3}".format(
            threeDays, oneDay, oneWeek,
            Period(length=11, units=TimeUnits.Days)))

    # A freshly built 7-day period keeps its length and units ...
    sevenDays = Period(length=7, units=TimeUnits.Days)
    flag = sevenDays.length() == 7
    self.assertTrue(
        flag,
        "normalization error: sevenDays.length is {0:d} instead of 7".format(
            sevenDays.length()))

    flag = sevenDays.units() == TimeUnits.Days
    self.assertTrue(
        flag,
        "normalization error: sevenDays.units is {0:d} instead of {1:d}".format(
            sevenDays.units(), TimeUnits.Days))

    # ... and becomes one week once normalized.
    normalizedSevenDays = sevenDays.normalize()
    flag = normalizedSevenDays.length() == 1
    self.assertTrue(
        flag,
        "normalization error: normalizedSevenDays.length"
        " is {0:d} instead of 1".format(normalizedSevenDays.length()))

    flag = normalizedSevenDays.units() == TimeUnits.Weeks
    self.assertTrue(
        flag,
        "normalization error: normalizedSevenDays.units"
        " is {0:d} instead of {1:d}".format(normalizedSevenDays.units(),
                                            TimeUnits.Weeks))
def testComparingOperators(self):
    """Check <, != and > between periods of equal and different units."""
    # (left, right, whether ``left < right`` is expected to hold)
    ordering_cases = [
        (Period(0, TimeUnits.Days), Period(1, TimeUnits.Days), True),
        (Period(13, TimeUnits.Months), Period(1, TimeUnits.Years), False),
        (Period(1, TimeUnits.Years), Period(13, TimeUnits.Months), True),
        (Period(13, TimeUnits.Days), Period(2, TimeUnits.Weeks), True),
        (Period(2, TimeUnits.Weeks), Period(13, TimeUnits.Days), False),
        (Period(1, TimeUnits.Years), Period(56, TimeUnits.Weeks), True),
        (Period(56, TimeUnits.Weeks), Period(1, TimeUnits.Years), False),
    ]
    for left, right, is_less in ordering_cases:
        if is_less:
            self.assertTrue(left < right)
        else:
            self.assertTrue(not left < right)

    # Comparing these units against months is expected to raise.
    five_months = Period(5, TimeUnits.Months)
    for left in (Period(21, TimeUnits.Weeks), Period(21, TimeUnits.BDays)):
        with self.assertRaises(ValueError):
            _ = left < five_months

    # test not equal operator
    one_day = Period(1, TimeUnits.Days)
    self.assertTrue(not one_day != Period(1, TimeUnits.Days))
    self.assertTrue(one_day != Period(1, TimeUnits.Years))

    # test greater than operator
    shorter = Period(1, TimeUnits.Days)
    longer = Period(2, TimeUnits.Days)
    self.assertEqual(shorter < longer, not shorter > longer)
def testBasicFunctions(self):
    """Exercise Date construction, accessors, comparisons, static helpers and period arithmetic."""
    year = 2015
    month = 7
    day = 24

    strRepr = "{0}-{1:02d}-{2:02d}".format(year, month, day)
    innerRepr = "Date({0}, {1}, {2})".format(year, month, day)

    testDate = Date(year, month, day)
    self.assertEqual(
        str(testDate), strRepr, "date string:\n"
        "expected: {0:s}\n"
        "calculated: {1:s}".format(strRepr, str(testDate)))

    self.assertEqual(
        repr(testDate), innerRepr, "date representation:\n"
        "expected: {0:s}\n"
        "calculated: {1:s}".format(innerRepr, repr(testDate)))

    self.assertEqual(
        testDate.year(), year, "date year:\n"
        "expected: {0:d}\n"
        "calculated: {1:d}".format(year, testDate.year()))

    self.assertEqual(
        testDate.month(), month, "date month:\n"
        "expected: {0:d}\n"
        "calculated: {1:d}".format(month, testDate.month()))

    self.assertEqual(
        testDate.dayOfMonth(), day, "date day:\n"
        "expected: {0:d}\n"
        "calculated: {1:d}".format(day, testDate.dayOfMonth()))

    # Day of year is the 1-based offset from January 1st of the same year.
    expectedDayOfYear = testDate - Date(2015, 1, 1) + 1
    self.assertEqual(
        testDate.dayOfYear(), expectedDayOfYear, "date day of year:\n"
        "expected: {0:d}\n"
        "calculated: {1:d}".format(expectedDayOfYear,
                                   testDate.dayOfYear()))

    # Keep the asserted value and the reported "expected" value in one
    # variable so the failure message cannot drift from the check.
    expectedWeekday = 6
    self.assertEqual(
        testDate.weekday(), expectedWeekday, "date weekday:\n"
        "expected: {0:d}\n"
        "calculated: {1:d}".format(expectedWeekday, testDate.weekday()))

    self.assertEqual(
        testDate.toDateTime(), dt.datetime(year, month, day),
        "date datetime representation\n"
        "expected: {0}\n"
        "calculated: {1}".format(dt.datetime(year, month, day),
                                 testDate.toDateTime()))

    # Round-trip through the serial number.
    serialNumber = testDate.serialNumber
    serialDate = Date(serialNumber=serialNumber)
    self.assertEqual(
        serialDate, testDate, "date excel serial number representation\n"
        "expected: {0:d}\n"
        "calculated: {1:d}".format(testDate.serialNumber,
                                   serialDate.serialNumber))

    # test comparisons
    previousDate = testDate - 1
    self.assertTrue(
        previousDate < testDate,
        "{0} is not earlier than {1}".format(previousDate, testDate))
    self.assertFalse(
        previousDate >= testDate,
        "{0} should not be later than or equal to {1}".format(
            previousDate, testDate))
    self.assertTrue((previousDate + 1) == testDate,
                    "{0} plus one day should be equal to {1}".format(
                        previousDate, testDate))

    # check static members
    self.assertEqual(Date.minDate(), Date(1901, 1, 1), "min date is wrong")
    self.assertEqual(Date.maxDate(), Date(2199, 12, 31),
                     "max date is wrong")
    self.assertEqual(Date.endOfMonth(testDate), Date(year, month, 31),
                     "end of month is wrong")
    monthEnd = Date(year, month, 31)
    self.assertTrue(Date.isEndOfMonth(monthEnd),
                    "{0} should be the end of month".format(monthEnd))
    self.assertEqual(
        Date.nextWeekday(testDate, testDate.weekday()), testDate,
        "{0}'s next same week day should be {1}".format(
            testDate, testDate))

    # NOTE(review): compares two separate reads of "today"; could fail if
    # the run straddles midnight.
    expectedDate = dt.date.today()
    expectedDate = dt.datetime(expectedDate.year, expectedDate.month,
                               expectedDate.day)
    self.assertEqual(
        Date.todaysDate().toDateTime(), expectedDate, "today's date\n"
        "expected: {0}\n"
        "calculated: {1}".format(expectedDate, Date.todaysDate()))

    # nth-week day
    with self.assertRaises(ValueError):
        _ = Date.nthWeekday(0, Weekdays.Friday, 1, 2015)

    with self.assertRaises(ValueError):
        _ = Date.nthWeekday(6, Weekdays.Friday, 1, 2015)

    self.assertEqual(Date.nthWeekday(3, Weekdays.Wednesday, 8, 2015),
                     Date(2015, 8, 19))

    # check plus/sub
    threeWeeksAfter = testDate + '3W'
    expectedDate = testDate + 21
    self.assertEqual(
        threeWeeksAfter, expectedDate, "date + 3w period\n"
        "expected: {0}\n"
        "calculated: {1}".format(expectedDate, threeWeeksAfter))

    threeMonthsBefore = testDate - "3M"
    expectedDate = Date(year, month - 3, day)
    self.assertEqual(
        threeMonthsBefore, expectedDate, "date - 3m period\n"
        "expected: {0}\n"
        "calculated: {1}".format(expectedDate, threeMonthsBefore))

    threeMonthsBefore = testDate - Period("3M")
    expectedDate = Date(year, month - 3, day)
    self.assertEqual(
        threeMonthsBefore, expectedDate, "date - 3m period\n"
        "expected: {0}\n"
        "calculated: {1}".format(expectedDate, threeMonthsBefore))

    threeMonthsAfter = testDate + "3m"
    expectedDate = Date(year, month + 3, day)
    self.assertEqual(
        threeMonthsAfter, expectedDate, "date + 3m period\n"
        "expected: {0}\n"
        "calculated: {1}".format(expectedDate, threeMonthsAfter))

    oneYearAndTwoMonthsBefore = testDate - "14m"
    expectedDate = Date(year - 1, month - 2, day)
    self.assertEqual(
        oneYearAndTwoMonthsBefore, expectedDate, "date - 14m period\n"
        "expected: {0}\n"
        "calculated: {1}".format(expectedDate,
                                 oneYearAndTwoMonthsBefore))

    oneYearAndTwoMonthsAfter = testDate + "14m"
    expectedDate = Date(year + 1, month + 2, day)
    self.assertEqual(
        oneYearAndTwoMonthsAfter, expectedDate, "date + 14m period\n"
        "expected: {0}\n"
        "calculated: {1}".format(expectedDate, oneYearAndTwoMonthsAfter))

    fiveMonthsAfter = testDate + "5m"
    expectedDate = Date(year, month + 5, day)
    self.assertEqual(
        fiveMonthsAfter, expectedDate, "date + 5m period\n"
        "expected: {0}\n"
        "calculated: {1}".format(expectedDate, fiveMonthsAfter))
def fetch_predict_phase(engine,
                        alpha_factors: Iterable[object],
                        ref_date,
                        frequency,
                        universe,
                        batch,
                        neutralized_risk: Iterable[str] = None,
                        risk_model: str = 'short',
                        pre_process: Iterable[object] = None,
                        post_process: Iterable[object] = None,
                        warm_start: int = 0):
    """Build the processed factor matrix used for prediction on ``ref_date``.

    Factors are fetched from ``engine`` over a 'china.sse' schedule that ends
    at ``ref_date`` and starts ``warm_start + batch`` periods of ``frequency``
    earlier. The last ``batch`` available dates are run through
    ``factor_processing`` and only the rows dated ``ref_date`` are returned.

    :param engine: data source providing ``fetch_factor_range`` and
        ``fetch_risk_model_range``.
    :param alpha_factors: factor definitions wrapped in a ``Transformer``.
    :param ref_date: prediction date, parsed with the '%Y-%m-%d' format.
    :param frequency: period string used for the schedule step.
    :param universe: universe passed through to the engine.
    :param batch: number of most recent dates processed together.
    :param neutralized_risk: risk columns passed to ``factor_processing`` as
        ``risk_factors``; when falsy no risk data is fetched.
    :param risk_model: risk model name forwarded to the engine.
    :param pre_process: forwarded to ``factor_processing``.
    :param post_process: forwarded to ``factor_processing``.
    :param warm_start: extra periods added in front of the batch window.
    :return: dict with 'x_names' (factor names) and 'predict' (a dict with
        'x' and 'code'); both are ``None`` when no row is dated ``ref_date``.
    """
    transformer = Transformer(alpha_factors)

    p = Period(frequency)
    p = Period(length=-(warm_start + batch) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates).dropna()

    names = transformer.names

    if neutralized_risk:
        risk_df = engine.fetch_risk_model_range(universe,
                                                dates=dates,
                                                risk_model=risk_model)[1]
        # Risk columns already present among the factors are not fetched twice.
        used_neutralized_risk = list(set(neutralized_risk).difference(names))
        risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        risk_exp = train_x[neutralized_risk].values.astype(float)
    else:
        train_x = factor_df.copy()
        risk_exp = None

    # Computed for both branches: previously ``x_values`` only existed when
    # ``neutralized_risk`` was given, raising NameError otherwise.
    x_values = train_x[names].values.astype(float)

    # Labels must come from ``train_x`` (not ``factor_df``): the merge above
    # can drop rows, and every slice below indexes arrays built from ``train_x``.
    date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
    dates = np.unique(date_label)

    if len(dates) > 0 and dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        end = dates[-1]
        # Fall back to the earliest date when fewer than ``batch`` dates exist.
        start = dates[-batch] if batch <= len(dates) else dates[0]

        # bisect assumes ``date_label`` is sorted ascending.
        left_index = bisect.bisect_left(date_label, start)
        right_index = bisect.bisect_right(date_label, end)
        this_raw_x = x_values[left_index:right_index]
        sub_dates = date_label[left_index:right_index]

        if risk_exp is not None:
            this_risk_exp = risk_exp[left_index:right_index]
        else:
            this_risk_exp = None

        ne_x = factor_processing(this_raw_x,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        # Keep only the rows of the processed batch dated ``end``.
        inner_left_index = bisect.bisect_left(sub_dates, end)
        inner_right_index = bisect.bisect_right(sub_dates, end)
        ne_x = ne_x[inner_left_index:inner_right_index]

        left_index = bisect.bisect_left(date_label, end)
        right_index = bisect.bisect_right(date_label, end)
        codes = train_x.code.values[left_index:right_index]
    else:
        ne_x = None
        codes = None

    ret = dict()
    ret['x_names'] = transformer.names
    ret['predict'] = {'x': ne_x, 'code': codes}

    return ret
def testBasicArithmic(self):
    """Cover Period normalize/add/negate/compare/subtract/str behaviour."""

    def assertSum(lhs, rhs, expected):
        # lhs + rhs must equal ``expected``.
        calculated = lhs + rhs
        self.assertEqual(expected, calculated,
                         "added value {0} should be equal to {1}".format(
                             calculated, expected))

    def assertNotAddable(lhs, units):
        # Adding a 2-length period in each of ``units`` must raise; finally
        # the last such period gets an invalid unit code and must raise too.
        rhs = None
        for unit in units:
            rhs = Period(2, unit)
            with self.assertRaises(ValueError):
                _ = lhs + rhs
        rhs._units = 10
        with self.assertRaises(ValueError):
            _ = lhs + rhs

    # test bad normalize
    broken = Period(1, TimeUnits.Years)
    broken._units = 10
    with self.assertRaises(TypeError):
        broken.normalize()

    # test plus method
    zeroDays = Period(0, TimeUnits.Days)
    tenMonths = Period(10, TimeUnits.Months)
    assertSum(zeroDays, tenMonths, tenMonths)

    # Each entry: (left, compatible right, expected sum, incompatible units).
    additionCases = (
        (Period(2, TimeUnits.Years), Period(13, TimeUnits.Months),
         Period(37, TimeUnits.Months),
         (TimeUnits.Weeks, TimeUnits.BDays, TimeUnits.Days)),
        (Period(13, TimeUnits.Months), Period(2, TimeUnits.Years),
         Period(37, TimeUnits.Months),
         (TimeUnits.Weeks, TimeUnits.BDays, TimeUnits.Days)),
        (Period(2, TimeUnits.Weeks), Period(7, TimeUnits.Days),
         Period(21, TimeUnits.Days),
         (TimeUnits.Months, TimeUnits.BDays, TimeUnits.Years)),
        (Period(7, TimeUnits.Days), Period(2, TimeUnits.Weeks),
         Period(21, TimeUnits.Days),
         (TimeUnits.Months, TimeUnits.BDays, TimeUnits.Years)),
    )
    for lhs, rhs, expected, badUnits in additionCases:
        assertSum(lhs, rhs, expected)
        assertNotAddable(lhs, badUnits)

    # Business days only combine with business days.
    sevenBDays = Period(7, TimeUnits.BDays)
    assertNotAddable(sevenBDays,
                     (TimeUnits.Months, TimeUnits.Days,
                      TimeUnits.Weeks, TimeUnits.Years))
    self.assertEqual(sevenBDays + Period(2, TimeUnits.BDays), Period('9B'))

    # test negative operator
    minusThirteenWeeks = Period(-13, TimeUnits.Weeks)
    self.assertEqual(-minusThirteenWeeks, Period(13, TimeUnits.Weeks))

    # test less operator
    zeroDays = Period(0, TimeUnits.Days)
    minusThreeBDays = Period(-3, TimeUnits.BDays)
    self.assertTrue(minusThreeBDays < zeroDays)

    # test sub operator
    self.assertEqual(zeroDays - minusThreeBDays, Period('3b'))

    # test string representation
    twelveMonths = Period(12, TimeUnits.Months)
    self.assertEqual("1Y", str(twelveMonths))
def fetch_predict_phase(engine,
                        alpha_factors: Union[Transformer, Iterable[object]],
                        ref_date,
                        frequency,
                        universe,
                        batch=1,
                        neutralized_risk: Iterable[str] = None,
                        risk_model: str = 'short',
                        pre_process: Iterable[object] = None,
                        post_process: Iterable[object] = None,
                        warm_start: int = 0,
                        fillna: str = None,
                        fit_target: Union[Transformer, object] = None):
    """Build the processed factor matrix and target used for prediction on ``ref_date``.

    Factors and targets are fetched from ``engine`` over a 'china.sse'
    schedule ending at ``ref_date`` and starting ``warm_start + batch - 1``
    periods of ``frequency`` earlier. The last ``batch`` available dates are
    run through ``factor_processing`` and only the rows dated ``ref_date``
    are returned.

    :param engine: data source providing the ``fetch_*`` methods used below.
    :param alpha_factors: a ``Transformer`` or factor definitions to wrap in one.
    :param ref_date: prediction date, parsed with the '%Y-%m-%d' format.
    :param frequency: period string used for the schedule step.
    :param universe: universe passed through to the engine.
    :param batch: number of most recent dates processed together.
    :param neutralized_risk: risk columns passed to ``factor_processing`` as
        ``risk_factors``; when falsy no risk data is fetched.
    :param risk_model: risk model name forwarded to the engine.
    :param pre_process: forwarded to ``factor_processing``.
    :param post_process: forwarded to ``factor_processing``.
    :param warm_start: extra periods added in front of the batch window.
    :param fillna: when truthy, NaNs are filled with the per-``trade_date``
        median before incomplete rows are dropped.
    :param fit_target: when ``None`` the target comes from
        ``engine.fetch_dx_return_range``; otherwise it is fetched with
        ``engine.fetch_factor_range_forward``.
    :return: dict with 'x_names' and 'predict' (a dict with 'x' as a
        DataFrame, 'code' and 'y'); 'code' and 'y' are ``None`` and 'x' is
        empty when no row is dated ``ref_date``.
    """
    if isinstance(alpha_factors, Transformer):
        transformer = alpha_factors
    else:
        transformer = Transformer(alpha_factors)

    p = Period(frequency)
    p = Period(length=-(warm_start + batch - 1) * p.length(), units=p.units())

    start_date = advanceDateByCalendar('china.sse', ref_date, p,
                                       BizDayConventions.Following)
    dates = makeSchedule(start_date,
                         ref_date,
                         frequency,
                         calendar='china.sse',
                         dateRule=BizDayConventions.Following,
                         dateGenerationRule=DateGeneration.Backward)

    horizon = map_freq(frequency)

    factor_df = engine.fetch_factor_range(universe,
                                          factors=transformer,
                                          dates=dates)

    if fillna:
        factor_df = factor_df.groupby('trade_date').apply(
            lambda x: x.fillna(x.median())).reset_index(drop=True).dropna()
    else:
        factor_df = factor_df.dropna()

    if fit_target is None:
        target_df = engine.fetch_dx_return_range(universe,
                                                 dates=dates,
                                                 horizon=horizon)
    else:
        # Fetch one extra date past the schedule, then keep only the
        # scheduled dates.
        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
        target_df = engine.fetch_factor_range_forward(
            universe, factors=fit_target, dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        # ffill() is the non-deprecated spelling of fillna(method='pad').
        target_df = target_df.groupby('code').apply(lambda x: x.ffill())

    names = transformer.names

    if neutralized_risk:
        risk_df = engine.fetch_risk_model_range(universe,
                                                dates=dates,
                                                risk_model=risk_model)[1]
        # Risk columns already present among the factors are not fetched twice.
        used_neutralized_risk = list(set(neutralized_risk).difference(names))
        risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
        train_x = pd.merge(train_x, target_df, on=['trade_date', 'code'],
                           how='left')
        risk_exp = train_x[neutralized_risk].values.astype(float)
    else:
        train_x = pd.merge(factor_df, target_df, on=['trade_date', 'code'],
                           how='left')
        risk_exp = None

    # Rows may lack the target (last column) but nothing else.
    train_x.dropna(inplace=True, subset=train_x.columns[:-1])
    x_values = train_x[names].values.astype(float)
    # NOTE(review): assumes the target column is named 'dx' even when
    # ``fit_target`` is supplied - confirm against the engine.
    y_values = train_x[['dx']].values.astype(float)

    date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
    dates = np.unique(date_label)

    if len(dates) > 0 and dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        end = dates[-1]
        start = dates[-batch] if batch <= len(dates) else dates[0]

        # bisect assumes ``date_label`` is sorted ascending.
        left_index = bisect.bisect_left(date_label, start)
        right_index = bisect.bisect_right(date_label, end)
        this_raw_x = x_values[left_index:right_index]
        this_raw_y = y_values[left_index:right_index]
        sub_dates = date_label[left_index:right_index]

        if risk_exp is not None:
            this_risk_exp = risk_exp[left_index:right_index]
        else:
            this_risk_exp = None

        ne_x = factor_processing(this_raw_x,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        ne_y = factor_processing(this_raw_y,
                                 pre_process=pre_process,
                                 risk_factors=this_risk_exp,
                                 post_process=post_process)

        # Keep only the rows of the processed batch dated ``end``.
        inner_left_index = bisect.bisect_left(sub_dates, end)
        inner_right_index = bisect.bisect_right(sub_dates, end)

        ne_x = ne_x[inner_left_index:inner_right_index]
        ne_y = ne_y[inner_left_index:inner_right_index]

        left_index = bisect.bisect_left(date_label, end)
        right_index = bisect.bisect_right(date_label, end)

        codes = train_x.code.values[left_index:right_index]
    else:
        ne_x = None
        ne_y = None
        codes = None

    ret = dict()
    ret['x_names'] = transformer.names
    ret['predict'] = {
        'x': pd.DataFrame(ne_x, columns=transformer.names),
        'code': codes,
        # ``ne_y`` is None when there is no data for ``ref_date``; calling
        # flatten() unconditionally raised AttributeError in that case.
        'y': ne_y.flatten() if ne_y is not None else None
    }

    return ret
def testYearsMonthsAlgebra(self):
    """Check division, in-place addition and normalization of year/month periods."""
    oneYear = Period(length=1, units=TimeUnits.Years)
    sixMonths = Period(length=6, units=TimeUnits.Months)
    threeMonths = Period(length=3, units=TimeUnits.Months)

    n = 4
    flag = oneYear / n == threeMonths
    self.assertTrue(
        flag, "division error: {0} / {1:d}"
        " not equal to {2}".format(oneYear, n, threeMonths))

    n = 2
    flag = oneYear / n == sixMonths
    self.assertTrue(
        flag, "division error: {0} / {1:d}"
        " not equal to {2}".format(oneYear, n, sixMonths))

    # ``total`` instead of ``sum`` to avoid shadowing the builtin.
    total = threeMonths
    total += sixMonths
    flag = total == Period(length=9, units=TimeUnits.Months)
    self.assertTrue(
        flag, "sum error: {0}"
        " + {1}"
        " != {2}".format(threeMonths, sixMonths,
                         Period(length=9, units=TimeUnits.Months)))

    total += oneYear
    flag = total == Period(length=21, units=TimeUnits.Months)
    self.assertTrue(
        flag, "sum error: {0}"
        " + {1}"
        " + {2}"
        " != {3}".format(threeMonths, sixMonths, oneYear,
                         Period(length=21, units=TimeUnits.Months)))

    # A freshly built 12-month period keeps its length and unit as given.
    twelveMonths = Period(length=12, units=TimeUnits.Months)
    flag = twelveMonths.length() == 12
    self.assertTrue(
        flag, "normalization error: TwelveMonths.length"
        " is {0:d}"
        " instead of 12".format(twelveMonths.length()))
    flag = twelveMonths.units() == TimeUnits.Months
    self.assertTrue(
        flag, "normalization error: TwelveMonths.units"
        " is {0:d}"
        " instead of {1:d}".format(twelveMonths.units(),
                                   TimeUnits.Months))

    # normalize() is expected to turn 12 months into 1 year. The messages
    # report the normalized object, i.e. the value actually being checked.
    normalizedTwelveMonths = Period(length=12, units=TimeUnits.Months)
    normalizedTwelveMonths = normalizedTwelveMonths.normalize()
    flag = normalizedTwelveMonths.length() == 1
    self.assertTrue(
        flag, "normalization error: normalizedTwelveMonths.length"
        " is {0:d}"
        " instead of 1".format(normalizedTwelveMonths.length()))
    flag = normalizedTwelveMonths.units() == TimeUnits.Years
    self.assertTrue(
        flag, "normalization error: normalizedTwelveMonths.units"
        " is {0:d}"
        " instead of {1:d}".format(normalizedTwelveMonths.units(),
                                   TimeUnits.Years))

    # Plain days are expected to stay in days after normalization.
    thirtyDays = Period(length=30, units=TimeUnits.Days)
    normalizedThirtyDays = thirtyDays.normalize()
    flag = normalizedThirtyDays.units() == TimeUnits.Days
    self.assertTrue(
        flag, "normalization error: ThirtyDays.units"
        " is {0:d}"
        " instead of {1:d}".format(normalizedThirtyDays.units(),
                                   TimeUnits.Days))

    # Business days are expected to stay in business days as well.
    thirtyBDays = Period(length=30, units=TimeUnits.BDays)
    normalizedThirtyBDays = thirtyBDays.normalize()
    flag = normalizedThirtyBDays.units() == TimeUnits.BDays
    self.assertTrue(
        flag, "normalization error: ThirtyBDays.units"
        " is {0:d}"
        " instead of {1:d}".format(normalizedThirtyBDays.units(),
                                   TimeUnits.BDays))