def loadon_data(self, trade_date):
    """Fetch the data window that ends at ``trade_date``.

    The window start is obtained by moving ``trade_date`` back by
    ``self._maximization_windows() + 1`` periods with
    ``advanceDateByCalendar`` on the ``'china.sse'`` calendar (the
    ``'-Nb'`` tenor presumably means N business days -- confirm against
    the calendar helper).

    Args:
        trade_date: End of the window; also the anchor for the look-back.

    Returns:
        Whatever ``DBPolymerize.fetch_data`` returns for the window at a
        ``'1b'`` frequency.
    """
    polymerizer = DBPolymerize(self._name)
    # One extra period on top of the longest window the instance reports.
    lookback = self._maximization_windows() + 1
    window_start = advanceDateByCalendar('china.sse', trade_date,
                                         '-%sb' % lookback)
    return polymerizer.fetch_data(window_start, trade_date, '1b')
def loadon_data(self, begin_date, end_date, benchmark_code_dict, table):
    """Load factor data and assemble the per-benchmark inputs.

    Factors are read from ``table`` through ``FetchRLFactorEngine``;
    benchmark, market and exposure data come from
    ``DBPolymerize.fetch_integrated_data``. Both fetchers receive
    ``begin_date`` and ``end_date`` unchanged.

    Args:
        begin_date: Start of the range handed to both fetchers.
        end_date: End of the range handed to both fetchers.
        benchmark_code_dict: Mapping whose keys are matched against
            ``benchmark_data.index_code`` and whose values are used as the
            keys of the returned dictionaries.
        table: Passed to ``FetchRLFactorEngine``.

    Returns:
        A ``(total_data_dict, benchmark_industry_weights_dict)`` tuple,
        both keyed by the values of ``benchmark_code_dict``.
    """
    # TODO (from the original author): factor direction still has to be added.
    polymerizer = DBPolymerize(self._name)
    factor_engine = FetchRLFactorEngine(table)
    factor_data = factor_engine.fetch_factors(begin_date=begin_date,
                                              end_date=end_date)
    benchmark_data, market_data, exposure_data = \
        polymerizer.fetch_integrated_data(begin_date, end_date)

    # Build one result set per benchmark.
    total_data_dict = {}
    benchmark_industry_weights_dict = {}
    for index_code, result_key in benchmark_code_dict.items():
        in_benchmark = benchmark_data.index_code == index_code

        merged = self.preprocessing(benchmark_data[in_benchmark],
                                    market_data, factor_data, exposure_data)
        # The original comments label the next step as factor
        # standardisation / neutralisation; the actual work is done by
        # ``self._factor_preprocess`` on each trade_date group.
        merged = (merged
                  .sort_values(['trade_date', 'security_code'])
                  .groupby(['trade_date'])
                  .apply(self._factor_preprocess))
        factor_cols = self._factor_columns
        merged.loc[:, factor_cols] = merged.loc[:, factor_cols].fillna(0)

        # Sum of 'weighing' per (trade_date, industry_code), pivoted so
        # that industry codes become columns; missing cells become 0.
        industry_weights = (
            benchmark_data[in_benchmark]
            .groupby(['trade_date', 'industry_code'])
            .apply(lambda grp: grp['weighing'].sum())
            .unstack()
            .fillna(0))

        total_data_dict[result_key] = merged
        benchmark_industry_weights_dict[result_key] = industry_weights

    return total_data_dict, benchmark_industry_weights_dict
def loadon_data(self, trade_date):
    """Fetch market and index data for the window ending at ``trade_date``.

    The window start is ``trade_date`` moved back by
    ``self._maximization_windows() + 1`` periods via
    ``advanceDateByCalendar`` on the ``'china.sse'`` calendar (the
    ``'-Nb'`` tenor presumably means N business days -- confirm against
    the calendar helper).

    Args:
        trade_date: End of the window; also the anchor for the look-back.

    Returns:
        A ``(market_data, index_data)`` tuple as produced by
        ``DBPolymerize.fetch_volatility_value_data`` at a ``'1b'``
        frequency.
    """
    polymerizer = DBPolymerize(self._name)
    # One extra period on top of the longest window the instance reports.
    lookback = self._maximization_windows() + 1
    window_start = advanceDateByCalendar('china.sse', trade_date,
                                         '-%sb' % lookback)
    market_data, index_data = polymerizer.fetch_volatility_value_data(
        window_start, trade_date, '1b')
    return market_data, index_data
def loadon_data(self, begin_date, trade_date, table):
    """Load performance inputs for the two built-in benchmarks.

    Factors are read from ``table`` through ``FetchRLFactorEngine`` and
    benchmark / index / market / exposure data through
    ``DBPolymerize.fetch_performance_data``, both over
    ``begin_date``..``trade_date`` with the frequency string ``'1m'``.

    Args:
        begin_date: Start of the range handed to both fetchers.
        trade_date: End of the range handed to both fetchers.
        table: Passed to ``FetchRLFactorEngine``.

    Returns:
        A ``(total_data_dict, benchmark_industry_weights_dict,
        index_rets_dict)`` tuple; each dictionary is keyed by the values
        of the internal benchmark-code mapping.
    """
    # Range and sampling frequency used for every fetch below.
    freq = '1m'
    # Keys are matched against ``benchmark_data.index_code``; values are
    # matched against ``index_data.security_code`` and key the results.
    benchmark_code_dict = {
        '000905.XSHG': '2070000187',
        '000300.XSHG': '2070000060',
    }

    factor_engine = FetchRLFactorEngine(table)
    factor_data = factor_engine.fetch_factors(begin_date=begin_date,
                                              end_date=trade_date,
                                              freq=freq)
    polymerizer = DBPolymerize(self._name)
    benchmark_data, index_data, market_data, exposure_data = \
        polymerizer.fetch_performance_data(benchmark_code_dict.keys(),
                                           begin_date, trade_date, freq)

    # Build one result set per benchmark.
    total_data_dict = {}
    index_rets_dict = {}
    benchmark_industry_weights_dict = {}
    for index_code, result_key in benchmark_code_dict.items():
        in_benchmark = benchmark_data.index_code == index_code

        # The third value (a coverage rate) is not used by this loader.
        merged, index_rets, _coverage_rate = self.performance_preprocessing(
            benchmark_data[in_benchmark],
            index_data[index_data.security_code == result_key],
            market_data, factor_data, exposure_data)

        # Neutralisation step (per the original comment); the original
        # author also flagged the zero-fill of factor values below as
        # something still to be revised.
        merged = (merged
                  .sort_values(['trade_date', 'security_code'])
                  .groupby(['trade_date'])
                  .apply(self._factor_preprocess))
        factor_cols = self._factor_columns
        merged.loc[:, factor_cols] = merged.loc[:, factor_cols].fillna(0)

        # Sum of 'weighing' per (trade_date, industry_code), pivoted so
        # that industry codes become columns; missing cells become 0.
        industry_weights = (
            benchmark_data[in_benchmark]
            .groupby(['trade_date', 'industry_code'])
            .apply(lambda grp: grp['weighing'].sum())
            .unstack()
            .fillna(0))

        total_data_dict[result_key] = merged
        index_rets_dict[result_key] = index_rets
        benchmark_industry_weights_dict[result_key] = industry_weights

    return total_data_dict, benchmark_industry_weights_dict, index_rets_dict
def loadon_data(self, trade_date, begin_date='2018-08-19'):
    """Load performance inputs for the two built-in benchmarks.

    Data is fetched through ``DBPolymerize.fetch_performance_data`` for
    ``begin_date``..``trade_date`` with the frequency string ``'1b'`` and
    then passed to ``self.performance_preprocessing``.

    Args:
        trade_date: End of the range handed to the fetcher.
        begin_date: Start of the range handed to the fetcher. Defaults to
            ``'2018-08-19'``, the value that used to be hard-coded, so
            existing single-argument callers behave exactly as before.

    Returns:
        A ``(total_data, benchmark_industry_weights, index_se_dict)``
        tuple.
    """
    benchmark = ['000905.XSHG', '000300.XSHG']
    db_polymerize = DBPolymerize(self._name)
    (benchmark_data, index_data, market_data,
     factor_data, exposure_data) = db_polymerize.fetch_performance_data(
        benchmark, begin_date, trade_date, '1b')

    total_data, index_se_dict = self.performance_preprocessing(
        benchmark_data, index_data, market_data, factor_data, exposure_data)

    # Neutralisation step (per the original comment); the actual work is
    # done by ``self._factor_preprocess`` on each trade_date group.
    total_data = total_data.sort_values(['trade_date', 'security_code'])
    total_data = total_data.groupby(['trade_date']).apply(
        self._factor_preprocess)

    # Sum of 'weighing' per (trade_date, industry_code), pivoted so that
    # industry codes become columns; missing cells become 0.
    benchmark_industry_weights = benchmark_data.groupby(
        ['trade_date', 'industry_code']).apply(
            lambda x: x['weighing'].sum())
    benchmark_industry_weights = benchmark_industry_weights.unstack().fillna(0)

    return total_data, benchmark_industry_weights, index_se_dict
def loadon_data(self, begin_date, end_date, benchmark_code_dict, table):
    """Load factor data, factor directions and per-benchmark inputs.

    Factors are read from ``table`` through ``FetchRLFactorEngine``;
    benchmark, market and exposure data come from
    ``DBPolymerize.fetch_integrated_data``. ``self._factor_columns`` is
    (re)set to every factor-data column except ``id``, ``trade_date`` and
    ``security_code``. Factor directions are read from the
    ``factor_performance_ic_ir_sub_test`` table on a best-effort basis:
    if the query fails, a message is printed and every benchmark gets an
    empty direction frame instead of the loader crashing.

    Args:
        begin_date: Start of the range handed to both fetchers.
        end_date: End of the range handed to both fetchers.
        benchmark_code_dict: Mapping whose keys are matched against
            ``benchmark_data.index_code`` and the ``universe`` column of
            the direction table, and whose values key the returned
            dictionaries.
        table: Passed to ``FetchRLFactorEngine``.

    Returns:
        A ``(total_data_dict, benchmark_industry_weights_dict,
        coverage_rate_dict, factor_direction_dict)`` tuple, all keyed by
        the values of ``benchmark_code_dict``.
    """
    db_polymerize = DBPolymerize(self._name)
    db_factor = FetchRLFactorEngine(table)
    factor_data = db_factor.fetch_factors(begin_date=begin_date,
                                          end_date=end_date)
    benchmark_data, market_data, exposure_data = \
        db_polymerize.fetch_integrated_data(begin_date, end_date)

    # Every non-key column of the factor table is treated as a factor.
    self._factor_columns = [
        column for column in factor_data.columns
        if column not in ('id', 'trade_date', 'security_code')
    ]

    total_data_dict = {}
    benchmark_industry_weights_dict = {}
    coverage_rate_dict = {}
    factor_direction_dict = {}

    db_url = '''mysql+mysqlconnector://{0}:{1}@{2}:{3}/{4}'''.format(
        config.rl_db_user, config.rl_db_pwd, config.rl_db_host,
        config.rl_db_port, config.rl_db_database)
    direction_columns = ['factor_name', 'universe', 'factor_direction']

    def get_factor_direction(factors_string):
        """Return the direction rows for the quoted, comma-joined names.

        Always returns a DataFrame with ``direction_columns`` so callers
        can filter on ``universe`` even when nothing could be loaded.
        """
        if not factors_string:
            # An empty list would render as ``in ( )``, which is invalid SQL.
            return pd.DataFrame(columns=direction_columns)
        destination = sa.create_engine(db_url)
        # NOTE(review): the IN-list is string-built from factor-table
        # column names; switch to bound parameters if those names can
        # ever come from an untrusted source.
        sql = ("""select factor_name, universe, factor_direction """
               """from factor_performance_ic_ir_sub_test """
               """where time_type='10' and factor_name in ( {0} );"""
               ).format(factors_string)
        try:
            return pd.read_sql(sql, destination)
        except Exception:
            # Best effort: report and fall back to "no directions known"
            # rather than returning None, which broke the filter below.
            print("failed to get the factor directions!")
            return pd.DataFrame(columns=direction_columns)
        finally:
            # The engine is created per call, so release its pool here.
            destination.dispose()

    factor_str = ','.join(
        "'" + str(factor) + "'" for factor in self._factor_columns)
    factor_directions = get_factor_direction(factor_str)

    # Build one result set per benchmark.
    for key, value in benchmark_code_dict.items():
        in_benchmark = benchmark_data.index_code == key

        total_data, coverage_rate = self.preprocessing(
            benchmark_data[in_benchmark], market_data, factor_data,
            exposure_data)

        # Factor standardisation (per the original comment); the actual
        # work is done by ``self._factor_preprocess`` per trade_date group.
        total_data = total_data.sort_values(['trade_date', 'security_code'])
        total_data = total_data.groupby(['trade_date']).apply(
            self._factor_preprocess)
        total_data.loc[:, self._factor_columns] = \
            total_data.loc[:, self._factor_columns].fillna(0)

        # Sum of 'weighing' per (trade_date, industry_code), pivoted so
        # that industry codes become columns; missing cells become 0.
        benchmark_industry_weights = benchmark_data[in_benchmark].groupby(
            ['trade_date', 'industry_code']).apply(
                lambda x: x['weighing'].sum())
        benchmark_industry_weights = \
            benchmark_industry_weights.unstack().fillna(0)

        total_data_dict[value] = total_data
        benchmark_industry_weights_dict[value] = benchmark_industry_weights
        coverage_rate_dict[value] = coverage_rate
        factor_direction_dict[value] = factor_directions[
            factor_directions.universe == key]

    return (total_data_dict, benchmark_industry_weights_dict,
            coverage_rate_dict, factor_direction_dict)