def _concat_factor_data(self, factor):
    """Append a factor's data to the container's data as extra columns.

    The factor's ``freq`` must equal the container's ``freq``; the check is
    delegated to ``py_assert`` with ``ValueError``. The factor is then passed
    to ``_check_tiaocang_date`` before the column-wise concatenation.

    :param factor: object exposing ``freq``, ``name`` and ``data``
    """
    same_freq = self.freq == factor.freq
    error_msg = 'Failed to concatenate: factor {0} has different freq from containter'.format(
        factor.name)
    py_assert(same_freq, ValueError, error_msg)

    self._check_tiaocang_date(factor)

    # axis=1 places the factor's columns next to the existing ones.
    combined = pd.concat([self.data, factor.data], axis=1)
    self.data = combined
def remove_factor(self, factor):
    """Remove a factor from the container.

    :param factor: either the factor's name (``str``) or an object with a
        ``name`` attribute
    The name must be present in ``self.names``; the check is delegated to
    ``py_assert`` with ``ValueError``. The name is removed from
    ``self.names``, the matching column is dropped from ``self.data`` in
    place, and ``self.property`` is rebound to ``dissoc(self.property, name)``.
    """
    if isinstance(factor, str):
        target = factor
    else:
        target = factor.name

    is_known = target in self.names
    py_assert(
        is_known, ValueError,
        'unable to remove factor_name {0}, which does not exist in current container'.format(target))

    self.names.remove(target)
    self.data.drop(target, axis=1, inplace=True)
    self.property = dissoc(self.property, target)
def _check_tiaocang_date(self, factor):
    """
    If the factor's freq matches the container's freq, the factor's date
    column should contain every tiaocang (rebalancing) date; otherwise an
    error is reported.
    """
    required_dates = set(self._tiaocang_date)
    available_dates = set(factor.trade_date_list)
    covers_all = required_dates <= available_dates
    error_msg = 'factor {0} does not contain all tiaocang date in its trade date list'.format(
        factor.name)
    py_assert(covers_all, ValueError, error_msg)
def _concat_factor_data(self, factor):
    """Append a factor's data to the container's data as extra columns.

    The factor's ``freq`` must equal the container's ``freq``; the check is
    delegated to ``py_assert`` with ``ValueError``. The factor is then passed
    to ``_check_tiaocang_date`` before the column-wise concatenation.

    :param factor: object exposing ``freq``, ``name`` and ``data``
    """
    same_freq = self.freq == factor.freq
    error_msg = 'Failed to concatenate: factor {0} has different freq from containter'.format(
        factor.name)
    py_assert(same_freq, ValueError, error_msg)

    self._check_tiaocang_date(factor)

    # axis=1 places the factor's columns next to the existing ones.
    combined = pd.concat([self.data, factor.data], axis=1)
    self.data = combined
def fit(self, factor_container, **kwargs):
    """Fill in unset parameters from the factor container and return self.

    ``factor_name`` falls back to ``factor_container.alpha_factor_col``;
    ``weight`` falls back to equal weights (``1.0 / nb_factor`` each) and
    ``ascend_order`` to a list of ``1``. Values that are already set are
    kept. ``**kwargs`` is accepted but not used here.

    :param factor_container: object exposing ``alpha_factor_col``
    :return: ``self``
    """
    if self.factor_name is None:
        self.factor_name = factor_container.alpha_factor_col

    requested = set(self.factor_name)
    available = set(factor_container.alpha_factor_col)
    py_assert(requested <= available, ValueError,
              'factor_name must be one of alpha factors in factor container')

    nb_factor = len(self.factor_name)
    if self.weight is None:
        self.weight = [1.0 / nb_factor] * nb_factor
    if self.ascend_order is None:
        self.ascend_order = [1] * nb_factor
    return self
def get_industry_weight_on_date(self, date):
    """Return the industry weights for one date, plus an ``'other'`` entry.

    ``py_assert`` is called with ``ValueError`` when ``self.industry_weight``
    is ``None``. The date is normalised through ``ensure_datetime`` and used
    to select from ``self.industry_weight``; the selection is re-indexed by
    ``INDEX_INDUSTRY_WEIGHT.industry_index`` and its first column is taken.

    The ``'other'`` entry holds ``100 - sum(weights)`` when that is positive
    and ``0.0`` otherwise (presumably weights are percentages -- confirm
    against the data source).

    :param date: value accepted by ``ensure_datetime``
    :return: the selected first column with the ``'other'`` entry added
    """
    has_weights = self.industry_weight is not None
    py_assert(has_weights, ValueError, 'industry weight data is empty')

    when = ensure_datetime(date)
    snapshot = self.industry_weight.loc[when]
    by_industry = snapshot.reset_index().set_index(
        INDEX_INDUSTRY_WEIGHT.industry_index)
    weights = by_industry[by_industry.columns[0]]

    resid_weight = 100 - np.sum(weights.values)
    # Kept as an explicit comparison so a non-positive or NaN residual
    # still maps to 0.0.
    if resid_weight > 0:
        weights['other'] = resid_weight
    else:
        weights['other'] = 0.0
    return weights
def top(df, column=None, n=5):
    """Return the ``n`` largest entries of a Series or DataFrame.

    :param df: a ``pd.Series``, or a DataFrame-like object sorted by ``column``
    :param column: column to sort by; required when ``df`` is not a Series
    :param n: number of leading entries to keep after a descending sort
    :return: the first ``n`` entries of the descending-sorted input
    :raises ValueError: via ``py_assert`` when ``df`` is not a Series and
        ``column`` is ``None``
    """
    if isinstance(df, pd.Series):
        # .iloc keeps the cut positional whatever the index dtype is.
        return df.sort_values(ascending=False).iloc[:n]

    # py_assert takes (condition, exception_type, msg); the exception type
    # was previously missing, so the message landed in its slot.
    py_assert(column is not None, ValueError,
              "Specify the col name or use pandas Series type of data")
    return df.sort_values(by=column, ascending=False).iloc[:n]
def remove_factor(self, factor):
    """Remove a factor from the container.

    :param factor: either the factor's name (``str``) or an object with a
        ``name`` attribute
    The name must be present in ``self.names``; the check is delegated to
    ``py_assert`` with ``ValueError``. The name is removed from
    ``self.names``, the matching column is dropped from ``self.data`` in
    place, and ``self.property`` is rebound to ``dissoc(self.property, name)``.
    """
    if isinstance(factor, str):
        target = factor
    else:
        target = factor.name

    is_known = target in self.names
    py_assert(
        is_known, ValueError,
        'unable to remove factor_name {0}, which does not exist in current container'.format(target))

    self.names.remove(target)
    self.data.drop(target, axis=1, inplace=True)
    self.property = dissoc(self.property, target)
def _check_tiaocang_date(self, factor):
    """
    If the factor's freq matches the container's freq, the factor's date
    column should contain every tiaocang (rebalancing) date; otherwise an
    error is reported.
    """
    required_dates = set(self._tiaocang_date)
    available_dates = set(factor.trade_date_list)
    covers_all = required_dates <= available_dates
    error_msg = 'factor {0} does not contain all tiaocang date in its trade date list'.format(
        factor.name)
    py_assert(covers_all, ValueError, error_msg)
def get_industry_weight_on_date(self, date):
    """Return the industry weights for one date, plus an ``'other'`` entry.

    ``py_assert`` is called with ``ValueError`` when ``self.industry_weight``
    is ``None``. The date is normalised through ``ensure_datetime`` and used
    to select from ``self.industry_weight``; the selection is re-indexed by
    ``INDEX_INDUSTRY_WEIGHT.industry_index`` and its first column is taken.

    The ``'other'`` entry holds ``100 - sum(weights)`` when that is positive
    and ``0.0`` otherwise (presumably weights are percentages -- confirm
    against the data source).

    :param date: value accepted by ``ensure_datetime``
    :return: the selected first column with the ``'other'`` entry added
    """
    has_weights = self.industry_weight is not None
    py_assert(has_weights, ValueError, 'industry weight data is empty')

    when = ensure_datetime(date)
    snapshot = self.industry_weight.loc[when]
    by_industry = snapshot.reset_index().set_index(
        INDEX_INDUSTRY_WEIGHT.industry_index)
    weights = by_industry[by_industry.columns[0]]

    resid_weight = 100 - np.sum(weights.values)
    # Kept as an explicit comparison so a non-positive or NaN residual
    # still maps to 0.0.
    if resid_weight > 0:
        weights['other'] = resid_weight
    else:
        weights['other'] = 0.0
    return weights
def _merge_factors(self, factors):
    """Merge each given factor into the container, then keep tiaocang dates.

    Does nothing when ``factors`` is ``None``. Every factor must have
    ``production_format == OutputDataFormat.MULTI_INDEX_DF``; the check is
    delegated to ``py_assert`` with ``ValueError``. Each factor is handed to
    ``_merge_factor``, after which ``self.data`` is restricted to the rows
    selected by ``self._tiaocang_date``.

    :param factors: a list of factors, or any iterable of them, or ``None``
    """
    if factors is None:
        return None

    if isinstance(factors, list):
        factor_list = factors
    else:
        factor_list = list(factors)

    for item in factor_list:
        is_multi_index = item.production_format == OutputDataFormat.MULTI_INDEX_DF
        error_msg = 'factor {0} does not in multi-index dataframe format therefore can not be merged into container'.format(
            item.name)
        py_assert(is_multi_index, ValueError, error_msg)
        self._merge_factor(item)

    self.data = self.data.loc[self._tiaocang_date]
def fit(self, factor_container, **kwargs):
    """Fill in unset parameters from the factor container and return self.

    ``factor_name`` falls back to ``factor_container.alpha_factor_col``;
    ``weight`` falls back to equal weights (``1.0 / nb_factor`` each) and
    ``ascend_order`` to a list of ``1``. Values that are already set are
    kept. ``**kwargs`` is accepted but not used here.

    :param factor_container: object exposing ``alpha_factor_col``
    :return: ``self``
    """
    if self.factor_name is None:
        self.factor_name = factor_container.alpha_factor_col

    requested = set(self.factor_name)
    available = set(factor_container.alpha_factor_col)
    py_assert(requested <= available, ValueError,
              'factor_name must be one of alpha factors in factor container')

    nb_factor = len(self.factor_name)
    if self.weight is None:
        self.weight = [1.0 / nb_factor] * nb_factor
    if self.ascend_order is None:
        self.ascend_order = [1] * nb_factor
    return self
def _merge_factors(self, factors):
    """Merge each given factor into the container, then keep tiaocang dates.

    Does nothing when ``factors`` is ``None``. Every factor must have
    ``production_format == OutputDataFormat.MULTI_INDEX_DF``; the check is
    delegated to ``py_assert`` with ``ValueError``. Each factor is handed to
    ``_merge_factor``, after which ``self.data`` is restricted to the rows
    selected by ``self._tiaocang_date``.

    :param factors: a list of factors, or any iterable of them, or ``None``
    """
    if factors is None:
        return None

    if isinstance(factors, list):
        factor_list = factors
    else:
        factor_list = list(factors)

    for item in factor_list:
        is_multi_index = item.production_format == OutputDataFormat.MULTI_INDEX_DF
        error_msg = 'factor {0} does not in multi-index dataframe format therefore can not be merged into container'.format(
            item.name)
        py_assert(is_multi_index, ValueError, error_msg)
        self._merge_factor(item)

    self.data = self.data.loc[self._tiaocang_date]
def _build_mapper(self, factor_container):
    """Build one ``DataFrameMapper`` per tiaocang date.

    For ``SelectionMethod.INDUSTRY_NEUTRAL`` the mapper feeds the score and
    industry-code columns to an ``IndustryNeutralSelector`` configured with
    that date's slice of ``self.industry_weight``; ``py_assert`` is called
    with ``ValueError`` when ``self.industry_weight`` is ``None``. For any
    other method the score column is fed to a ``BrutalSelector``.

    :param factor_container: object exposing ``industry_code``, ``score``
        and ``tiaocang_date``
    :return: ``pd.Series`` mapping each date to its ``DataFrameMapper``
    """
    # The values are mapper objects, so request object dtype explicitly
    # instead of relying on the deprecated implicit default of pd.Series().
    data_mapper_by_date = pd.Series(dtype=object)
    industry_code = factor_container.industry_code
    score = factor_container.score
    # The selection method does not change per date; compare once.
    industry_neutral = self.method == SelectionMethod.INDUSTRY_NEUTRAL

    for date in factor_container.tiaocang_date:
        if industry_neutral:
            py_assert(self.industry_weight is not None, ValueError,
                      'industry weight has not been given')
            industry_weight = self.industry_weight.loc[date]
            selector = IndustryNeutralSelector(
                industry_weight=industry_weight,
                prop_select=self.prop_select,
                min_select_per_industry=self.min_select_per_industry,
                reset_index=True)
            data_mapper = [([score.name, industry_code.name], selector)]
        else:
            selector = BrutalSelector(nb_select=self.nb_select,
                                      prop_select=self.prop_select,
                                      reset_index=True)
            data_mapper = [(score.name, selector)]
        data_mapper_by_date[date] = DataFrameMapper(data_mapper,
                                                    input_df=True,
                                                    df_out=True)
    return data_mapper_by_date
def test_py_assert(self):
    """py_assert raises the given exception type on a false condition."""
    failing_condition = 1 == 2
    # Callable form of assertRaises: invokes py_assert with the remaining
    # arguments and checks that ValueError is raised.
    self.assertRaises(ValueError, py_assert, failing_condition, ValueError)