Exemplo n.º 1
0
    def _concat_factor_data(self, factor):
        """Append a factor's data to the container as extra columns.

        The factor's ``freq`` is compared with the container's ``freq`` through
        ``py_assert`` (with ``ValueError``), then ``_check_tiaocang_date`` is
        run on the factor. Finally ``self.data`` is rebound to the column-wise
        concatenation of the current data and ``factor.data``.
        """
        same_freq = self.freq == factor.freq
        error_msg = 'Failed to concatenate: factor {0} has different freq from containter'.format(factor.name)
        py_assert(same_freq, ValueError, error_msg)

        self._check_tiaocang_date(factor)
        frames = [self.data, factor.data]
        # axis=1: place the factor's columns next to the existing ones.
        self.data = pd.concat(frames, axis=1)
Exemplo n.º 2
0
 def remove_factor(self, factor):
     """Drop a factor from the container.

     ``factor`` is either the factor's name (a ``str``) or an object exposing
     a ``name`` attribute. The name must be present in ``self.names``
     (checked through ``py_assert`` with ``ValueError``). It is then removed
     from ``self.names``, from the columns of ``self.data`` and from
     ``self.property``.
     """
     if isinstance(factor, str):
         target = factor
     else:
         target = factor.name
     is_known = target in self.names
     py_assert(is_known, ValueError,
               'unable to remove factor_name {0}, which does not exist in current container'.format(target))
     self.names.remove(target)
     # Column drop is done in place on the existing frame.
     self.data.drop(target, axis=1, inplace=True)
     self.property = dissoc(self.property, target)
Exemplo n.º 3
0
 def _check_tiaocang_date(self, factor):
     """
     If the factor's freq matches the container's freq, the factor's date
     column should contain every rebalance (tiaocang) date; otherwise an
     error is reported.
     """
     required_dates = set(self._tiaocang_date)
     available_dates = set(factor.trade_date_list)
     # Subset test: every container rebalance date must appear in the factor.
     py_assert(required_dates <= available_dates,
               ValueError,
               'factor {0} does not contain all tiaocang date in its trade date list'.format(factor.name))
Exemplo n.º 4
0
    def _concat_factor_data(self, factor):
        """Concatenate ``factor.data`` onto ``self.data`` along the columns.

        Before concatenating, the factor's ``freq`` must equal the container's
        ``freq`` (asserted through ``py_assert`` with ``ValueError``) and the
        factor must pass ``_check_tiaocang_date``.
        """
        freq_matches = self.freq == factor.freq
        failure_text = 'Failed to concatenate: factor {0} has different freq from containter'.format(
            factor.name)
        py_assert(freq_matches, ValueError, failure_text)

        self._check_tiaocang_date(factor)
        # axis=1: the factor contributes new columns, not new rows.
        combined = pd.concat([self.data, factor.data], axis=1)
        self.data = combined
Exemplo n.º 5
0
 def fit(self, factor_container, **kwargs):
     """Fill in unset configuration from the factor container and return self.

     * ``factor_name`` defaults to ``factor_container.alpha_factor_col``.
     * ``weight`` defaults to equal weights ``1 / nb_factor`` per factor.
     * ``ascend_order`` defaults to ``1`` for every factor.

     The selected factor names must be a subset of the container's alpha
     factor columns (checked through ``py_assert`` with ``ValueError``).
     ``**kwargs`` is accepted but not used here.
     """
     if self.factor_name is None:
         self.factor_name = factor_container.alpha_factor_col
     chosen = set(self.factor_name)
     allowed = set(factor_container.alpha_factor_col)
     py_assert(chosen <= allowed, ValueError,
               'factor_name must be one of alpha factors in factor container')
     nb_factor = len(self.factor_name)
     if self.weight is None:
         self.weight = [1.0 / nb_factor] * nb_factor
     if self.ascend_order is None:
         self.ascend_order = [1] * nb_factor
     return self
Exemplo n.º 6
0
 def get_industry_weight_on_date(self, date):
     """Return the industry weights for ``date`` plus an ``'other'`` entry.

     ``self.industry_weight`` must not be ``None`` (checked through
     ``py_assert`` with ``ValueError``). The rows for the given date are
     re-indexed by ``INDEX_INDUSTRY_WEIGHT.industry_index`` and the first
     remaining column is taken as the weight series. ``'other'`` holds the
     positive remainder against 100, or ``0.0`` when there is none.

     NOTE(review): the residual against 100 suggests weights are expressed
     in percent -- confirm with the data source.
     """
     py_assert(self.industry_weight is not None, ValueError, 'industry weight data is empty')
     when = ensure_datetime(date)
     on_date = self.industry_weight.loc[when]
     by_industry = on_date.reset_index().set_index(INDEX_INDUSTRY_WEIGHT.industry_index)
     first_column = by_industry.columns[0]
     weights = by_industry[first_column]
     resid_weight = 100 - np.sum(weights.values)
     if resid_weight > 0:
         weights['other'] = resid_weight
     else:
         weights['other'] = 0.0
     return weights
Exemplo n.º 7
0
def top(df, column=None, n=5):
    """Return the ``n`` largest entries of a Series or rows of a DataFrame.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Data to rank. A Series is sorted by its values; a DataFrame is
        sorted by ``column``.
    column : label, optional
        Column to sort by. Required when ``df`` is not a Series.
    n : int, default 5
        Number of leading entries to keep after the descending sort.

    Returns
    -------
    Same type as ``df``, sorted in descending order and cut to the first
    ``n`` positions.

    Notes
    -----
    The missing-column check goes through ``py_assert`` with ``ValueError``
    as the exception type, matching the ``py_assert(condition, exception,
    message)`` form used by the other call sites in this file.
    """
    if isinstance(df, pd.Series):
        ranked = df.sort_values(ascending=False)
    else:
        # The exception type must be the second argument; previously the
        # message string was passed in that slot.
        py_assert(column is not None, ValueError,
                  "Specify the col name or use pandas Series type of data")
        ranked = df.sort_values(by=column, ascending=False)

    # .iloc keeps the cut positional whatever the index dtype is.
    return ranked.iloc[:n]
Exemplo n.º 8
0
 def remove_factor(self, factor):
     """Remove a factor, given by name or by object, from the container.

     A ``str`` argument is used as the factor name directly; anything else
     is expected to expose ``.name``. The name must be in ``self.names``
     (asserted through ``py_assert`` with ``ValueError``). The name is
     removed from ``self.names``, its column is dropped from ``self.data``,
     and its key is dissociated from ``self.property``.
     """
     name_to_remove = factor if isinstance(factor, str) else factor.name
     not_found_msg = 'unable to remove factor_name {0}, which does not exist in current container'.format(
         name_to_remove)
     py_assert(name_to_remove in self.names, ValueError, not_found_msg)
     self.names.remove(name_to_remove)
     # In-place column drop on the existing frame.
     self.data.drop(name_to_remove, axis=1, inplace=True)
     self.property = dissoc(self.property, name_to_remove)
Exemplo n.º 9
0
 def _check_tiaocang_date(self, factor):
     """
     When the factor's freq equals the container's freq, the factor's date
     column is expected to include all rebalance (tiaocang) dates; if it does
     not, an error is reported.
     """
     container_dates = set(self._tiaocang_date)
     factor_dates = set(factor.trade_date_list)
     covers_all = container_dates.issubset(factor_dates)
     message = 'factor {0} does not contain all tiaocang date in its trade date list'.format(
         factor.name)
     py_assert(covers_all, ValueError, message)
Exemplo n.º 10
0
 def get_industry_weight_on_date(self, date):
     """Look up industry weights on ``date`` and append an ``'other'`` bucket.

     Requires ``self.industry_weight`` to be set (asserted through
     ``py_assert`` with ``ValueError``). The slice for the date is indexed by
     ``INDEX_INDUSTRY_WEIGHT.industry_index`` and reduced to its first
     column. ``'other'`` is ``100 - sum(weights)`` when that is positive and
     ``0.0`` otherwise.

     NOTE(review): taking the residual against 100 implies percentage
     weights -- verify against the upstream data.
     """
     py_assert(self.industry_weight is not None, ValueError,
               'industry weight data is empty')
     lookup_date = ensure_datetime(date)
     snapshot = self.industry_weight.loc[lookup_date]
     snapshot = snapshot.reset_index()
     snapshot = snapshot.set_index(INDEX_INDUSTRY_WEIGHT.industry_index)
     series = snapshot[snapshot.columns[0]]
     resid_weight = 100 - np.sum(series.values)
     if resid_weight > 0:
         other_weight = resid_weight
     else:
         other_weight = 0.0
     series['other'] = other_weight
     return series
Exemplo n.º 11
0
    def _merge_factors(self, factors):
        """Merge every given factor into the container.

        Does nothing when ``factors`` is ``None``. Otherwise each factor must
        have ``production_format == OutputDataFormat.MULTI_INDEX_DF`` (checked
        through ``py_assert`` with ``ValueError``) and is passed to
        ``_merge_factor``. Afterwards ``self.data`` is restricted to the rows
        labelled by ``self._tiaocang_date``.
        """
        if factors is not None:
            pending = factors if isinstance(factors, list) else list(factors)
            for item in pending:
                is_multi_index = item.production_format == OutputDataFormat.MULTI_INDEX_DF
                reject_msg = ('factor {0} does not in multi-index dataframe format therefore can not be merged into container'
                              .format(item.name))
                py_assert(is_multi_index, ValueError, reject_msg)
                self._merge_factor(item)
            # Keep only the container's rebalance dates.
            self.data = self.data.loc[self._tiaocang_date]
Exemplo n.º 12
0
 def fit(self, factor_container, **kwargs):
     """Resolve default settings against ``factor_container``; return ``self``.

     ``factor_name`` falls back to the container's ``alpha_factor_col`` when
     unset. It must be a subset of those columns (asserted through
     ``py_assert`` with ``ValueError``). ``weight`` falls back to equal
     weights and ``ascend_order`` to a list of ``1``, both of length
     ``len(self.factor_name)``. ``**kwargs`` is ignored.
     """
     if self.factor_name is None:
         self.factor_name = factor_container.alpha_factor_col
     names_are_valid = set(self.factor_name).issubset(
         set(factor_container.alpha_factor_col))
     py_assert(names_are_valid, ValueError,
               'factor_name must be one of alpha factors in factor container')
     nb_factor = len(self.factor_name)
     if self.weight is None:
         equal_share = 1.0 / nb_factor
         self.weight = [equal_share] * nb_factor
     if self.ascend_order is None:
         self.ascend_order = [1] * nb_factor
     return self
Exemplo n.º 13
0
    def _merge_factors(self, factors):
        """Validate and merge a collection of factors into the container.

        ``None`` is a no-op. A non-list iterable is materialised into a list
        first. Each factor's ``production_format`` must equal
        ``OutputDataFormat.MULTI_INDEX_DF`` (asserted through ``py_assert``
        with ``ValueError``) before it is handed to ``_merge_factor``. Once
        all factors are merged, ``self.data`` is re-selected with
        ``self._tiaocang_date``.
        """
        if factors is None:
            return None

        if isinstance(factors, list):
            factor_list = factors
        else:
            factor_list = list(factors)

        for candidate in factor_list:
            format_ok = candidate.production_format == OutputDataFormat.MULTI_INDEX_DF
            py_assert(
                format_ok, ValueError,
                'factor {0} does not in multi-index dataframe format therefore can not be merged into container'
                .format(candidate.name))
            self._merge_factor(candidate)

        # Restrict the merged data to the container's rebalance dates.
        self.data = self.data.loc[self._tiaocang_date]
        return None
Exemplo n.º 14
0
    def _build_mapper(self, factor_container):
        """Build one ``DataFrameMapper`` per rebalance date.

        For every date in ``factor_container.tiaocang_date`` a mapper is
        created and stored under that date:

        * when ``self.method`` is ``SelectionMethod.INDUSTRY_NEUTRAL``, an
          ``IndustryNeutralSelector`` is applied to the score and industry
          code columns, using that date's slice of ``self.industry_weight``
          (which must not be ``None``; checked through ``py_assert`` with
          ``ValueError``);
        * otherwise a ``BrutalSelector`` is applied to the score column.

        Returns
        -------
        pandas.Series
            Object-dtype Series indexed by date, holding the mappers.
        """
        # The Series stores arbitrary mapper objects, so give it an explicit
        # object dtype rather than relying on the version-dependent default
        # dtype of an empty Series.
        data_mapper_by_date = pd.Series(dtype=object)
        industry_code = factor_container.industry_code
        score = factor_container.score
        for date in factor_container.tiaocang_date:
            if self.method == SelectionMethod.INDUSTRY_NEUTRAL:
                py_assert(self.industry_weight is not None, ValueError, 'industry weight has not been given')
                industry_weight = self.industry_weight.loc[date]
                data_mapper = [([score.name, industry_code.name],
                                IndustryNeutralSelector(industry_weight=industry_weight,
                                                        prop_select=self.prop_select,
                                                        min_select_per_industry=self.min_select_per_industry,
                                                        reset_index=True))]
            else:
                data_mapper = [(score.name, BrutalSelector(nb_select=self.nb_select,
                                                           prop_select=self.prop_select,
                                                           reset_index=True))]
            data_mapper_by_date[date] = DataFrameMapper(data_mapper, input_df=True, df_out=True)

        return data_mapper_by_date
Exemplo n.º 15
0
 def test_py_assert(self):
     """``py_assert`` raises the supplied exception type on a false condition."""
     # Callable form of assertRaises: the call to py_assert is made by the framework.
     self.assertRaises(ValueError, py_assert, 1 == 2, ValueError)