示例#1
0
    def __init__(self, **kwargs):
        """
        Build the portfolio state from optional keyword arguments.

        :param mu: stored as-is in ``_mu`` (default None)
        :param sd: stored as-is in ``_sd`` (default None)
        :param ir: stored as-is in ``_ir`` (default None)
        :param desc: description string, default 'Portfolio'
        :param tol: grid size used when rounding the mu bounds, default 1e-10
        :param asset_return: optional asset returns. Rows are treated as
            assets: the row count is used as the asset count and ``np.cov``
            is applied with its default row-as-variable layout.
        :param asset_mu: optional expected returns; takes precedence over
            ``asset_return`` when both are supplied
        :param asset_cov: optional covariance matrix
        :param asset_weight: optional asset weights
        :param asset_name: optional asset labels; derived from the index of
            the pandas inputs, or generated as 'Asset_<i>', when omitted
        """
        self._mu = kwargs.get('mu', None)
        self._sd = kwargs.get('sd', None)
        self._ir = kwargs.get('ir', None)
        self._mu_min = np.nan
        self._mu_max = np.nan
        self._desc = kwargs.get('desc', 'Portfolio')
        self._tol = kwargs.get('tol', 1e-10)
        self._asset_return = kwargs.get('asset_return', None)
        self._asset_mu = kwargs.get('asset_mu', None)
        self._asset_cov = kwargs.get('asset_cov', None)
        self._asset_weight = kwargs.get('asset_weight', None)
        self._asset_name = kwargs.get('asset_name', None)

        # The condition is False (i.e. a warning is requested) only when both
        # asset_return and asset_mu were supplied.
        pyFinWarning(
            not (self._asset_return is not None
                 and self._asset_mu is not None), UserWarning,
            "When both asset return and asset mu is given, only asset mu {0} will be used"
            .format(self._asset_mu))

        if self._asset_name is None:
            if isinstance(self._asset_mu, (pd.DataFrame, pd.Series)):
                self._asset_name = self._asset_mu.index
            elif isinstance(self._asset_cov, pd.DataFrame):
                self._asset_name = self._asset_cov.index
            elif isinstance(self._asset_weight, (pd.DataFrame, pd.Series)):
                # The Series check previously looked at asset_mu, so a Series
                # of weights could never provide the asset names.
                self._asset_name = self._asset_weight.index
            else:
                # No labelled input: number the assets by the length of the
                # covariance matrix, or of the returns when no cov is given.
                sized = self._asset_cov if self._asset_cov is not None \
                    else self._asset_return
                self._asset_name = pd.Index(
                    ['Asset_' + str(i) for i in range(len(sized))])

        self._nb_asset = len(self._asset_name)

        if self._asset_mu is None:
            if self._asset_return is None:
                self._asset_mu = pd.Series(np.full(self._nb_asset, np.nan),
                                           index=self._asset_name)
            else:
                # Pass plain values, not a {'mu': ...} dict: a dict is aligned
                # against the index by label, which produced an all-NaN Series.
                # Average along axis=1 so that rows are assets, matching
                # np.cov below and the len(asset_return) asset count above.
                # NOTE(review): the original averaged along axis=0 (and then
                # discarded the result) - confirm the intended orientation.
                mu = np.asarray(np.mean(self._asset_return, axis=1)).ravel()
                self._asset_mu = pd.Series(mu, index=self._asset_name)
        else:
            mu_values = self._asset_mu
            if isinstance(mu_values, pd.DataFrame):
                # Bounds come from the first column of a DataFrame input.
                mu_values = mu_values.iloc[:, 0]
            # np.min/np.max return a scalar for Series and arrays alike; the
            # previous `.min()[0]` failed on a Series because the scalar result
            # cannot be indexed.
            self._mu_min = np.ceil(np.min(mu_values) / self._tol) * self._tol
            self._mu_max = np.floor(np.max(mu_values) / self._tol) * self._tol

        if self._asset_cov is None:
            if self._asset_return is None:
                tmp = np.full((self._nb_asset, self._nb_asset), np.nan)
            else:
                tmp = np.cov(self._asset_return)
            self._asset_cov = pd.DataFrame(tmp,
                                           index=self._asset_name,
                                           columns=self._asset_name)
示例#2
0
def neutralize(factors,
               industries,
               caps=None,
               na_handler=FactorNAHandler.ReplaceWithMedian):
    """
    Regress the factor on the matrix built by ``get_industry_matrix`` and
    return the residuals.

    :param factors: pd.Series, raw cross-sectional factor values
    :param industries: pd.Series, value = industry name
    :param caps: optional, pd.Series, value = caps value; its natural log is
        passed to ``get_industry_matrix`` when given
    :param na_handler: enum, handler for na values (applied to the factor and
        to the log caps)
    :return: pd.Series of regression residuals (the neutralized factor),
        indexed like ``factors``, with NaN entries dropped
    """
    # Align everything on the factor index. `join_axes` was removed from
    # pd.concat in pandas 1.0; concat + reindex is the documented replacement.
    pyFinWarning(factors.size == industries.size, Warning,
                 "size of factors does not equal to that of industries")
    if caps is None:
        data = pd.concat([factors, industries],
                         axis=1).reindex(factors.index)
        lcap = None
    else:
        pyFinWarning(factors.size == caps.size, Warning,
                     "size of factors does not equal to that of caps")
        data = pd.concat([factors, industries, caps],
                         axis=1).reindex(factors.index)
        lcap = np.log(data.iloc[:, 2])

    # Positional selection keeps working even if the inputs share a name.
    factors = data.iloc[:, 0]
    industries = data.iloc[:, 1]

    # Entries without an industry are grouped under 'other'.
    industries = industries.fillna('other')
    # Missing log-caps and missing factor values are filled by na_handler.
    if lcap is not None:
        lcap = factor_na_handler(lcap, na_handler)
    factors = factor_na_handler(factors, na_handler)

    linreg = LinearRegression(fit_intercept=False)
    x = get_industry_matrix(industries, lcap)
    linreg.fit(x, factors)

    # Fitted values = x . coef (no intercept). Plain ndarrays are used because
    # np.mat was removed in NumPy 2.0.
    # NOTE(review): assumes x is a dense array-like (ndarray / DataFrame).
    fitted = np.dot(np.asarray(x), np.ravel(linreg.coef_))
    residues = factors.values - np.ravel(fitted)
    return pd.Series(residues, index=factors.index,
                     name=factors.name).dropna()
示例#3
0
def format_raw_data(raw_data, freq, field, return_type):
    """
    Reshape raw query results into a DataFrame keyed by trade date.

    :param raw_data: DataFrame with 'tradeDate' and 'secID' columns plus the
        requested field columns. Its 'tradeDate' column is converted to
        datetimes in place when ``freq`` is ``FreqType.EOD``.
    :param freq: FreqType value
    :param field: list of field (column) names; a single string is accepted
    :param return_type: DfReturnType value selecting the output layout
    :return: an empty DataFrame when ``raw_data`` is empty; a pivot table
        (index = tradeDate, columns = secID, values = field[0]) for
        ``DfReturnType.DateIndexAndSecIDCol``; otherwise the 'secID' and
        field columns indexed by 'tradeDate'
    """
    if len(raw_data) == 0:
        return pd.DataFrame()

    if freq == FreqType.EOD:
        raw_data['tradeDate'] = pd.to_datetime(raw_data['tradeDate'])

    if return_type == DfReturnType.DateIndexAndSecIDCol:
        pyFinWarning(
            len(field) == 1, Warning,
            'Only the columns with name=field[0] will be used as values in pivot table'
        )
        return raw_data.pivot(index='tradeDate',
                              columns='secID',
                              values=field[0])

    # Select with a flat list of column names: indexing with the tuple
    # ('tradeDate', 'secID', field) is a single-key lookup and fails.
    fields = [field] if isinstance(field, str) else list(field)
    columns = ['tradeDate', 'secID'] + fields
    return raw_data[columns].set_index('tradeDate')
示例#4
0
 def testPyFinWarning(self):
     """A failed pyFinWarning condition emits a warning of the given category."""
     with warnings.catch_warnings(record=True) as caught:
         # Record every warning, including ones normally filtered out.
         warnings.simplefilter("always")
         pyFinWarning(1 == 2, DeprecationWarning)
         seen_categories = [entry.category for entry in caught]
         self.assertTrue(
             any(category == DeprecationWarning
                 for category in seen_categories))