def __init__(self, **kwargs):
    """
    Build the portfolio state from optional keyword arguments.

    :param mu: optional, stored as-is (default None)
    :param sd: optional, stored as-is (default None)
    :param ir: optional, stored as-is (default None)
    :param desc: optional, description label, default 'Portfolio'
    :param tol: optional, grid size used to round the mu bounds,
        default 1e-10
    :param asset_return: optional, asset return data; only used to derive
        asset mu / asset cov when those are not given
    :param asset_mu: optional, pd.Series or pd.DataFrame of expected
        returns (for a DataFrame the first column is used for the bounds)
    :param asset_cov: optional, covariance of the assets
    :param asset_weight: optional, weights of the assets
    :param asset_name: optional, labels of the assets; inferred from the
        other inputs when omitted
    """
    self._mu = kwargs.get('mu', None)
    self._sd = kwargs.get('sd', None)
    self._ir = kwargs.get('ir', None)
    self._mu_min = np.nan
    self._mu_max = np.nan
    self._desc = kwargs.get('desc', 'Portfolio')
    self._tol = kwargs.get('tol', 1e-10)
    self._asset_return = kwargs.get('asset_return', None)
    self._asset_mu = kwargs.get('asset_mu', None)
    self._asset_cov = kwargs.get('asset_cov', None)
    self._asset_weight = kwargs.get('asset_weight', None)
    self._asset_name = kwargs.get('asset_name', None)

    # The condition is False (and the warning is issued) only when both
    # asset_return and asset_mu are supplied.
    pyFinWarning(
        not (self._asset_return is not None and self._asset_mu is not None),
        UserWarning,
        "When both asset return and asset mu is given, only asset mu {0} will be used"
        .format(self._asset_mu))

    if self._asset_name is None:
        # Infer labels from the first labelled input: mu, then cov, then
        # weight; otherwise generate Asset_0, Asset_1, ...
        if isinstance(self._asset_mu, (pd.DataFrame, pd.Series)):
            self._asset_name = self._asset_mu.index
        elif isinstance(self._asset_cov, pd.DataFrame):
            self._asset_name = self._asset_cov.index
        elif isinstance(self._asset_weight, (pd.DataFrame, pd.Series)):
            # The Series check used to test asset_mu here, so a Series of
            # weights was never used as a source of names.
            self._asset_name = self._asset_weight.index
        else:
            sized = self._asset_cov if self._asset_cov is not None \
                else self._asset_return
            self._asset_name = pd.Index(
                ['Asset_' + str(i) for i in range(len(sized))])

    self._nb_asset = len(self._asset_name)

    if self._asset_mu is None:
        # NOTE(review): wrapping the values in a dict keyed 'mu' while
        # passing index=asset names looks like it makes pandas align on the
        # key 'mu', which is not an asset name -- verify the result is not
        # all-NaN. Left unchanged because the intended container (Series
        # vs one-column DataFrame) cannot be determined from here.
        if self._asset_return is None:
            self._asset_mu = pd.Series(
                {'mu': np.repeat(np.nan, self._nb_asset, 0)},
                index=self._asset_name)
        else:
            mu = np.mean(self._asset_return, axis=0)
            self._asset_mu = pd.Series({'mu': mu}, index=self._asset_name)
    else:
        # Bounds come from the first column of a DataFrame (what
        # ``.min()[0]`` selected before) or from the Series itself, which
        # the previous ``.min()[0]`` could not handle.
        mu_values = self._asset_mu
        if isinstance(mu_values, pd.DataFrame):
            mu_values = mu_values.iloc[:, 0]
        # Snap the bounds inward onto the tolerance grid.
        self._mu_min = np.ceil(mu_values.min() / self._tol) * self._tol
        self._mu_max = np.floor(mu_values.max() / self._tol) * self._tol

    if self._asset_cov is None:
        if self._asset_return is None:
            tmp = np.full((self._nb_asset, self._nb_asset), np.nan)
        else:
            # NOTE(review): np.cov defaults to rowvar=True (each row is a
            # variable) while the mean above reduces along axis 0 --
            # confirm the orientation of asset_return.
            tmp = np.cov(self._asset_return)
        self._asset_cov = pd.DataFrame(tmp,
                                       index=self._asset_name,
                                       columns=self._asset_name)
def neutralize(factors, industries, caps=None, na_handler=FactorNAHandler.ReplaceWithMedian):
    """
    Neutralize a cross-sectional factor: regress it (without intercept) on
    the matrix returned by ``get_industry_matrix(industries, lcap)`` and
    return the regression residuals.

    :param factors: pd.Series, raw cross-sectional factor
    :param industries: pd.Series, value = industry name
    :param caps: optional, pd.Series, value = caps value; its log is passed
        to get_industry_matrix when given
    :param na_handler: enum, handler for na values
    :return: pd.Series of residuals carrying the name of ``factors``, with
        NaN entries dropped
    """
    # Align all inputs on the factor index through concat.
    pyFinWarning(factors.size == industries.size, Warning,
                 "size of factors does not equal to that of industries")
    if caps is None:
        pieces = [factors, industries]
    else:
        pyFinWarning(factors.size == caps.size, Warning,
                     "size of factors does not equal to that of caps")
        pieces = [factors, industries, caps]

    # ``join_axes`` no longer exists in pandas >= 1.0; concatenating and then
    # reindexing on the factor index is the documented replacement.
    data = pd.concat(pieces, axis=1).reindex(factors.index)

    # Positional selection keeps working when the input Series share a name
    # (or have none), where label selection could return a DataFrame.
    factors = data.iloc[:, 0]
    industries = data.iloc[:, 1]
    lcap = None if caps is None else np.log(data.iloc[:, 2])

    # Securities without an industry are grouped under 'other'.
    industries = industries.fillna('other')
    # Missing caps / factor values are filled according to na_handler.
    if lcap is not None:
        lcap = factor_na_handler(lcap, na_handler)
    factors = factor_na_handler(factors, na_handler)

    linreg = LinearRegression(fit_intercept=False)
    y = factors
    x = get_industry_matrix(industries, lcap)
    linreg.fit(x, y)

    # Fitted values as a flat ndarray; avoids np.mat, which was removed in
    # NumPy 2.0.
    fitted = np.asarray(np.dot(x, np.ravel(linreg.coef_))).ravel()
    residues = y.values - fitted
    ret = pd.Series(residues, index=factors.index, name=factors.name).dropna()
    return ret
def format_raw_data(raw_data, freq, field, return_type):
    """
    Reshape raw query output into a DataFrame indexed by 'tradeDate'.

    :param raw_data: pd.DataFrame with at least the columns 'tradeDate',
        'secID' and the requested field(s)
    :param freq: FreqType; for FreqType.EOD the 'tradeDate' column is
        converted with pd.to_datetime (in place, on ``raw_data`` itself)
    :param field: column name or list of column names holding the values
    :param return_type: DfReturnType; DateIndexAndSecIDCol produces a pivot
        table (dates x secID) of the first field, anything else produces the
        long table of 'secID' plus all fields
    :return: pd.DataFrame, empty when ``raw_data`` is empty
    """
    if len(raw_data) == 0:
        return pd.DataFrame()

    # Accept a single column name as well as a list of names.
    fields = [field] if isinstance(field, str) else list(field)

    # NOTE(review): the original indentation was lost; the return_type
    # branch is assumed to apply to every frequency, not only EOD -- confirm.
    if freq == FreqType.EOD:
        raw_data['tradeDate'] = pd.to_datetime(raw_data['tradeDate'])

    if return_type == DfReturnType.DateIndexAndSecIDCol:
        pyFinWarning(
            len(fields) == 1, Warning,
            'Only the columns with name=field[0] will be used as values in pivot table'
        )
        return raw_data.pivot(index='tradeDate', columns='secID', values=fields[0])

    # Select with a list of labels: the former tuple key
    # ('tradeDate', 'secID', field) is looked up as one column label and
    # raises KeyError on a flat column index.
    ret = raw_data[['tradeDate', 'secID'] + fields]
    return ret.set_index('tradeDate')
def testPyFinWarning(self):
    """Check that pyFinWarning with a false condition records a warning
    whose category is the one passed in (DeprecationWarning here)."""
    with warnings.catch_warnings(record=True) as caught:
        # Disable filtering so the warning is recorded regardless of the
        # interpreter's default filters.
        warnings.simplefilter("always")
        pyFinWarning(1 == 2, DeprecationWarning)
        recorded_categories = [entry.category for entry in caught]
        self.assertTrue(DeprecationWarning in recorded_categories)