Пример #1
0
    def fallback_covariance(time_series):
        """Estimate a connectivity matrix with shrinkage estimators.

        Rows of ``time_series`` that an ``IsolationForest`` (2% contamination)
        flags as outliers are dropped first. A ``ConnectivityMeasure`` is then
        fitted with a Ledoit-Wolf covariance estimator; if that raises a
        linear-algebra or floating-point error, the Oracle Approximating
        Shrinkage (OAS) estimator is tried instead.

        :param time_series: 2-D array that supports boolean-mask row indexing
            (presumably samples x regions -- TODO confirm against caller).
        :return: the single matrix produced by
            ``ConnectivityMeasure.fit_transform`` for the cleaned series.
        :raises ValueError: if both estimators fail.
        """
        from sklearn.ensemble import IsolationForest
        from sklearn import covariance

        # Announce the clean-up before it happens so the log reads in order.
        print('Matrix estimation failed with Lasso and shrinkage due to '
              'ill conditions. Removing potential anomalies from the '
              'time-series using IsolationForest...')

        # Remove gross outliers: predict() labels outliers as -1.
        model = IsolationForest(contamination=0.02)
        model.fit(time_series)
        inlier_mask = model.predict(time_series) != -1
        time_series = time_series[inlier_mask]

        # Use the public exception path; ``np.linalg.linalg`` is a private
        # module that newer NumPy releases deprecate.
        fit_errors = (np.linalg.LinAlgError, FloatingPointError)

        try:
            print("Trying Ledoit-Wolf Estimator...")
            conn_measure = ConnectivityMeasure(
                cov_estimator=covariance.LedoitWolf(store_precision=True,
                                                    assume_centered=True),
                kind=kind)
            conn_matrix = conn_measure.fit_transform([time_series])[0]
        except fit_errors:
            print("Trying Oracle Approximating Shrinkage Estimator...")
            conn_measure = ConnectivityMeasure(
                cov_estimator=covariance.OAS(assume_centered=True), kind=kind)
            try:
                conn_matrix = conn_measure.fit_transform([time_series])[0]
            except fit_errors:
                raise ValueError('All covariance estimators failed to '
                                 'converge...')

        return conn_matrix
Пример #2
0
    def __init__(self, window=float('inf'), mu_estimator=None, cov_estimator=None,
                 min_history=None, max_leverage=1., method='mpt', q=0.01, gamma=0., allow_cash=False, **kwargs):
        """
        :param window: Window for calculating mean and variance. Use float('inf') for entire history.
        :param mu_estimator: Estimator of expected returns. Either an estimator object or one of
            the names "historical" / "sharpe". None uses MuEstimator().
        :param cov_estimator: Covariance estimator. Either an estimator object or one of the names
            "empirical", "ledoit-wolf", "graph-lasso", "oas". None means "empirical". sklearn
            BaseEstimator instances are wrapped in CovarianceEstimator.
        :param min_history: Use zero weights for first min_periods. Defaults to 50 for an infinite
            window, otherwise to the window length.
        :param max_leverage: Max leverage to use.
        :param method: optimization objective - can be "mpt", "sharpe" and "variance"
        :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
        :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
        :param allow_cash: Allow holding cash (weights doesn't have to sum to 1)
        :raises NotImplementedError: If an estimator name is not recognised.
        """
        if np.isinf(window):
            # an "infinite" window is represented by a very large integer
            window = int(1e+8)
            min_history = min_history or 50
        else:
            min_history = min_history or window

        super(MPT, self).__init__(min_history=min_history, **kwargs)
        self.window = window
        self.max_leverage = max_leverage
        self.method = method
        self.q = q
        self.gamma = gamma
        self.allow_cash = allow_cash

        if cov_estimator is None:
            cov_estimator = 'empirical'

        if isinstance(cov_estimator, basestring):
            if cov_estimator == 'empirical':
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == 'ledoit-wolf':
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == 'graph-lasso':
                # scikit-learn renamed GraphLasso to GraphicalLasso; accept either.
                graph_lasso = (getattr(covariance, 'GraphicalLasso', None)
                               or covariance.GraphLasso)
                cov_estimator = graph_lasso()
            elif cov_estimator == 'oas':
                cov_estimator = covariance.OAS()
            else:
                # NotImplemented is a comparison sentinel, not an exception.
                raise NotImplementedError('Unknown covariance estimator {}'.format(cov_estimator))

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator)

        if mu_estimator is None:
            mu_estimator = MuEstimator()

        if isinstance(mu_estimator, basestring):
            if mu_estimator == 'historical':
                mu_estimator = HistoricalEstimator(window)
            elif mu_estimator == 'sharpe':
                mu_estimator = MuEstimator()
            else:
                raise NotImplementedError('Unknown mu estimator {}'.format(mu_estimator))

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator
def shrinkage():
    """Fit and plot shrinkage / graphical-lasso estimates for each dataset.

    Works entirely on module-level state (``cov_train_pyr``, ``cov_train``,
    ``data_train``) and returns nothing. For every entry of ``data_train`` an
    OAS covariance and a cross-validated graphical lasso are fitted, the
    correlation derived from the lasso precision matrix is printed and
    plotted, and finally a Ledoit-Wolf covariance is stored for the last
    index.
    """
    plt.imshow(cov_train_pyr[0])
    plt.colorbar()
    plt.show()

    # NOTE(review): these are aliases of the same ``cov_train`` object, not
    # copies, so every ``x[i] = ...`` below writes into the same slot of
    # ``cov_train``. Kept as-is to preserve the existing side effects --
    # confirm whether independent containers were intended.
    cov_train_lasso = cov_train
    prec_train_lasso = cov_train
    cov_train_oas = cov_train
    corr_lasso = cov_train

    for i in range(len(data_train)):
        samples = data_train[i]
        cov_train_oas[i] = covariance.OAS().fit(samples).covariance_

        # Fit the cross-validated model once and read both results from it;
        # refitting on identical data only repeats the whole CV search.
        GLassCV = covariance.GraphLassoCV(cv=5)
        GLassCV.fit(samples)
        cov_train_lasso[i] = GLassCV.covariance_
        prec_train_lasso[i] = GLassCV.precision_
        corr_lasso[i] = cov2corr(prec_train_lasso[i])
        print('sum of correlations: ', np.sum(np.abs(corr_lasso[i]), axis=1))
        myalphas = GLassCV.cv_alphas_
        print(myalphas)
        print(np.mean(GLassCV.grid_scores_, axis=1))

        plt.imshow(corr_lasso[i])
        plt.colorbar()
        plt.show()

    # NOTE(review): runs once, after the loop, using the last loop index
    # (fails if ``data_train`` is empty) -- confirm this was not meant to be
    # inside the loop.
    cov_train[i] = covariance.LedoitWolf().fit(data_train[i]).covariance_
Пример #4
0
 def _train(self, train_data, params, verbose):
     """Fit one OAS covariance estimate per element of ``train_data``.

     :param train_data: iterable of sample sets, each passed to ``OAS.fit``.
     :param params: not used by this implementation.
     :param verbose: when truthy, print a start message and the elapsed time.
     :return: tuple ``(covs, None)`` where ``covs`` lists the fitted
         ``covariance_`` matrices in input order.
     """
     import sklearn.covariance as sk_cov
     if verbose:
         print("Training {} ...".format(self.name))
     started = time.time()
     # OAS.fit returns the estimator itself, so the calls can be chained.
     covs = [sk_cov.OAS().fit(samples).covariance_ for samples in train_data]
     elapsed = time.time() - started
     if verbose:
         print("\tElapsed time {:.1f}s".format(elapsed))
     return covs, None
Пример #5
0
 def __init__(self, dim, estimator='OAS', **kwargs):
     """
     Select the covariance estimator used by this object.

     :param dim: forwarded to the parent constructor.
     :param estimator: name of a ``covariance`` estimator class; one of
         'EmpiricalCovariance', 'LedoitWolf', 'MinCovDet', 'OAS' or
         'ShrunkCovariance'. It is created with ``assume_centered=True``.
     :param kwargs: forwarded to the parent constructor.
     :raises ValueError: if ``estimator`` is not one of the names above.
     """
     super(SKGaussianParams, self).__init__(dim, **kwargs)
     # The accepted option names are exactly the estimator class names.
     supported = ('EmpiricalCovariance', 'LedoitWolf', 'MinCovDet', 'OAS',
                  'ShrunkCovariance')
     if estimator not in supported:
         raise ValueError('Unknown estimator: {}'.format(estimator))
     estimator_cls = getattr(covariance, estimator)
     self._estimator = estimator_cls(assume_centered=True)
Пример #6
0
def computeCovar(bed, shrinkMethod, fitIndividuals):
    """Compute a (possibly shrunk) covariance / kinship matrix from ``bed.val``.

    ``shrinkMethod`` selects the estimator:

    * ``'lw'``, ``'oas'``: fit the scikit-learn Ledoit-Wolf / OAS estimator on
      the ``fitIndividuals`` rows, then apply the estimated shrinkage to all
      rows with ``ShrunkCovariance``.
    * ``'cv'``: same, but the shrinkage is chosen by grid search.
    * ``'l1'``: choose ``alpha`` with a cross-validated graphical lasso on the
      ``fitIndividuals`` rows, then fit a graphical lasso on all rows.
    * anything else: compute ``bed.val * bed.val.T`` with BLAS. If
      ``shrinkMethod`` is ``'mylw'`` a shrinkage parameter is derived from the
      data, and if it parses as a float that value is used; a positive
      parameter blends the matrix with a scaled identity.

    :param bed: object whose ``val`` attribute is a 2-D array (rows are
        presumably individuals and columns SNPs -- TODO confirm).
    :param shrinkMethod: estimator name or numeric shrinkage, see above.
    :param fitIndividuals: row selector for the rows used to tune the
        shrinkage parameter.
    :return: the matrix ``XXT``.
    """
    if shrinkMethod in ('lw', 'oas', 'l1', 'cv'):
        import sklearn.covariance as cov
        t0 = time.time()
        print('Estimating shrunk covariance using %s estimator...'
              % (shrinkMethod,))

        if shrinkMethod == 'lw':
            covEstimator = cov.LedoitWolf(assume_centered=True,
                                          block_size=5 * bed.val.shape[0])
        elif shrinkMethod == 'oas':
            covEstimator = cov.OAS(assume_centered=True)
        elif shrinkMethod == 'l1':
            # scikit-learn renamed GraphLassoCV to GraphicalLassoCV.
            graphLassoCV = (getattr(cov, 'GraphicalLassoCV', None)
                            or cov.GraphLassoCV)
            covEstimator = graphLassoCV(assume_centered=True, verbose=True)
        else:  # 'cv'
            # GridSearchCV lives in model_selection in current scikit-learn;
            # fall back to the legacy module for old installations.
            try:
                from sklearn.model_selection import GridSearchCV
            except ImportError:
                from sklearn.grid_search import GridSearchCV
            shrunkEstimator = cov.ShrunkCovariance(assume_centered=True)
            param_grid = {'shrinkage': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]}
            covEstimator = GridSearchCV(shrunkEstimator, param_grid)

        # Tune the regularisation on the selected individuals only.
        covEstimator.fit(bed.val[fitIndividuals, :].T)
        if shrinkMethod == 'l1':
            alpha = covEstimator.alpha_
            print('l1 alpha chosen: %s' % (alpha,))
            graphLasso = (getattr(cov, 'GraphicalLasso', None)
                          or cov.GraphLasso)
            covEstimator2 = graphLasso(alpha=alpha,
                                       assume_centered=True,
                                       verbose=True)
        else:
            if shrinkMethod == 'cv':
                # The fitted grid search is ``covEstimator``.
                shrinkEstimator = covEstimator.best_params_['shrinkage']
            else:
                shrinkEstimator = covEstimator.shrinkage_
            print('shrinkage estimator: %s' % (shrinkEstimator,))
            covEstimator2 = cov.ShrunkCovariance(shrinkage=shrinkEstimator,
                                                 assume_centered=True)

        # Re-estimate on all individuals with the tuned parameter.
        covEstimator2.fit(bed.val.T)
        XXT = covEstimator2.covariance_ * bed.val.shape[1]
        print('Done in %0.2f seconds' % (time.time() - t0))

    else:
        print('Computing kinship matrix...')
        t0 = time.time()
        XXT = symmetrize(blas.dsyrk(1.0, bed.val, lower=1))
        print('Done in %0.2f seconds' % (time.time() - t0))

        # A numeric shrinkMethod is used directly as the shrinkage parameter;
        # anything that does not parse as a float disables shrinkage.
        try:
            shrinkParam = float(shrinkMethod)
        except (TypeError, ValueError):
            shrinkParam = -1

        if shrinkMethod == 'mylw':
            XXT_fit = XXT[np.ix_(fitIndividuals, fitIndividuals)]
            sE2R = (np.sum(XXT_fit**2) -
                    np.sum(np.diag(XXT_fit)**2)) / (bed.val.shape[1]**2)
            temp = symmetrize(
                blas.dsyrk(1.0, bed.val[fitIndividuals, :]**2, lower=1))
            sER2 = (temp.sum() - np.diag(temp).sum()) / bed.val.shape[1]
            shrinkParam = (sER2 - sE2R) / (sE2R * (bed.val.shape[1] - 1))

        if shrinkParam > 0:
            print('shrinkage estimator: %s' % (1 - shrinkParam,))
            XXT = (1 - shrinkParam) * XXT + bed.val.shape[
                1] * shrinkParam * np.eye(XXT.shape[0])

    return XXT
Пример #7
0
    def __init__(
        self,
        window=None,
        mu_estimator=None,
        cov_estimator=None,
        mu_window=None,
        cov_window=None,
        min_history=None,
        bounds=None,
        max_leverage=1.0,
        method="mpt",
        q=0.01,
        gamma=0.0,
        optimizer_options=None,
        force_weights=None,
        **kwargs,
    ):
        """
        :param window: Window for calculating mean and variance. Use None for entire history.
        :param mu_estimator: Estimator of expected returns. Either an estimator object or one of
            the names "historical" / "sharpe". None uses SharpeEstimator().
        :param cov_estimator: Covariance estimator. Either an estimator object or one of the names
            "empirical", "ledoit-wolf", "graph-lasso", "oas", "single-index". None means
            "empirical". sklearn BaseEstimator instances are wrapped in CovarianceEstimator.
        :param mu_window: Window passed to HistoricalEstimator; falls back to `window`.
        :param cov_window: Window passed to CovarianceEstimator; falls back to `window`.
        :param min_history: Use zero weights for first min_periods. Default is 1 year
        :param bounds: Weight bounds, stored as `self.bounds` (empty dict when not given).
            Cannot be combined with max_leverage != 1.
        :param max_leverage: Max leverage to use.
        :param method: optimization objective - can be "mpt", "sharpe" and "variance"
        :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
            from https://en.wikipedia.org/wiki/Modern_portfolio_theory#Efficient_frontier_with_no_risk-free_asset
            q=2 is equivalent to full-kelly, q=1 is equivalent to half kelly
        :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
        :param optimizer_options: Stored as `self.optimizer_options` (empty dict when not given).
        :param force_weights: Stored as `self.force_weights`.
        :raises NotImplementedError: If bounds are combined with max_leverage != 1, or an
            estimator name is not recognised.
        """
        super().__init__(min_history=min_history, **kwargs)
        mu_window = mu_window or window
        cov_window = cov_window or window
        self.method = method
        self.q = q
        self.gamma = gamma
        self.bounds = bounds or {}
        self.force_weights = force_weights
        self.max_leverage = max_leverage
        self.optimizer_options = optimizer_options or {}

        if bounds and max_leverage != 1:
            # NotImplemented is a comparison sentinel, not an exception.
            raise NotImplementedError(
                "max_leverage cannot be used with bounds, consider removing max_leverage and replace it with bounds"
            )

        if cov_estimator is None:
            cov_estimator = "empirical"

        if isinstance(cov_estimator, string_types):
            if cov_estimator == "empirical":
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == "ledoit-wolf":
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == "graph-lasso":
                # scikit-learn renamed GraphLasso to GraphicalLasso; accept either.
                graph_lasso = (
                    getattr(covariance, "GraphicalLasso", None)
                    or covariance.GraphLasso
                )
                cov_estimator = graph_lasso()
            elif cov_estimator == "oas":
                cov_estimator = covariance.OAS()
            elif cov_estimator == "single-index":
                cov_estimator = SingleIndexCovariance()
            else:
                raise NotImplementedError(
                    "Unknown covariance estimator {}".format(cov_estimator)
                )

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

        if mu_estimator is None:
            mu_estimator = SharpeEstimator()

        if isinstance(mu_estimator, string_types):
            if mu_estimator == "historical":
                mu_estimator = HistoricalEstimator(window=mu_window)
            elif mu_estimator == "sharpe":
                mu_estimator = SharpeEstimator()
            else:
                raise NotImplementedError(
                    "Unknown mu estimator {}".format(mu_estimator)
                )

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator
Пример #8
0
def covar_matrix(X, method="hist", d=0.94, **kwargs):
    r"""
    Calculate the covariance matrix using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_features)
        Features matrix, where n_samples is the number of samples and
        n_features is the number of features.
    method : str, can be {'hist', 'ewma1', 'ewma2', 'ledoit', 'oas' or 'shrunk'}
        The default is 'hist'. The method used to estimate the covariance matrix:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
    d : scalar
        The smoothing factor of ewma methods.
        The default is 0.94.
    **kwargs:
        Other variables related to covariance estimation. See
        `Scikit Learn <https://scikit-learn.org/stable/modules/covariance.html>`_
        for more details.

    Returns
    -------
    cov : DataFrame
        The estimation of covariance matrix, indexed and labelled by the
        columns of X.

    Raises
    ------
    ValueError
        When X is not a DataFrame or ``method`` is not one of the supported
        names.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    if method == "hist":
        cov = np.cov(X.T)
    elif method in ("ewma1", "ewma2"):
        # 'ewma1' keeps pandas' adjust=True behaviour, 'ewma2' disables it.
        ewm_cov = X.ewm(alpha=1 - d, adjust=(method == "ewma1")).cov()
        # The result stacks one covariance matrix per row of X; keep only
        # the block belonging to the last row label.
        last_label = ewm_cov.iloc[-1, :].name[0]
        cov = ewm_cov.loc[(last_label, slice(None)), :]
    elif method in ("ledoit", "oas", "shrunk"):
        estimator_name = {
            "ledoit": "LedoitWolf",
            "oas": "OAS",
            "shrunk": "ShrunkCovariance",
        }[method]
        estimator = getattr(skcov, estimator_name)(**kwargs)
        estimator.fit(X)
        cov = estimator.covariance_
    else:
        # Previously an unknown method fell through and crashed with an
        # UnboundLocalError on ``cov``; fail with the documented exception.
        raise ValueError(
            "Unknown method {!r}; expected one of 'hist', 'ewma1', 'ewma2', "
            "'ledoit', 'oas' or 'shrunk'".format(method)
        )

    # ndmin=2 keeps the single-asset case (scalar covariance) as a 1x1 matrix.
    cov = pd.DataFrame(np.array(cov, ndmin=2), columns=assets, index=assets)

    return cov
Пример #9
0
    def __init__(self,
                 mu_estimator=None,
                 cov_estimator=None,
                 cov_window=None,
                 min_history=None,
                 bounds=None,
                 max_leverage=1.,
                 method='mpt',
                 q=0.01,
                 gamma=0.,
                 optimizer_options=None,
                 force_weights=None,
                 **kwargs):
        """
        :param mu_estimator: Estimator of expected returns. Either an estimator object or one of
            the names "historical" / "sharpe". None uses SharpeEstimator().
        :param cov_estimator: Covariance estimator. Either an estimator object or one of the names
            "empirical", "ledoit-wolf", "graph-lasso", "oas", "single-index". None means
            "empirical". sklearn BaseEstimator instances are wrapped in CovarianceEstimator.
        :param cov_window: Window passed to CovarianceEstimator and, for
            mu_estimator="historical", to HistoricalEstimator.
        :param min_history: Use zero weights for first min_periods. Default is 1 year
        :param bounds: Weight bounds, stored as `self.bounds`.
        :param max_leverage: Max leverage to use.
        :param method: optimization objective - can be "mpt", "sharpe" and "variance"
        :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
        :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
        :param optimizer_options: Stored as `self.optimizer_options` (empty dict when not given).
        :param force_weights: Stored as `self.force_weights`.
        :raises NotImplementedError: If an estimator name is not recognised.
        """
        super().__init__(min_history=min_history, **kwargs)
        self.method = method
        self.q = q
        self.gamma = gamma
        self.bounds = bounds
        self.force_weights = force_weights
        self.max_leverage = max_leverage
        self.optimizer_options = optimizer_options or {}

        if cov_estimator is None:
            cov_estimator = 'empirical'

        if isinstance(cov_estimator, string_types):
            if cov_estimator == 'empirical':
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == 'ledoit-wolf':
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == 'graph-lasso':
                # scikit-learn renamed GraphLasso to GraphicalLasso; accept either.
                graph_lasso = (getattr(covariance, 'GraphicalLasso', None)
                               or covariance.GraphLasso)
                cov_estimator = graph_lasso()
            elif cov_estimator == 'oas':
                cov_estimator = covariance.OAS()
            elif cov_estimator == 'single-index':
                cov_estimator = SingleIndexCovariance()
            else:
                # NotImplemented is a comparison sentinel, not an exception.
                raise NotImplementedError(
                    'Unknown covariance estimator {}'.format(cov_estimator))

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator,
                                                window=cov_window)

        if mu_estimator is None:
            mu_estimator = SharpeEstimator()

        if isinstance(mu_estimator, string_types):
            if mu_estimator == 'historical':
                mu_estimator = HistoricalEstimator(window=cov_window)
            elif mu_estimator == 'sharpe':
                mu_estimator = SharpeEstimator()
            else:
                raise NotImplementedError(
                    'Unknown mu estimator {}'.format(mu_estimator))

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator