Example #1
def _gmm_from_memberships(data, memberships, covariance_type):
    clusters = set(memberships)
    n_clusters = len(clusters)
    gmm = GMM(n_components=n_clusters, params='m')
    gmm.weights_ = np.ones([n_clusters])/n_clusters
    gmm.means_ = np.zeros([n_clusters, data.shape[1]]) 
    if covariance_type == 'diag':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1]])
    if covariance_type == 'spherical':
        gmm.covars_ = np.zeros([n_clusters])
    if covariance_type == 'full':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1], data.shape[1]])

    for cluster in clusters:
        cluster = int(cluster)
        indices = (memberships == cluster)
        gmm.means_[cluster, :] = data[indices, :].mean(axis=0)
        if covariance_type in ['diag', 'spherical']:
            #TODO Fix covariance calculation, for now, return cov=1
            #D = np.diag(np.cov(data[indices, :].T))
            D = np.ones([data.shape[1]])
            if covariance_type == 'spherical':
                gmm.covars_[cluster] = D.mean()
            else:
                gmm.covars_[cluster] = D
        if covariance_type == 'full':
            cov_estimator = OAS()
            cov_estimator.fit(data[indices, :])
            gmm.covars_[cluster] = cov_estimator.covariance_
    return gmm
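
The 'full' branch above replaces each cluster's empirical covariance with a shrunk OAS estimate. A minimal, self-contained sketch of just that step, using synthetic data and hard cluster labels (both are assumptions made here for illustration, not part of the original project):

import numpy as np
from sklearn.covariance import OAS

# Two synthetic clusters of 4-dimensional points (illustrative data only)
rng = np.random.RandomState(0)
data = np.vstack([rng.normal(0, 1, size=(50, 4)),
                  rng.normal(3, 1, size=(50, 4))])
memberships = np.array([0] * 50 + [1] * 50)

# Shrunk full covariance per cluster, mirroring the 'full' branch above
covars = {}
for cluster in set(memberships):
    rows = data[memberships == cluster, :]
    covars[cluster] = OAS().fit(rows).covariance_
print(covars[0].shape)  # (4, 4)
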
Example #2
    def fit_base(self, X, y):
        """
        Fit the SLDA model to the base data.
        :param X: an Nxd torch tensor of base initialization data
        :param y: an Nx1-dimensional torch tensor of the associated labels for X
        :return: None
        """
        print('\nFitting Base...')
        X = X.to(self.device)
        y = y.squeeze()

        # update positive and negative means
        cls_ix = torch.arange(self.num_classes)
        for k in torch.unique(y):
            self.posW[k] = X[y == k].mean(0)
            self.posT[k] = X[y == k].shape[0]
        for k in cls_ix:
            self.negW[k] = X[y != k].mean(0)
            self.negT[k] = X[y != k].shape[0]
        self.num_updates = X.shape[0]

        print('\nEstimating initial covariance matrix...')
        from sklearn.covariance import OAS
        cov_estimator = OAS(assume_centered=True)
        cov_estimator.fit((X - self.posW[y]).cpu().numpy())
        self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(
            self.device)

        print('\nBuilding initial OOD threshold(s)...')
        self.ood_predict(X, y)

        print('')
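
The covariance step above centers every sample by its class mean and then fits OAS with assume_centered=True. A standalone sketch of that step alone, with randomly generated features and labels standing in for the real base data (an assumption for illustration):

import torch
from sklearn.covariance import OAS

X = torch.randn(200, 16)            # assumed base features (N x d)
y = torch.randint(0, 4, (200,))     # assumed labels for 4 classes
means = torch.stack([X[y == k].mean(0) for k in range(4)])

# Subtract each sample's class mean, then estimate a shrunk covariance
cov_estimator = OAS(assume_centered=True)
cov_estimator.fit((X - means[y]).cpu().numpy())
Sigma = torch.from_numpy(cov_estimator.covariance_).float()
print(Sigma.shape)  # torch.Size([16, 16])
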
Example #3
def _gmm_from_memberships(data, memberships, covariance_type):
    clusters = set(memberships)
    n_clusters = len(clusters)
    gmm = GMM(n_components=n_clusters, params='m')
    gmm.weights_ = np.ones([n_clusters]) / n_clusters
    gmm.means_ = np.zeros([n_clusters, data.shape[1]])
    if covariance_type == 'diag':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1]])
    if covariance_type == 'spherical':
        gmm.covars_ = np.zeros([n_clusters])
    if covariance_type == 'full':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1], data.shape[1]])

    for cluster in clusters:
        cluster = int(cluster)
        indices = (memberships == cluster)
        gmm.means_[cluster, :] = data[indices, :].mean(axis=0)
        if covariance_type in ['diag', 'spherical']:
            #TODO Fix covariance calculation, for now, return cov=1
            #D = np.diag(np.cov(data[indices, :].T))
            D = np.ones([data.shape[1]])
            if covariance_type == 'spherical':
                gmm.covars_[cluster] = D.mean()
            else:
                gmm.covars_[cluster] = D
        if covariance_type == 'full':
            cov_estimator = OAS()
            cov_estimator.fit(data[indices, :])
            gmm.covars_[cluster] = cov_estimator.covariance_
    return gmm
Example #4
    def _shrink_covariance(asset_returns):
        """
        Regularise/Shrink the asset covariances.

        :param asset_returns: (pd.DataFrame) Asset returns
        :return: (np.array) Shrunk covariance matrix of the asset returns
        """

        oas = OAS()
        oas.fit(asset_returns)
        shrinked_covariance = oas.covariance_
        return shrinked_covariance
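
The helper above is a thin wrapper around OAS; the sketch below reproduces the same call on a toy daily-return frame (the data, column names, and the DataFrame wrapping of the result are assumptions added for illustration):

import numpy as np
import pandas as pd
from sklearn.covariance import OAS

rng = np.random.default_rng(0)
asset_returns = pd.DataFrame(rng.normal(0.0, 0.01, size=(250, 3)),
                             columns=["A", "B", "C"])

oas = OAS()
oas.fit(asset_returns)
shrunk = pd.DataFrame(oas.covariance_,
                      index=asset_returns.columns,
                      columns=asset_returns.columns)
print(shrunk.round(6))
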
Example #5
    def _shrink_covariance(covariance):
        """
        Regularise/Shrink the asset covariances.

        :param covariance: (pd.DataFrame) asset returns covariances
        :return: (pd.DataFrame) shrunk asset returns covariances
        """

        oas = OAS()
        oas.fit(covariance)
        shrinked_covariance = oas.covariance_
        return pd.DataFrame(shrinked_covariance, index=covariance.columns, columns=covariance.columns)
Example #6
def lw_covars(returns):
    """
    Calculates an annualized, shrinkage-constrained covariance matrix between the returns.
    :return: A pandas dataframe of the covariance between the returns
    """
    co_vars = returns.cov() * WEEKDAYS_PER_YEAR

    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Calcing covars as table: {}".format(
            returns.to_dict('list')))

    # Shrink the covars (Ledoit-Wolf-style shrinkage, estimated here with OAS)
    sk = OAS(assume_centered=True)
    sk.fit(returns.values)
    return (1 - sk.shrinkage_) * co_vars + sk.shrinkage_ * np.trace(
        co_vars) / len(co_vars) * np.identity(len(co_vars))
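
lw_covars expects the module-level constant WEEKDAYS_PER_YEAR, a logger, and the numpy/OAS imports to be in scope. A hedged call sketch that supplies plausible stand-ins for those names (all of them assumptions, chosen only so the snippet runs end to end when pasted below the function):

import logging
import numpy as np
import pandas as pd
from sklearn.covariance import OAS

WEEKDAYS_PER_YEAR = 252                  # assumed annualization factor
logger = logging.getLogger(__name__)     # assumed module logger

rng = np.random.default_rng(0)
daily_returns = pd.DataFrame(rng.normal(0.0, 0.01, size=(500, 4)),
                             columns=["A", "B", "C", "D"])

annualized = lw_covars(daily_returns)    # DataFrame blended toward a scaled identity
print(annualized.shape)                  # (4, 4)
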
Example #7
def OAS_est(X):
    '''
    OAS shrunk covariance estimate
    X_size = (n_samples, n_features)
    '''
    oa = OAS()
    cov_oa = oa.fit(X).covariance_

    return cov_oa
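
A quick usage check for OAS_est; the random sample matrix is an assumption, and the shrunk estimate can be compared against the plain empirical covariance:

import numpy as np
from sklearn.covariance import OAS, empirical_covariance

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))            # (n_samples, n_features), assumed data

cov_oa = OAS_est(X)
print(cov_oa.shape)                                      # (5, 5)
print(np.allclose(cov_oa, cov_oa.T))                     # shrinkage keeps symmetry
print(np.linalg.norm(cov_oa - empirical_covariance(X)))  # size of the shrinkage correction
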
Example #8
    def fit_base(self, X, y):
        """
        Fit the SLDA model to the base data.
        :param X: an Nxd torch tensor of base initialization data
        :param y: an Nx1-dimensional torch tensor of the associated labels for X
        :return: None
        """
        print('\nFitting Base...')

        # update class means
        for k in torch.unique(y):
            self.muK[k] = X[y == k].mean(0)
            self.cK[k] = X[y == k].shape[0]
        self.num_updates = X.shape[0]

        print('\nEstimating initial covariance matrix...')
        from sklearn.covariance import OAS
        cov_estimator = OAS(assume_centered=True)
        cov_estimator.fit((X - self.muK[y]).cpu().numpy())
        self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(
            self.device)
Example #9
def correlations(df, categorical_portions):
    # The more samples, the slower, but the more accurate the categorical correlation
    NUM_CATEGORICAL_SAMPLES = 5
    for i in range(NUM_CATEGORICAL_SAMPLES):
        df = pd.concat([df, df], ignore_index=True)  # DataFrame.append was removed in pandas 2.0
    categorical_cols = list(categorical_portions.keys())
    # First generate continuous samples for categorical values. We do this by sampling from
    # a truncated normal distribution in the range for that continuous variable.
    for categorical_col in categorical_cols:
        portions = categorical_portions[categorical_col]
        # The values of the categorical variable in order
        portions_keys = [val for val, frac in portions]
        for i, cat_val in enumerate(df[categorical_col]):
            if len(portions) == 1:
                # Normal sample
                df.loc[i, categorical_col] = norm.rvs()
                continue
            ind = portions_keys.index(cat_val)
            # Cumulative sums of the preceding portions, excluding and then including this one
            sum_a = sum(map(lambda i: portions[i][1], range(ind)))
            sum_b = sum_a + portions[ind][1]
            # Get thresholds
            threshold_a = norm.ppf(sum_a, loc=0.0, scale=1.0)
            threshold_b = norm.ppf(sum_b, loc=0.0, scale=1.0)
            # Sample truncated norm
            df.loc[i, categorical_col] = truncnorm.rvs(threshold_a,
                                                       threshold_b)

    # estimate covariance matrix
    estimator = OAS()
    estimator.fit(df.values)

    cov = pd.DataFrame(estimator.covariance_,
                       index=df.columns,
                       columns=df.columns)
    return cov
Example #10
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.shrinkage_, 0.018740, 4)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d, assume_centered=True)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, 0.020236, 4)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    assert(oa.precision_ is None)
Example #11
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i,j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i,j] = lw.shrinkage_

        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i,j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i,j] = oa.shrinkage_

# plot MSE
pl.subplot(2,1,1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
            label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
            label='OAS', color='r')
pl.ylabel("Squared error")
pl.legend(loc="upper right")
pl.title("Comparison of covariance estimators")
pl.xlim(5, 31)

# plot shrinkage coefficient
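
The fragment above references n_features, n_samples_range, repeat, real_cov and coloring_matrix defined earlier in the script. A plausible setup block in the spirit of scikit-learn's Ledoit-Wolf vs. OAS comparison example (treat the exact constants as assumptions):

import numpy as np
from scipy.linalg import cholesky, toeplitz

n_features = 100
# Simulation covariance matrix (AR(1)-like structure) and its "coloring" factor
r = 0.1
real_cov = toeplitz(r ** np.arange(n_features))
coloring_matrix = cholesky(real_cov)

n_samples_range = np.arange(6, 31, 1)
repeat = 100
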
Example #12
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
Example #13
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)
Example #14
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)
Example #15
def BIC(X, Y, Z, p_lambda, mode=0, shrinkage=0):
    '''
    Bayesian information criterion
    delta_BIC = BIC_M1 - BIC_M0
    M1: feature vectors X and Y modeled by two multivariate Gaussians
    M0:                 Z       modeled by one              Gaussian
    delta_BIC > 0: accept M1
    delta_BIC < 0: accept M0

    Note: the sample covariance matrix is rank deficient when the feature dimension
    exceeds the number of observations, since rank(cov) <= n_observations - 1; see
    http://stats.stackexchange.com/questions/60622/why-is-a-sample-covariance-matrix-singular-when-sample-size-is-less-than-number

    mode
    0: BIC
    1: BICc
    2: ABF2

    shrinkage
    0: no shrinkage
    1: Ledoit-Wolf
    2: OAS

    :param X: frame * feature
    :param Y: frame * feature
    :param Z: frame * feature
    :param p_lambda: weight applied to the complexity penalty P
    :return: delta_BIC = R - p_lambda * P
    '''

    p = X.shape[1]
    N_x = X.shape[0]
    N_y = Y.shape[0]
    N_z = Z.shape[0]

    # centering data
    mean_X = np.mean(X, axis=0)
    mean_Y = np.mean(Y, axis=0)
    mean_Z = np.mean(Z, axis=0)

    X = X - mean_X
    Y = Y - mean_Y
    Z = Z - mean_Z

    if shrinkage == 1:
        lw = LedoitWolf(store_precision=False, assume_centered=False)
        lw.fit(X)
        sigma_x = lw.covariance_
        lw.fit(Y)
        sigma_y = lw.covariance_
        lw.fit(Z)
        sigma_z = lw.covariance_
    elif shrinkage == 2:
        oa = OAS(store_precision=False, assume_centered=False)
        oa.fit(X)
        sigma_x = oa.covariance_
        oa.fit(Y)
        sigma_y = oa.covariance_
        oa.fit(Z)
        sigma_z = oa.covariance_
    else:
        sigma_x = np.cov(X, rowvar=0)
        sigma_y = np.cov(Y, rowvar=0)
        sigma_z = np.cov(Z, rowvar=0)

    sign_z, logdet_z = np.linalg.slogdet(sigma_z)
    sign_y, logdet_y = np.linalg.slogdet(sigma_y)
    sign_x, logdet_x = np.linalg.slogdet(sigma_x)

    # det_z   = sign_z*np.exp(logdet_z)
    # det_y   = sign_y*np.exp(logdet_y)
    # det_x   = sign_x*np.exp(logdet_x)

    R = (N_z/2.0) * logdet_z - \
        (N_y/2.0) * logdet_y - \
        (N_x/2.0) * logdet_x

    k_z = (p + p * (p + 1) / 2.0)

    if mode == 0:
        P = k_z * np.log(N_z) / 2.0
    elif mode == 1:
        P = k_z * np.log(N_z) * (2.0 / (N_z - 2 * k_z - 1) -
                                 (1.0 / (N_z - k_z - 1))) / 2.0
        P *= 10000
    elif mode == 2:
        P = P_ABF2(mean_X, mean_Y, mean_Z, sigma_x, sigma_y, sigma_z, N_x, N_y,
                   N_z)

    # print R, P, R-p_lambda*P, logdet_z, logdet_y, logdet_x, N_z, N_y, N_x
    # if det_z <0: print det_z
    # if det_y <0: print det_y
    # if det_x <0: print det_x

    return R - p_lambda * P
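
A hedged usage sketch for BIC: two synthetic segments and their concatenation, scored with plain BIC (mode=0) and OAS shrinkage (shrinkage=2). The data, dimensions, and p_lambda value are assumptions; the numpy/LedoitWolf/OAS imports are repeated here so the sketch is self-contained:

import numpy as np
from sklearn.covariance import LedoitWolf, OAS

rng = np.random.default_rng(0)
X = rng.normal(0.0, 1.0, size=(200, 13))   # assumed segment 1, frame * feature
Y = rng.normal(0.5, 1.0, size=(180, 13))   # assumed segment 2 with a shifted mean
Z = np.vstack([X, Y])                      # both segments pooled

delta_bic = BIC(X, Y, Z, p_lambda=1.0, mode=0, shrinkage=2)
print(delta_bic, delta_bic > 0)            # > 0 favours modelling X and Y separately
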
Example #16
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(
    n_samples_range,
    lw_mse.mean(1),
    yerr=lw_mse.std(1),
    label="Ledoit-Wolf",
    color="navy",
    lw=2,
)
plt.errorbar(
    n_samples_range,
Example #17
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
             label='Ledoit-Wolf', color='g')
plt.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
             label='OAS', color='r')
plt.ylabel("Squared error")
plt.legend(loc="upper right")
plt.title("Comparison of covariance estimators")
plt.xlim(5, 31)

# plot shrinkage coefficient
Example #18
print(timecourse_files)

# roll through the subjects
print(np.shape(timecourse_data)[0])
for i in range(np.shape(timecourse_data)[0]):
#for i in range(10) :

    print(i)
    
    # extract the timecourses for this subject
    subject_timecourses = timecourse_data[i, : ,:]
    #print np.shape(subject_timecourses)
    
    # calculate Pearson covariance
    X = scale(subject_timecourses, axis=1)
    cov = np.dot(X, np.transpose(X)) / np.shape(X)[1]
    print(cov[:5, :5])
    print(logm(cov)[:5, :5])
    
    # estimate a shrunk (OAS) covariance matrix
    model = OAS(store_precision=False, assume_centered=True)
    model.fit(np.transpose(X))
    cov = model.covariance_
    OAS_matrices[i, :] = np.reshape(cov, (1, 8100))
    #print cov[:5, :5]
    foo = logm(cov)
    #print logm(cov[:5, :5])
    
    
## save the data
np.savetxt('/home/jonyoung/IoP_data/Data/connectivity_data/OAS_data.csv', OAS_matrices, delimiter=',')
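
The loop above assumes a timecourse_data array of shape (subjects, regions, time points) and a preallocated OAS_matrices buffer; the 8100-element reshape implies 90 x 90 region-by-region covariances. A plausible setup for the fragment, with all shapes, names, and imports treated as assumptions:

import numpy as np
from scipy.linalg import logm
from sklearn.preprocessing import scale
from sklearn.covariance import OAS

# Assumed: 10 subjects, 90 brain regions, 200 time points per region
timecourse_data = np.random.normal(size=(10, 90, 200))
timecourse_files = ['subject_%02d.csv' % s for s in range(10)]   # placeholder names
OAS_matrices = np.zeros((timecourse_data.shape[0], 8100))
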
Example #19
X_test = np.dot(base_X_test, coloring_matrix)

###############################################################################
# Compute Ledoit-Wolf and Covariances on a grid of shrinkages

from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \
    log_likelihood, empirical_covariance

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train, assume_centered=True).score(
    X_test, assume_centered=True)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train, assume_centered=True).score(
    X_test, assume_centered=True)

# spanning a range of possible shrinkage coefficient values
shrinkages = np.logspace(-3, 0, 30)
negative_logliks = [-ShrunkCovariance(shrinkage=s).fit(
        X_train, assume_centered=True).score(X_test, assume_centered=True) \
                         for s in shrinkages]

# getting the likelihood under the real model
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

###############################################################################
# Plot results
pl.figure(-1)
Example #20
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oas_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oas_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix)
        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oas = OAS(store_precision=False, assume_centered=True)
        oas.fit(X)
        oas_mse[i, j] = oas.error_norm(real_cov, scaling=False)
        oas_shrinkage[i, j] = oas.shrinkage_

# plot MSE
plt.subplot(211)
plt.errorbar(n_samples_range,
             lw_mse.mean(1),
             yerr=lw_mse.std(1),
             label='Ledoit-Wolf',
             color='navy',
             lw=2)
plt.errorbar(n_samples_range,
             oas_mse.mean(1),
             yerr=oas_mse.std(1),
             label='OAS',
Example #21
def test_oas():
    """Tests OAS module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.shrinkage_, 0.018740, 4)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d, assume_centered=True)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, 0.020236, 4)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    assert(oa.precision_ is None)
Example #22
def cov2corr(cov):
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr


if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()  #.fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
    fig = plt.figure()
    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
        #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
        ax = fig.add_subplot(2, 2, i + 1)
        plot_corr(c, xnames=None, title=titles[i], normcolor=normcolor, ax=ax)
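
cov2corr simply rescales a covariance matrix by the outer product of its standard deviations. A quick check on a hand-built 2 x 2 matrix (the numbers are assumptions chosen for easy mental arithmetic):

import numpy as np

cov = np.array([[4.0, 1.2],
                [1.2, 9.0]])
corr = cov2corr(cov)
print(np.round(corr, 3))   # unit diagonal; off-diagonal 1.2 / (2.0 * 3.0) = 0.2
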
Example #23
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
Example #24
# #############################################################################
# Compare different approaches to setting the parameter

# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS()
loglik_oa = oa.fit(X_train).score(X_test)

# #############################################################################
# Plot results
fig = plt.figure()
plt.title("Regularized covariance: likelihood and shrinkage coefficient")
plt.xlabel('Regularization parameter: shrinkage coefficient')
plt.ylabel('Error: negative log-likelihood on test data')
# range shrinkage curve
plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood")

plt.plot(plt.xlim(), 2 * [loglik_real], '--r',
         label="Real covariance likelihood")

# adjust view
lik_max = np.amax(negative_logliks)
Example #25
    if args.verbose:
        print('sys.argv:')
        print(sys.argv)
        print()
        print('numpy version:', np.__version__)
        print('pandas version:', pd.__version__)
        print('scipy version:', sp.__version__)
        print()

    gene_expr_raw = pd.read_table(args.data)
    gene_expr = gene_expr_raw.T

    X_centered = (gene_expr - gene_expr.mean()) / np.sqrt(gene_expr.var())
    oa = OAS(store_precision=True, assume_centered=True)
    gene_expr_OAS_corr = oa.fit(X_centered)

    n_genes = gene_expr_OAS_corr.covariance_.shape[1]
    g = Graph(directed=False)
    g.add_vertex(n=n_genes)

    spearman = g.new_ep("double", 0)
    pval = g.new_ep("double", 0)
    genes = g.new_vertex_property(
        "string", np.array(np.array(gene_expr.columns, dtype="str")))
    g.vertex_properties["genes"] = genes

    for i in range(n_genes):
        for j in range(i):
            spearman_r = sp.stats.spearmanr(X_centered.iloc[:, i],
                                            X_centered.iloc[:, j])
Example #26
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
pl.subplot(2, 1, 1)
pl.errorbar(n_samples_range,
            lw_mse.mean(1),
            yerr=lw_mse.std(1),
            label='Ledoit-Wolf',
            color='g')
pl.errorbar(n_samples_range,
            oa_mse.mean(1),
            yerr=oa_mse.std(1),
            label='OAS',
            color='r')
Example #27
def cov2corr(cov):
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr

if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()#.fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
    fig = plt.figure()
    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
    #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
        ax = fig.add_subplot(2,2,i+1)
        plot_corr(c, xnames=None, title=titles[i],
Example #28
def test_oas():
    # Tests OAS module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    warn_msg = (
        "Only one sample available. You may want to reshape your data array")
    with pytest.warns(UserWarning, match=warn_msg):
        oa.fit(X_1sample)

    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)