Example #1
 def moments(self):
     """Calculate covariance and correlation matrices,
     trait, genotypic and ontogenetic means"""
     zs = np.array([ind["z"] for ind in self.pop])
     xs = np.array([ind["x"] for ind in self.pop])
     ys = np.array([ind["y"] for ind in self.pop])
     bs = np.array([ind["b"] for ind in self.pop])
     ymean = ys.mean(axis=0)
     zmean = zs.mean(axis=0)
     xmean = xs.mean(axis=0)
     bmean = bs.mean(axis=0)
     phenotypic = np.cov(zs, rowvar=0, bias=1)
     genetic = np.cov(xs, rowvar=0, bias=1)
     heritability = genetic[np.diag_indices_from(genetic)] / phenotypic[np.diag_indices_from(phenotypic)]
     # np.corrcoef ignores the deprecated bias/ddof arguments, so they are omitted here
     corr_phenotypic = np.corrcoef(zs, rowvar=0)
     corr_genetic = np.corrcoef(xs, rowvar=0)
     avgP = avg_ratio(corr_phenotypic, self.modules)
     avgG = avg_ratio(corr_genetic, self.modules)
     return {
         "y.mean": ymean,
         "b.mean": bmean,
         "z.mean": zmean,
         "x.mean": xmean,
         "P": phenotypic,
         "G": genetic,
         "h2": heritability,
         "avgP": avgP,
         "avgG": avgG,
         "corrP": corr_phenotypic,
         "corrG": corr_genetic,
     }
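For reference, a minimal self-contained sketch (with made-up data, not the original self.pop structure) of how the P, G and h2 quantities above fall out of np.cov and np.corrcoef with rowvar=0:

import numpy as np

# hypothetical data: 100 individuals x 3 traits, phenotype z = genetic value x + noise
rng = np.random.default_rng(0)
x = rng.normal(size=(100, 3))            # additive genetic values
z = x + 0.5 * rng.normal(size=(100, 3))  # phenotypes

P = np.cov(z, rowvar=0, bias=1)          # phenotypic covariance (columns = traits)
G = np.cov(x, rowvar=0, bias=1)          # genetic covariance
h2 = np.diag(G) / np.diag(P)             # per-trait heritability, as in moments()
corrP = np.corrcoef(z, rowvar=0)         # phenotypic correlation matrix
print(h2)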
Example #2
def lda(data,labels,redDim):

    # Centre data
    data -= data.mean(axis=0)
    nData = np.shape(data)[0]
    nDim = np.shape(data)[1]
    
    Sw = np.zeros((nDim,nDim))
    Sb = np.zeros((nDim,nDim))
    
    C = np.cov(np.transpose(data))
    
    # Loop over classes
    classes = np.unique(labels)
    for i in range(len(classes)):
        # Find relevant datapoints
        indices = np.squeeze(np.where(labels==classes[i]))
        d = np.squeeze(data[indices,:])
        classcov = np.cov(np.transpose(d))
        # np.float was removed from NumPy; the builtin float does the same job here
        Sw += float(np.shape(indices)[0])/nData * classcov
        
    Sb = C - Sw
    # Now solve for W and compute mapped data
    # Compute eigenvalues, eigenvectors and sort into order
    evals,evecs = la.eig(Sw,Sb)
    indices = np.argsort(evals)
    indices = indices[::-1]
    evecs = evecs[:,indices]
    evals = evals[indices]
    w = evecs[:,:redDim]
    newData = np.dot(data,w)
    return newData,w
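A hypothetical driver for the function above (assuming numpy as np and scipy.linalg as la are the imports it relies on; three class means are used so that Sb is non-singular):

import numpy as np
from scipy import linalg as la  # assumed to be the `la` used inside lda()

rng = np.random.RandomState(0)
data = np.vstack([rng.randn(50, 2) + [0, 0],
                  rng.randn(50, 2) + [4, 0],
                  rng.randn(50, 2) + [0, 4]])
labels = np.repeat([0, 1, 2], 50)

newData, w = lda(data, labels, redDim=1)   # project onto one discriminant direction
print(newData.shape)                       # (150, 1)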
Example #3
def bootstrapped_intercluster_mahalanobis(cluster1, cluster2, n_boots=1000,
    fix_covariances=True):
    """Bootstrap the intercluster distance.
    
    Returns:
        m - The mean distance
        CI - 95% confidence interval on the distance
        distances - an array of the distances measured on each boot
    """
    d_l = []
    
    # Determine the covariance matrices, or recalculate each time
    if fix_covariances:
        icov1 = np.linalg.inv(np.cov(cluster1, rowvar=0))
        icov2 = np.linalg.inv(np.cov(cluster2, rowvar=0))
    else:
        icov1, icov2 = None, None
    
    # Bootstrap
    for n_boot in range(n_boots):
        # Draw
        idxs1 = np.random.randint(0, len(cluster1), len(cluster1))
        idxs2 = np.random.randint(0, len(cluster2), len(cluster2))
        
        # Calculate and store
        d_l.append(intercluster_mahalanobis(
            cluster1[idxs1], cluster2[idxs2], icov1, icov2))
    
    # Statistics
    d_a = np.asarray(d_l)
    m = np.mean(d_a)
    CI = np.percentile(d_a, (2.5, 97.5))  # matplotlib.mlab.prctile was removed; np.percentile is equivalent here
    return m, CI, d_a
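The helper intercluster_mahalanobis is not shown above; a plausible sketch (an assumption, not the original implementation) that matches the call signature would measure the Mahalanobis distance between the two cluster means under an averaged inverse covariance:

import numpy as np
from scipy.spatial.distance import mahalanobis

def intercluster_mahalanobis(cluster1, cluster2, icov1=None, icov2=None):
    # recompute the inverse covariances unless fixed ones were passed in
    if icov1 is None:
        icov1 = np.linalg.inv(np.cov(cluster1, rowvar=0))
    if icov2 is None:
        icov2 = np.linalg.inv(np.cov(cluster2, rowvar=0))
    icov = (icov1 + icov2) / 2.0
    return mahalanobis(cluster1.mean(axis=0), cluster2.mean(axis=0), icov)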
Example #4
 def test_cov_parameters(self):
     # Ticket #91
     x = np.random.random((3, 3))
     y = x.copy()
     np.cov(x, rowvar=1)
     np.cov(y, rowvar=0)
     assert_array_equal(x, y)
Example #5
def wprp_split(gals, red_split, box_size, cols=['ssfr', 'pred'], jack_nside=3,
               rpmin=0.1, rpmax=20.0, Nrp=25):  # for 2 splits
    """
    Calculates the 2PCF of gals binned by sSFR, separated by red_split.

    Note that sSFR can be substitued in _cols_ to bin by, say, concentration

    Accepts:
        gals - numpy array with objects, their positions, and attributes
        red_split - value which separates two populations
        box_size - box_size of the objects in gals
        cols - tags to specify the actual and predicted distribution. Defaults
               to ['ssfr', 'pred'], but could be modified to use, say
               ['c', 'pred_c'] (assuming they exist in gals).

    Returns:
        [r, [actual], [pred], [err], [chi2]]
            r - centers of r bins
            [actual] - clustering of red/blue galaxies
            [pred] - clustering of predicted red/blue galaxies
            [err] - errorbars for red/blue galaxies
            [chi2] - goodness of fit for red/blue galaxies
    """
    r, rbins = make_r_scale(rpmin, rpmax, Nrp)
    n_jack = jack_nside ** 2
    results = []
    results.append(r)
    r_jack = []
    b_jack = []
    for col in cols:
        red = gals[gals[col] < red_split]
        blue = gals[gals[col] > red_split]
        r = calculate_xi(red, box_size, True, jack_nside, rpmin, rpmax, Nrp)
        b = calculate_xi(blue, box_size, True, jack_nside, rpmin, rpmax, Nrp)
        results.append([r[0], b[0]])
        if jack_nside <= 1:
            r_var = r[1]
            b_var = b[1]
        else:
            r_jack.append(r[2])
            b_jack.append(b[2])
    if jack_nside > 1:
        r_cov = np.cov(r_jack[0] - r_jack[1], rowvar=0, bias=1) * (n_jack - 1)
        b_cov = np.cov(b_jack[0] - b_jack[1], rowvar=0, bias=1) * (n_jack - 1)
        r_var = np.sqrt(np.diag(r_cov))
        b_var = np.sqrt(np.diag(b_cov))
    results.append([r_var, b_var])

    if jack_nside > 1:
        r_chi2 = calculate_chi_square(results[1][0], results[2][0], r_cov)
        b_chi2 = calculate_chi_square(results[1][1], results[2][1], b_cov)
        print "Goodness of fit for the red (lo) and blue (hi): ", r_chi2, b_chi2
    else:
        d_r = results[1][0] - results[2][0]
        d_b = results[1][1] - results[2][1]
        r_chi2 = d_r**2/np.sqrt(r_var[0]**2 + r_var[1]**2)
        b_chi2 = d_b**2/np.sqrt(b_var[0]**2 + b_var[1]**2)
    results.append([r_chi2, b_chi2])

    return results
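The (n_jack - 1) factor applied to the biased covariance of jackknife samples above is the usual jackknife scaling. A small self-contained check (toy numbers, not the clustering pipeline) showing that it reproduces the standard error of the mean for delete-one resamples:

import numpy as np

rng = np.random.default_rng(1)
data = rng.normal(size=(100, 5))   # 100 observations of a 5-bin statistic
n = len(data)

# delete-one jackknife replicates of the per-bin mean
jack = np.array([np.delete(data, i, axis=0).mean(axis=0) for i in range(n)])
jack_cov = np.cov(jack, rowvar=0, bias=1) * (n - 1)

print(np.sqrt(np.diag(jack_cov)))             # jackknife error bars
print(data.std(axis=0, ddof=1) / np.sqrt(n))  # agree with the standard error of the mean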
Example #6
 def test_2d_wo_missing(self):
     # Test cov on 1 2D variable w/o missing values
     x = self.data.reshape(3, 4)
     assert_almost_equal(np.cov(x), cov(x))
     assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False))
     assert_almost_equal(np.cov(x, rowvar=False, bias=True),
                         cov(x, rowvar=False, bias=True))
Example #7
def test_update_mean_cov_L_lmbda_converges_to_weighted_mean_and_cov():
    N_init = 10
    N = 10000
    D = 2
    X = np.random.randn(N, D)
    weights = np.random.rand(N)
    
    old_mean = np.average(X[:N_init], axis=0, weights=weights[:N_init])
    old_cov_L = np.linalg.cholesky(np.cov(X[:N_init].T, ddof=0))
    
    sum_old_weights = np.sum(weights[:N_init])
    lmbdas = weights_to_lmbdas(sum_old_weights, weights[N_init:])
    
    mean, cov_L = update_mean_cov_L_lmbda(X[N_init:], old_mean, old_cov_L, lmbdas)

    full_mean = np.average(X, axis=0, weights=weights)
    
    # the above method uses N rather than N-1 to normalise covariance (biased)
    try:
        full_cov = np.cov(X.T, ddof=0, aweights=weights)
    except TypeError:
        raise SkipTest("Numpy's cov method does not support aweights keyword.")
    
    cov = np.dot(cov_L, cov_L.T)
    
    assert_allclose(full_mean, mean)
    assert_allclose(full_cov, cov, atol=1e-2)
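For context, a minimal illustration (independent of the helpers above, which are not shown) of what the aweights argument used in this test does: with ddof=0 it reproduces the weight-normalised covariance computed by hand.

import numpy as np

rng = np.random.default_rng(2)
X = rng.normal(size=(500, 2))
w = rng.random(500)

mean_w = np.average(X, axis=0, weights=w)
d = X - mean_w
cov_manual = (w[:, None] * d).T @ d / w.sum()   # biased, weight-normalised covariance
cov_numpy = np.cov(X.T, ddof=0, aweights=w)

print(np.allclose(cov_manual, cov_numpy))       # True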
Example #8
 def testComponentSeparation(self):
     A = generate_covsig([[10,5,2],[5,10,2],[2,2,10]], 500)
     B = generate_covsig([[10,2,2],[2,10,5],[2,5,10]], 500)
         
     X = np.dstack([A,B])
     W, V = csp(X,[1,2])        
     C1a = np.cov(X[:,:,0].dot(W).T)
     C2a = np.cov(X[:,:,1].dot(W).T)
     
     Y = np.dstack([B,A])
     W, V = csp(Y,[1,2])
     C1b = np.cov(Y[:,:,0].dot(W).T)
     C2b = np.cov(Y[:,:,1].dot(W).T)
     
     # check symmetric case
     self.assertTrue(np.allclose(C1a.diagonal(), C2a.diagonal()[::-1]))
     self.assertTrue(np.allclose(C1b.diagonal(), C2b.diagonal()[::-1]))
     
     # swapping class labels (or in this case, trials) should not change the result
     self.assertTrue(np.allclose(C1a, C1b))
     self.assertTrue(np.allclose(C2a, C2b))
     
     # variance of first component should be greatest for class 1
     self.assertTrue(C1a[0,0] > C2a[0,0])
     
     # variance of last component should be greatest for class 2
     self.assertTrue(C1a[2,2] < C2a[2,2])
     
     # variance of central component should be equal for both classes
     self.assertTrue(np.allclose(C1a[1,1], C2a[1,1]))
Example #9
def test_sample_conditional_mixedkernel(session_tf):
    q_mu = np.random.randn(Data.M , Data.L)  # M x L
    q_sqrt = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.L)])  # L x M x M
    Z = Data.X[:Data.M,...]  # M x D
    N = int(10e5)
    Xs = np.ones((N, Data.D), dtype=float_type)


    values = {"Xnew": Xs, "q_mu": q_mu, "q_sqrt": q_sqrt}
    placeholders = _create_placeholder_dict(values)
    feed_dict = _create_feed_dict(placeholders, values)

    # Path 1: mixed kernel: most efficient route
    W = np.random.randn(Data.P, Data.L)
    mixed_kernel = mk.SeparateMixedMok([RBF(Data.D) for _ in range(Data.L)], W)
    mixed_feature = mf.MixedKernelSharedMof(InducingPoints(Z.copy()))

    sample = sample_conditional(placeholders["Xnew"], mixed_feature, mixed_kernel,
                                placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=True)
    value, mean, var = session_tf.run(sample, feed_dict=feed_dict)


    # Path 2: independent kernels, mixed later
    separate_kernel = mk.SeparateIndependentMok([RBF(Data.D) for _ in range(Data.L)])
    shared_feature = mf.SharedIndependentMof(InducingPoints(Z.copy()))
    sample2 = sample_conditional(placeholders["Xnew"], shared_feature, separate_kernel,
                                 placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=True)
    value2, mean2, var2 = session_tf.run(sample2, feed_dict=feed_dict)
    value2 = np.matmul(value2, W.T)
    # check if mean and covariance of samples are similar
    np.testing.assert_array_almost_equal(np.mean(value, axis=0),
                                         np.mean(value2, axis=0), decimal=1)
    np.testing.assert_array_almost_equal(np.cov(value, rowvar=False),
                                         np.cov(value2, rowvar=False), decimal=1)
Example #10
def get_stats(arrs,interpolatenans=False):
	arrslen = len(arrs)
	if DEBUG_PRINT: print "array nums:", arrslen
	stats = [0] * arrslen
	for i,arr in enumerate(arrs):
		if(len(arrs[i].shape) > 2):
			stats[i] = None
		else:
			maskedarr = ma.masked_array(arrs[i],fill_value=0)
			if interpolatenans:
				arr = interpolate_nans(arr)
			else:
				arr = maskedarr.filled() # check and see what happens when you interpolate
			stats[i] = {'avgs':[np.mean(arr,axis=0),np.mean(arr,axis=1)],
					'stdevs':[np.std(arr,axis=0),np.std(arr,axis=1)],
					'cov':[0,0]}

			xlen = arr.shape[0]
			ylen = arr.shape[1]
			# get specific covariance values along x axis
			covx = np.zeros(xlen)
			covar = np.cov(arr) # get the covariance values by row for dim 1
			for x in range(1,xlen):
				covx[x-1] = covar[x][x-1]
			stats[i]['cov'][0] = covx
			# get specific covariance values along y axis
			covy = np.zeros(ylen)
			covar = np.cov(arr,rowvar=0)  # get the covariance values by col for dim 2
			for y in range(1,ylen):
				covy[y-1] = covar[y][y-1]
			stats[i]['cov'][1] = covy
	return stats
Example #11
def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function,
                                                y_is_x):
    # check that pairwise_distances give the same result in sequential and
    # parallel, when metric has data-derived parameters.
    with config_context(working_memory=1):  # to have more than 1 chunk
        rng = np.random.RandomState(0)
        X = rng.random_sample((1000, 10))

        if y_is_x:
            Y = X
            expected_dist_default_params = squareform(pdist(X, metric=metric))
            if metric == "seuclidean":
                params = {'V': np.var(X, axis=0, ddof=1)}
            else:
                params = {'VI': np.linalg.inv(np.cov(X.T)).T}
        else:
            Y = rng.random_sample((1000, 10))
            expected_dist_default_params = cdist(X, Y, metric=metric)
            if metric == "seuclidean":
                params = {'V': np.var(np.vstack([X, Y]), axis=0, ddof=1)}
            else:
                params = {'VI': np.linalg.inv(np.cov(np.vstack([X, Y]).T)).T}

        expected_dist_explicit_params = cdist(X, Y, metric=metric, **params)
        dist = np.vstack(tuple(dist_function(X, Y,
                                             metric=metric, n_jobs=n_jobs)))

        assert_allclose(dist, expected_dist_explicit_params)
        assert_allclose(dist, expected_dist_default_params)
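A standalone sketch of the data-derived parameter this test exercises: for the Mahalanobis metric, the default VI is the (transposed) inverse covariance of the data, so passing it explicitly gives the same distances.

import numpy as np
from scipy.spatial.distance import cdist, pdist, squareform

rng = np.random.RandomState(0)
X = rng.random_sample((50, 3))

VI = np.linalg.inv(np.cov(X.T)).T
d_default = squareform(pdist(X, metric="mahalanobis"))
d_explicit = cdist(X, X, metric="mahalanobis", VI=VI)
print(np.allclose(d_default, d_explicit))  # True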
Example #12
def get_features(data):
    X = [d[0] for d in data] 
    Y = [d[1] for d in data]
    Z = [d[2] for d in data]
    x_mean = np.mean(X)
    y_mean = np.mean(Y)
    z_mean = np.mean(Z)
    x_var  = np.var(X)
    y_var =  np.var(Y)
    z_var =  np.var(Z)
    mean_magnitude = np.mean([math.sqrt(x*x + y*y +z*z) for (x,y,z) in izip(X,Y,Z)]) 
    magnitude_mean = math.sqrt(x_mean*x_mean + y_mean*y_mean + z_mean*z_mean)
    sma = np.mean([math.fabs(x) + math.fabs(y) + math.fabs(z) for (x,y,z) in izip(X,Y,Z)])
    corr_xy = (np.cov(X,Y) / (math.sqrt(x_var) * math.sqrt(y_var)))[0][1]
    corr_yz = (np.cov(Y,Z) / (math.sqrt(z_var) * math.sqrt(y_var)))[0][1]
    corr_xz = (np.cov(Z,X) / (math.sqrt(x_var) * math.sqrt(z_var)))[0][1]
    vector_d = [(x - x_mean, y - y_mean, z - z_mean) for (x,y,z) in izip(X,Y,Z)]
    vector_v = [x_mean, y_mean, z_mean]
    vector_p = [np.multiply((np.dot(d, vector_v)/np.dot(vector_v, vector_v)), vector_v) for d in vector_d]
    
    vector_h = [np.subtract(d, p) for d, p in izip(vector_d, vector_p)]
    mod_vector_p = [np.linalg.norm(p) for p in vector_p]
    mod_vector_h = [np.linalg.norm(h) for h in vector_h]
    cor_p_h = (np.cov(mod_vector_h,mod_vector_p) / (math.sqrt(np.var(mod_vector_h)) * math.sqrt(np.var(mod_vector_p))))[0][1]
    
    vector_p = np.mean(vector_p, axis=0)
    vector_h = np.mean(vector_h, axis=0)
    mod_vector_p = np.mean(mod_vector_p)
    mod_vector_h = np.mean(mod_vector_h)
    ret = [x_mean, y_mean, z_mean, x_var, y_var, z_var, mean_magnitude, magnitude_mean, sma, corr_xy, corr_yz, corr_xz, cor_p_h, mod_vector_p, mod_vector_h]
    ret.extend([x for x in vector_p])

    ret.extend([x for x in vector_h])
    return ret
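One thing worth noting about the corr_* features above: np.cov defaults to ddof=1 while np.var defaults to ddof=0, so the ratio differs from the Pearson coefficient by a factor of N/(N-1). A quick check:

import numpy as np

rng = np.random.default_rng(3)
X = rng.normal(size=100)
Y = 0.5 * X + rng.normal(size=100)

mixed = (np.cov(X, Y) / (np.sqrt(np.var(X)) * np.sqrt(np.var(Y))))[0][1]
pearson = np.corrcoef(X, Y)[0][1]
print(mixed / pearson)   # ~1.0101, i.e. N/(N-1) with N=100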
Example #13
def main():
    fnm = 'prob3.data'
    data = md.read_data(fnm)
    D1 = data[0:8,].T
    D2 = data[8:,].T

    u1 = np.matrix((np.mean(D1[0,:]), np.mean(D1[1,:]))).T
    u2 = np.matrix((np.mean(D2[0,:]), np.mean(D2[1,:]))).T

    sigma1 = np.asmatrix(np.cov(D1, bias=1))
    sigma2 = np.asmatrix(np.cov(D2, bias=1))

    g1 = discrim_func(u1, sigma1)
    g2 = discrim_func(u2, sigma2)

    steps = 100
    x = np.linspace(-2,2,steps)
    y = np.linspace(-6,6,steps)

    X,Y = np.meshgrid(x,y)
    z = [g1(X[r,c], Y[r,c]) - g2(X[r,c], Y[r,c])
         for r in range(0,steps) for c in range(0,steps)]
    Z = np.array(z)
    px = X.ravel()
    py = Y.ravel()
    pz = Z.ravel()
    gridsize = 50
    plot = plt.subplot(111)
    plt.hexbin(px,py,C=pz, gridsize=gridsize, cmap=cm.jet, bins=None)
    cb = plt.colorbar()
    cb.set_label('g1 minus g2')
    return plot
Example #14
def cov_estimation(list_of_recarrays, index_name, pair_wise=False):
    def get_the_other_name(rec, index_name):
        assert len(rec.dtype.names) == 2
        name = [nm for nm in rec.dtype.names if nm != index_name]
        assert len(name) == 1
        return name[0]
    for array in list_of_recarrays:
        array[get_the_other_name(array, index_name)] = winsorize(array[get_the_other_name(array, index_name)], 99)
    nn = len(list_of_recarrays)
    if not pair_wise:
        new_rec = list_of_recarrays[0]
        for ii in range(1, nn):
            new_rec = rec_join(index_name, new_rec, list_of_recarrays[ii], jointype='inner', defaults=None, r1postfix='', r2postfix=str(ii+1))
            dat_mat = np.c_[[new_rec[nm] for nm in new_rec.dtype.names if nm != index_name]]
            covmat = np.cov(dat_mat)
    else :
        covmat = np.zeros((nn, nn))
        for ii in range(0, nn):
            covmat[ii,ii] = list_of_recarrays[ii][get_the_other_name(list_of_recarrays[ii], index_name)].var()
            for jj in range(ii+1, nn):
                new_rec = rec_join(index_name, list_of_recarrays[ii], list_of_recarrays[jj], jointype='inner', defaults=None, r1postfix='1', r2postfix='2')
                dat_mat = np.c_[[new_rec[nm] for nm in new_rec.dtype.names if nm != index_name]]
                tmp_cov = np.cov(dat_mat)[0,1]
                covmat[ii,jj] = tmp_cov
                covmat[jj,ii] = tmp_cov
    return covmat
Example #15
 def test_nancov(self):
     targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1]
     targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
     self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1)
     targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1]
     targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
     self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
Example #16
    def dataNorm(self):
        SXX = np.cov(self.X)
        U, l, Ut = LA.svd(SXX, full_matrices=True) 
        H = np.dot(LA.sqrtm(LA.inv(np.diag(l))),Ut)
        self.nX = np.dot(H,self.X)

        #print np.cov(self.nX)
        #print "mean:"
        #print np.mean(self.nX)

        SYY = np.cov(self.Y)
        U, l, Ut = LA.svd(SYY, full_matrices=True) 
        H = np.dot(LA.sqrtm(LA.inv(np.diag(l))),Ut)
        #print "H"
        #print H
        self.nY = np.dot(H,self.Y)
        #print np.cov(self.nY)

        print "dataNorm_X:"
        for i in range(len(self.nX)):
            print(self.nX[i])
        print("---")

        print "dataNorm_Y:"
        for i in range(len(self.nY)):
            print(self.nY[i])
        print("---")
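A compact, self-contained version of the whitening step above (np and LA assumed to be numpy and scipy.linalg, with variables in rows as np.cov expects by default): after the transform the empirical covariance is the identity.

import numpy as np
from scipy import linalg as LA

rng = np.random.default_rng(4)
X = rng.normal(size=(3, 1000)) * np.array([[1.0], [2.0], [0.5]])   # 3 variables in rows

SXX = np.cov(X)
U, l, Ut = LA.svd(SXX, full_matrices=True)
H = np.dot(LA.sqrtm(LA.inv(np.diag(l))), Ut)
nX = np.dot(H, X)
print(np.round(np.cov(nX), 2))   # ~ identity matrix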
Example #17
def poly_to_gaussian(points):
    """Given `points` of the border of a polygon in local space, return the
    center and covariance matrix of the smallest area enclosing ellipse"""
    import subprocess as sp
    np.savetxt('__poly', points, fmt='%.10f %.10f')
    params = sp.check_output('./ellipse', shell=True)
    a, b, c, d, f, g = [float(_) for _ in params.decode('utf8').split(' ')]
    if b*b-a*c < 1e-8:
        return tuple(points.mean(0)), np.cov(points.T)
    x0 = (c*d-b*f)/(b*b-a*c)
    y0 = (a*f-b*d)/(b*b-a*c)
    numerator = 2*(a*f*f+c*d*d+g*b*b-2*b*d*f-a*c*g)
    denom_sqrt = np.sqrt(4*b*b+(a-c)**2)
    _a = np.sqrt(numerator/((b*b-a*c)*(denom_sqrt - (a+c))))
    _b = np.sqrt(numerator/((b*b-a*c)*(-denom_sqrt - (a+c))))
    det = abs(np.linalg.det([[a, b, d], [b, c, f], [d, f, g]]))
    if det < 1e-6 or np.any(np.isnan([_a, _b])):
        # screw it, let's do something less accurate but more straightforward
        return tuple(points.mean(0)), np.cov(points.T)
    if b < 1e-5:
        theta = 0 if a < c else np.pi/2
    else:
        theta = .5*arccot((a-c)/(2*b))
        if a > c:
            theta += np.pi/2
    R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
    S = np.diag([_a, _b])
    T = R.dot(S)
    return (x0, y0), .5*T.dot(T.T)
Example #18
 def test_cov_parameters(self,level=rlevel):
     """Ticket #91"""
     x = np.random.random((3, 3))
     y = x.copy()
     np.cov(x, rowvar=1)
     np.cov(y, rowvar=0)
     assert_array_equal(x, y)
Example #19
 def check_cov_parameters(self,level=rlevel):
     """Ticket #91"""
     x = N.random.random((3,3))
     y = x.copy()
     N.cov(x,rowvar=1)
     N.cov(y,rowvar=0)
     assert_array_equal(x,y)
Example #20
def fit_gaussians(x_train_boxcox, y_train):
    """ Fit class-dependent multivariate gaussians on the training set.

    Parameters
    ----------
    x_train_boxcox : np.array [n_samples, n_features_trans]
        Transformed training features.
    y_train : np.array [n_samples]
        Training labels.

    Returns
    -------
    rv_pos : multivariate normal
        multivariate normal for melody class
    rv_neg : multivariate normal
        multivariate normal for non-melody class
    """
    pos_idx = np.where(y_train == 1)[0]
    mu_pos = np.mean(x_train_boxcox[pos_idx, :], axis=0)
    cov_pos = np.cov(x_train_boxcox[pos_idx, :], rowvar=0)

    neg_idx = np.where(y_train == 0)[0]
    mu_neg = np.mean(x_train_boxcox[neg_idx, :], axis=0)
    cov_neg = np.cov(x_train_boxcox[neg_idx, :], rowvar=0)
    rv_pos = multivariate_normal(mean=mu_pos, cov=cov_pos, allow_singular=True)
    rv_neg = multivariate_normal(mean=mu_neg, cov=cov_neg, allow_singular=True)
    return rv_pos, rv_neg
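A hypothetical driver for fit_gaussians (it assumes numpy as np and from scipy.stats import multivariate_normal as its imports; the data here are made up):

import numpy as np
from scipy.stats import multivariate_normal  # assumed import used inside fit_gaussians

rng = np.random.RandomState(0)
x_train_boxcox = np.vstack([rng.randn(200, 4) + 2.0,    # "melody" class
                            rng.randn(200, 4) - 2.0])   # "non-melody" class
y_train = np.concatenate([np.ones(200), np.zeros(200)])

rv_pos, rv_neg = fit_gaussians(x_train_boxcox, y_train)
x_new = np.zeros((1, 4))
print(rv_pos.pdf(x_new), rv_neg.pdf(x_new))   # likelihood of a new point under each class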
Example #21
def correlation():
    df =  pd.read_csv("dataset/train_new.csv")
    # df = df.dropna(axis=0,how="any")
    print df.describe()
    # print df.head()
    param=[]
    correlation=[]
    abs_corr=[]
    covariance = []
    columns = ["Applicant_Gender","App_age","Applicant_Occupation","Applicant_Qualification","Manager_age","Manager_Status","Manager_Gender","Manager_Business","Manager_Business2","Manager_Num_Application"]
    for c in columns:
        #Check if binary or continuous

        if len(df[c].unique())<=12:
            corr = spearmanr(df['Business_Sourced'],df[c])[0]
            print "spear",c,corr
            y = df['Business_Sourced']
            x = df[c]
            X = np.vstack((y,x))
            covar = np.cov(X)
        else:
            corr = pointbiserialr(df['Business_Sourced'],df[c])[0]
            print "point",c,corr
            y = df['Business_Sourced']
            x = df[c]
            X = np.vstack((y,x))
            covar = np.cov(X)
        param.append(c)
        correlation.append(corr)
        abs_corr.append(abs(corr))
        # covariance.append(covar[0][1])
    print covariance
Example #22
 def test_1d_wo_missing(self):
     "Test cov on 1D variable w/o missing values"
     x = self.data
     assert_almost_equal(np.cov(x), cov(x))
     assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False))
     assert_almost_equal(np.cov(x, rowvar=False, bias=True),
                         cov(x, rowvar=False, bias=True))
Example #23
  def fit(self, data, chunks):
    """Learn the RCA model.

    Parameters
    ----------
    data : (n x d) data matrix
        Each row corresponds to a single instance
    chunks : (n,) array of ints
        When ``chunks[i] == -1``, point i doesn't belong to any chunklet.
        When ``chunks[i] == j``, point i belongs to chunklet j.
    """
    data, M_pca = self._process_data(data)

    chunks = np.asanyarray(chunks, dtype=int)
    chunk_mask, chunked_data = _chunk_mean_centering(data, chunks)

    inner_cov = np.cov(chunked_data, rowvar=0, bias=1)
    dim = self._check_dimension(np.linalg.matrix_rank(inner_cov))

    # Fisher Linear Discriminant projection
    if dim < data.shape[1]:
      total_cov = np.cov(data[chunk_mask], rowvar=0)
      tmp = np.linalg.lstsq(total_cov, inner_cov)[0]
      vals, vecs = np.linalg.eig(tmp)
      inds = np.argsort(vals)[:dim]
      A = vecs[:, inds]
      inner_cov = A.T.dot(inner_cov).dot(A)
      self.transformer_ = _inv_sqrtm(inner_cov).dot(A.T)
    else:
      self.transformer_ = _inv_sqrtm(inner_cov).T

    if M_pca is not None:
        self.transformer_ = self.transformer_.dot(M_pca)

    return self
Example #24
def get_projection():
    #get the matrix for raw data
    cla0_matri = np.asmatrix(cla_0)
    cla1_matri = np.asmatrix(cla_1)
    #compute the mean for each classes
    #select the 8 features
    mu_0 =(cla0_matri.transpose()[:8]).mean(1)
    mu_1 =(cla1_matri.transpose()[:8]).mean(1)
    #print mu_0,mu_1
    #compute the covariance matrix for each class
    cov_0 = np.asmatrix(np.cov(cla0_matri.transpose()[:8]))
    cov_1 = np.asmatrix(np.cov(cla1_matri.transpose()[:8]))
    #compute the scatter matrices s0 and s1 for each class
    s_0 = np.dot((len(cla0_matri)-1),cov_0)
    s_1 = np.dot((len(cla1_matri)-1),cov_1)
    #compute the winthin class scatter
    s_w = np.add(s_0,s_1)
    #compute the inverse of winthin calss scatter
    inv_s = np.linalg.inv(s_w)
    #get the finally optimal line direction v
    dir_v = np.matrix.dot(inv_s,np.subtract(mu_0,mu_1))
    print dir_v
    #get the projection for all data set
    proj_data = np.matrix.dot(dir_v.transpose(),((np.asmatrix(data_set)).transpose())[:8])
    proj_lis = (proj_data.tolist())[0]
    #adding the execlude labels to the projected data
    for it in range(0,len(proj_lis)):
        temp_lis = []
        temp_lis.append(float(proj_lis[it]))
        #adding the label
        temp_lis.append(int(data_set[it][8]))
        proj_data_set.append(temp_lis)
Example #25
def kal0(x,sv=None,Kdisp=1.0,Nsamp=1000,L=5,Norder=3,pg=1.0,vg=1.0,
           sigma0=1000,N0=200,Prange=8):
  x = x.T
  # Time scale
  if sv is None:
    mux = x-mean(x,0)
    phi = unwrap(angle(mux[:,0]+1j*mux[:,1]))
    sv= 2*pi*x.shape[0]/abs(phi[-1]-phi[0])
  # System matrix
  A =  Kdisp*eye(2*x.shape[1])
  A[:x.shape[1],x.shape[1]:2*x.shape[1]] = eye(x.shape[1])/sv
  
  # Observation matrix
  C = zeros((x.shape[1],2*x.shape[1]))
  C[:x.shape[1],:x.shape[1]] = eye(x.shape[1])
  
  # Observation covariance
  R = cov((x[:-1]-x[1:]).T)/sqrt(2.0)
  
  # System covariance
  idx = random.randint(x.shape[0]-5,size=(Nsamp))
  idx = vstack([idx+i for i in xrange(L)])
  tx = x[idx].reshape(idx.shape[0],-1)
  P = array([[(i-(L-1)/2)**j for i in xrange(L)] for j in xrange(Norder)])
  K = lstsq(P.T,tx)[0]
  s = (cov((tx-dot(P[:-1].T,K[:-1]))[1])-cov((tx-dot(P.T,K))[1]))/cov((tx-dot(P[:-1].T,K[:-1]))[1])
  D = zeros_like(A)
  D[:x.shape[1],:x.shape[1]] = R*pg
  D[x.shape[1]:,x.shape[1]:] = R*vg
  Q = D*s
  return(Kalman(A,C,Q,R))
Example #26
	def stop_training(self, destroy_training_set = True):
		self.covariance = numpy.cov(self.training_set.T)
		self.mean = numpy.mean(self.training_set, axis=0)
		xy = self.training_set[:,-2:]
		self.xycovariance = numpy.cov(xy.T)
		self.xymean = numpy.mean(xy, axis=0)
		self.training_set = None
Example #27
    def run(self,X):
        if self.covType == "diag":
            Sigma = np.diag(np.diag(np.cov(X.T)))
        elif self.covType == "full":
            Sigma = np.cov(X.T)
        else:
            print "error"

        self.mu = None
        self.labels = None

        n,p = X.shape
        mu,pi = self._initialize(X)
        iter = 0
        converge = False
        while iter < self.maxIter and not converge:
            old_mu = mu.copy()
            old_pi = pi.copy()
            gamma = self._estep(X, old_mu, Sigma, old_pi)
            mu,pi = self._mstep(X,gamma)
            if np.sum(abs(old_mu-mu))/np.sum(abs(old_mu))<0.001:
                converge=True
                print("GMM algorithm converges in "+str(iter+1)+" iterations")
            iter = iter + 1
        if iter == self.maxIter:
            print("GMM algorithm fails to converge in "+str(iter)+" iterations")

        labels = [np.argmax(g) for g in gamma]
        self.mu = mu
        self.labels =labels
Example #28
def plt_1d(class1, class2):

    prior1 = 0.5
    prior2 = 0.5

    mean1 = np.array([np.mean(class1[:, 0])])
    mean2 = np.array([np.mean(class2[:, 0])])

    # print mean1, mean2

    cov1 = np.array([[np.cov([class1[:, 0]])]])
    cov2 = np.array([[np.cov([class2[:, 0]])]])

    # print cov1, cov2

    discriminant_function1 = gdf.gen_discriminant_function_of_normal_distribution(mean1, cov1, prior1)
    discriminant_function2 = gdf.gen_discriminant_function_of_normal_distribution(mean2, cov2, prior2)

    # X = np.linspace(np.amin(class1[:, 0]), np.amax(class1[:, 0]), 200)

    X = np.linspace(-100, 100, 100)

    y1 = [discriminant_function1(np.array([x])) for x in X]

    y2 = [discriminant_function2(np.array([x])) for x in X]

    plt.plot(X, y1)

    plt.plot(X, y2)

    plt.show()
Example #29
def covandcoef(compare_data):
	hx = []
	hy = []

	ox = []
	oy = []

	tx = []
	ty = []

	for i in compare_data:
		hx.append(i[4])
		hy.append(i[7])


	for i in range(0,7):
		ox.append(compare_data[i][4])
		oy.append(compare_data[i][7])


	for i in range(0,89):
		tx.append(compare_data[i][4])
		ty.append(compare_data[i][7])


	X = np.vstack((hx,hy))
	Z = np.vstack((ox,oy))
	Y = np.vstack((tx,ty))


	return [[np.cov(X)[0][1],np.corrcoef(X)[0][1]],[np.cov(Y)[0][1],np.corrcoef(Y)[0][1]],[np.cov(Z)[0][1],np.corrcoef(Z)[0][1]]]
Example #30
def ldaTransform(data):
	C0 = data[data[:, -1] == -1]
	C1 = data[data[:, -1] == 1]
	C0 = C0[:, :-1]
	C1 = C1[:, :-1]	
	S0 = np.cov(np.transpose(C0))
	S1 = np.cov(np.transpose(C1))
	SW = S0 + S1
	Mu0 = np.mean(C0, axis = 0)
	Mu1 = np.mean(C1, axis = 0)
	Mu = np.mean(data, axis = 0)
	Mu = Mu[:-1]
	Mu = np.matrix(Mu)
	Mu0 = np.matrix(Mu0)
	Mu1 = np.matrix(Mu1)
	SB = C0.shape[0] * np.transpose(Mu0 - Mu) * (Mu0 - Mu) + C1.shape[0] * np.transpose(Mu1 - Mu) * (Mu1 - Mu)
	Swin = LA.pinv(SW) #costly 
	Swin = np.matrix(Swin)
	SwinSB = Swin * SB #costly 
	e, v = LA.eig(SwinSB) #costly 
	s = np.argsort(e)[::-1]
	v = np.array(v)
	ev = np.zeros(v.shape)
	for i in xrange(e.shape[0]):
		ev[:, i] = v[:, s[i]]
	w = ev[:, 0]
	w = np.matrix(w)
	return w
Example #31
    def fit(self, descriptors, use_multiprocessing=True):
        """
        Fit the ITQ model given the input set of descriptors

        :param descriptors: Iterable of ``DescriptorElement`` vectors to fit
            the model to.
        :type descriptors:
            collections.Iterable[smqtk.representation.DescriptorElement]

        :raises RuntimeError: There is already a model loaded

        :return: Matrix hash codes for provided descriptors in order.
        :rtype: numpy.ndarray[bool]

        """
        if self.has_model():
            raise RuntimeError("Model components have already been loaded.")

        dbg_report_interval = None
        if self.logger().getEffectiveLevel() <= logging.DEBUG:
            dbg_report_interval = 1.0  # seconds
        if not hasattr(descriptors, "__len__"):
            self._log.info("Creating sequence from iterable")
            descriptors_l = []
            rs = [0] * 7
            for d in descriptors:
                descriptors_l.append(d)
                report_progress(self._log.debug, rs, dbg_report_interval)
            descriptors = descriptors_l
        self._log.info("Creating matrix of descriptors for fitting")
        x = elements_to_matrix(descriptors,
                               report_interval=dbg_report_interval,
                               use_multiprocessing=use_multiprocessing)
        self._log.debug("descriptor matrix shape: %s", x.shape)

        self._log.debug("Info normalizing descriptors by factor: %s",
                        self.normalize)
        x = self._norm_vector(x)

        self._log.info("Centering data")
        self.mean_vec = numpy.mean(x, axis=0)
        x -= self.mean_vec

        self._log.info("Computing PCA transformation")
        # numpy and matlab observation format is flipped, thus the added
        # transpose.
        self._log.debug("-- computing covariance")
        c = numpy.cov(x.transpose())

        # Direct translation from UNC matlab code
        # - eigen vectors are the columns of ``pc``
        self._log.debug('-- computing linalg.eig')
        l, pc = numpy.linalg.eig(c)
        # ordered by greatest eigenvalue magnitude, keeping top ``bit_len``
        self._log.debug('-- computing top pairs')
        top_pairs = sorted(zip(l, pc.transpose()),
                           key=lambda p: p[0],
                           reverse=1)[:self.bit_length]

        # # Harry translation -- Uses singular values / vectors, not eigen
        # # - singular vectors are the rows of pc
        # # - I think there is an additional error of not taking the transpose
        # #   of ``pc`` when computing ``top_pairs``.
        # pc, l, _ = numpy.linalg.svd(c)
        # top_pairs = sorted(zip(l, pc),
        #                    key=lambda p: p[0],
        #                    reverse=1
        #                    )[:self.bit_length]

        # Eigen-vectors of top ``bit_len`` magnitude eigenvalues
        self._log.debug("-- top vector extraction")
        pc_top = numpy.array([p[1] for p in top_pairs]).transpose()
        self._log.debug("-- transform centered data by PC matrix")
        xx = numpy.dot(x, pc_top)

        self._log.info("Performing ITQ to find optimal rotation")
        c, self.rotation = self._find_itq_rotation(xx, self.itq_iterations)
        # De-adjust rotation with PC vector
        self.rotation = numpy.dot(pc_top, self.rotation)

        self.save_model()

        return c
Example #32
def single_factor_analysis(X, k=5, cycles=100, tol=.001):
    """ Fit Factor Analysis model using EM

    Iterative Expectation-Maximization algorithm that stops once
    a proportional change less than the specified tolerance in
    the log likelihood or the specified number of cycles has been
    reached. Return matrices are strictly real-valued.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        A 2-D data matrix real-valued
    k : int, optional
        Number of factors (default 5)
    cycles : int, optional
        Maximum number of cycles of EM (default 100)
    tol : float, optional
        Tolerance value (default 0.001)

    Returns
    -------
    Lambda : array_like
        A 2-D ndarray containing the factor loading matrix (Lambda)
    Psi : array_like
        A 2-D ndarray containing the diagonal uniquenesses matrix
    lkhd_list : list
        List of log likelihood values during iterations.
        Generally follows a positive logarithmic curve. 

    Notes
    -----
    Based upon the algorithm initially described in:
    http://www.cs.toronto.edu/~fritz/absps/tr-96-1.pdf

    """

    n_samples, n_features = X.shape

    # X assumed to be zero mean
    row_mean = np.mean(X, axis=0)
    X -= row_mean[np.newaxis, :]  # subtract mean of row
    # X'*X followed by element-wise division by n_samples
    XX = X.T.dot(X) / n_samples
    XX_diag = np.diag(XX)

    cov_X = np.cov(X, rowvar=False)  #shape n_features by n_features
    scale = np.linalg.det(cov_X)**(1 / n_features)
    Psi = np.diag(cov_X)

    # Start Lambda at random values
    Lambda = np.random.randn(n_features, k) * np.sqrt(scale / k)

    I = np.eye(k)

    const = -n_features / 2 * np.log(2 * np.pi)

    log_lkhd = 0
    lkhd_list = []
    for i in range(cycles):
        # compute expectation
        Psi_diag = np.diag(1 / Psi)  # diag of element-wise ** -1
        PsiLambda = Psi_diag.dot(Lambda)
        # solve matrix inversion
        M = Psi_diag - PsiLambda.dot(np.linalg.inv(I +\
                Lambda.T.dot(PsiLambda))).dot(PsiLambda.T)
        M_det = np.sqrt(np.linalg.det(M))
        Beta = Lambda.T.dot(M)
        # first moment of factors
        XXBeta_prime = XX.dot(Beta.T)
        # compute second moment of factors
        ZZ = I - Beta.dot(Lambda) + Beta.dot(XXBeta_prime)

        # compute log likelihood
        last_log_lkhd = log_lkhd
        log_lkhd = n_samples * const + n_samples * np.log(
            M_det) - 0.5 * n_samples * np.sum(np.diag(M.dot(XX)))
        lkhd_list.append(log_lkhd)

        # compute maximization
        # update Lambda
        Lambda = XXBeta_prime.dot(np.linalg.inv(ZZ))
        # update Psi
        Psi = XX_diag - np.diag(Lambda.dot(XXBeta_prime.T))

        if i == 0:
            log_lkhd_init = log_lkhd
        elif (log_lkhd < last_log_lkhd):
            warnings.warn('Local decrease in log likelihood')
        elif ((log_lkhd - log_lkhd_init) < (1 + tol) *\
                (last_log_lkhd - log_lkhd_init)):
            break
    else:
        warnings.warn(
            'Factor analysis did not converge for tol : %.4f, you may want to increase the cycles'
            % tol)

    return Lambda, Psi, lkhd_list
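A hypothetical usage of the factor-analysis routine above (it relies on numpy as np and the warnings module being imported in the same script; the data are synthetic):

import warnings  # used by single_factor_analysis via warnings.warn
import numpy as np

rng = np.random.RandomState(0)
true_Lambda = rng.randn(10, 3)
factors = rng.randn(500, 3)
X = factors.dot(true_Lambda.T) + 0.1 * rng.randn(500, 10)

Lambda, Psi, lkhd_list = single_factor_analysis(X, k=3, cycles=200, tol=1e-4)
print(Lambda.shape, Psi.shape, len(lkhd_list))   # (10, 3) (10,) <number of EM cycles run>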
Example #33
    def step(self):
        self._current_generation += 1

        print('-' * 10)
        print('Generation', self._current_generation)

        for species, population in enumerate(self._populations):
            print('-- Species', species)
            self.current_group = species
            population_eval = {
                mem: self._cached_evaluation(mem)
                for mem in population
            }

            # Sorted population by evaluation. Largest/worst member first
            sorted_population = sorted(population_eval,
                                       key=lambda x: population_eval[x],
                                       reverse=True)

            member_points = np.array([
                self._decode_member(mem)
                for mem in sorted_population[self._mu_important:]
            ])

            covariance = np.copy(self._covariances[species])
            num_members = len(member_points)
            for i in range(member_points.shape[1]):
                #E_i = np.mean(member_points[:,i])  # estimated expected value for dimension i
                E_i = self._centroids[species][i]
                for j in range(i, member_points.shape[1]):
                    E_j = self._centroids[species][j]
                    covariance[i, j] = np.sum(
                        (member_points[:, i] - E_i) *
                        (member_points[:, j] - E_j)) / (num_members - 1)
                    covariance[j, i] = covariance[i, j]
            self._covariances[species] = np.cov(member_points.T)
            self._covariance_histories[species].append(
                np.copy(self._covariances[species]))

            self._centroids[
                species] = self._centroids[species] + self._learning_rate * (
                    np.mean(member_points, axis=0) - self._centroids[species])
            self._centroid_histories[species].append(
                np.copy(self._centroids[species]))

            print('Covariance & mean:')
            print(self._covariances[species])
            print('\t\t\t\t', self._centroids[species])
            print('Expansion?')
            #for d in range(self.num_dimensions):
            avg_covariance = np.mean(self._covariance_histories[species][-5:],
                                     axis=0)
            cur_dimension_variance = np.mean(
                np.diag(self._covariances[species]))
            avg_dimension_variance = np.mean(np.diag(avg_covariance))
            print('Cur variance over dimensions:', cur_dimension_variance)
            print('Avg variance over dimensions:', avg_dimension_variance)
            print('Averaged historical covariance:')
            print(np.mean(self._covariance_histories[species][-5:], axis=0))
            split_dimensions = []
            for d in range(self.num_dimensions):
                if len(self._centroid_histories[species]) > 2:
                    print(
                        'Centroid path for dimension %s is %.4f -> %.4f -> %.4f'
                        % (d, self._centroid_histories[species][-3][d],
                           self._centroid_histories[species][-2][d],
                           self._centroid_histories[species][-1][d]))
                    d0 = self._centroid_histories[species][-3][
                        d] - self._centroid_histories[species][-2][d]
                    d1 = self._centroid_histories[species][-2][
                        d] - self._centroid_histories[species][-1][d]

                    if np.sign(d0) != np.sign(d1):
                        print('Centroid path for dim %s changed direction' % d)
                        print(
                            'abs(c[-2]-c[-1]) = %.4f -> %.2f of variance V_d[t-1]=%.4f'
                            % (abs(d0), abs(d0) /
                               self._covariance_histories[species][-2][d, d],
                               self._covariance_histories[species][-2][d, d]))
                        print(
                            'abs(c[-1]-c[0]) = %.4f -> %.2f of variance V_d[t]=%.4f'
                            % (abs(d1),
                               abs(d1) / self._covariances[species][d, d],
                               self._covariance_histories[species][-1][d, d]))

                        if self._covariances[species][d, d] > np.maximum(
                                1, self._covariance_histories[species][-2][d,
                                                                           d]):
                            split_dimensions.append(d)

                if self._covariances[species][
                        d, d] > self._covariance_histories[species][-2][d, d]:
                    print(
                        'Dimension %s variance is greater than its last variance'
                        % d)
                if self._covariances[species][d, d] / np.mean(
                        avg_covariance[d, d]) > 1:
                    print(
                        'Dimension %s is greater than its historical average variance'
                        % d)
                if self._covariances[species][d,
                                              d] / avg_dimension_variance > 1:
                    print(
                        'Dimension %s is greater than total historical average variance'
                        % d)
                if self._covariances[species][d,
                                              d] / cur_dimension_variance > 1:
                    print(
                        'Dimension %s is greater than current average variance'
                        % d)

            new_members = np.array([
                self.sample_random(species) for _ in range(self._mu_important)
            ])
            member_points = np.concatenate([member_points, new_members])

            self._populations[species] = {
                self._encode_member(mem)
                for mem in member_points
            }

            # Perform possible species merges/deaths
            # Deaths: species is consistently worse than others
            # Merges: species close to each other

            # Perform possible species splits
            if len(split_dimensions) > 0:
                print('Performing population split along dimensions %s' %
                      split_dimensions)
                centroid_0 = self._centroids[species]
                centroid_1 = np.copy(self._centroid_histories[species][-2])
                centroid_1[split_dimensions] = self._centroids[species][
                    split_dimensions]
                covariance = np.copy(self._covariances[species])
                new_species = len(self._populations)
                self._centroids.append(centroid_1)
                self._covariances.append(covariance)
                self._centroid_histories.append([centroid_1])
                self._covariance_histories.append([covariance])
                self._populations.append({
                    self._encode_member(self.sample_random(new_species))
                    for _ in range(self._population_size)
                })

                # delete historical record
                self._centroid_histories[species] = self._centroid_histories[
                    species][-2:]
                self._covariance_histories[
                    species] = self._covariance_histories[species][-2:]
Example #34
def jeffrey_divergence_score(X, labels):
    """
    Implements the score based on the Jeffrey divergence that appears in:

    Said, A.; Hadjidj, R. & Foufou, S. "Cluster validity index based on Jeffrey divergence"
    Pattern Analysis and Applications, Springer London, 2015, 1-11

    :param X:
    :param labels:
    :return:
    """

    llabels = np.unique(labels)
    poslabels = maplabels(llabels)
    nclust = len(llabels)

    # compute the centroids
    centroids = np.zeros((nclust, X.shape[1]))
    for idx in llabels:
        center = np.zeros((1, X.shape[1]))
        center_mask = labels == idx
        center += np.sum(X[center_mask], axis=0)
        center /= center_mask.sum()
        centroids[poslabels[idx]] = center

    lcovs = []
    linvcovs = []
    for idx in llabels:
        cov_mask = labels == idx
        covar = np.cov(X[cov_mask].T)
        lcovs.append(covar)
        linvcovs.append(np.linalg.inv(covar))

    traces = np.zeros((nclust, nclust))
    for idx1 in llabels:
        for idx2 in llabels:
            traces[poslabels[idx1], poslabels[idx2]] = np.trace(
                np.dot(linvcovs[poslabels[idx1]], lcovs[poslabels[idx2]]))
            traces[poslabels[idx1], poslabels[idx2]] += np.trace(
                np.dot(linvcovs[poslabels[idx2]], lcovs[poslabels[idx1]]))
            traces[poslabels[idx1], poslabels[idx2]] /= 2.0

    sumcov = np.zeros((nclust, nclust))
    for idx1 in llabels:
        for idx2 in llabels:
            v1 = centroids[poslabels[idx1]]
            v2 = centroids[poslabels[idx2]]
            vm = v1 - v2
            mcv = linvcovs[poslabels[idx1]] + linvcovs[poslabels[idx2]]
            sumcov[poslabels[idx1],
                   poslabels[idx2]] = np.dot(vm.T, np.dot(mcv, vm))
            sumcov[poslabels[idx1], poslabels[idx2]] /= 2.0

    ssep = 0.0
    for idx1 in llabels:
        minv = np.inf
        for idx2 in llabels:
            if idx1 != idx2:
                val = traces[poslabels[idx1], poslabels[idx2]] + sumcov[
                    poslabels[idx1], poslabels[idx2]] - centroids.shape[1]
                if minv > val:
                    minv = val
        ssep += minv

    scompact = 0.0
    for idx in llabels:
        center_mask = labels == idx
        dvector = euclidean_distances(X[center_mask],
                                      centroids[poslabels[idx]],
                                      squared=True)
        scompact += dvector.max()

    return scompact / ssep
Example #35
    
np.savetxt("results/" + todaystr + "/CFDvecs.tsv", results, delimiter='\t')

#import io 

#out_m = io.open('meta.tsv', 'w', encoding='utf-8')
#for i in valid_generator:
    #[out_m.write(str(x) + "\n") for x in valid_generator.filenames]
#out_m.close()



#------------------------------------------------------------



MU = np.mean(results, axis=0)
SIGMA = np.cov(results, rowvar=0)
                           
from scipy.stats import multivariate_normal
var = multivariate_normal(MU, SIGMA)
pdftest=var.pdf(results)                           
log_pdftest= np.log(pdftest)
np.savetxt("results/" + todaystr + "/CFD_LL.tsv", log_pdftest, delimiter='\t')
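Note that scipy.stats.multivariate_normal also exposes logpdf, which avoids the underflow that np.log(var.pdf(...)) can hit for high-dimensional embeddings; a small self-contained check (with a stand-in matrix, not the CFD results above):

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(5)
results = rng.normal(size=(200, 8))   # stand-in for the embedding matrix above

MU = np.mean(results, axis=0)
SIGMA = np.cov(results, rowvar=0)
var = multivariate_normal(MU, SIGMA)

log_pdf = var.logpdf(results)
print(np.allclose(log_pdf, np.log(var.pdf(results))))   # True whenever pdf does not underflow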






Example #36
    # Test on 2D Gaussian
    mu = np.array([-2, 5])
    a = npr.rand(2, 2)
    cov = np.dot(a, a.T)

    mvn = priors.MultivariateNormal(mu=mu, cov=cov)
    x_samples = np.zeros((2, n))
    x = np.zeros(2)

    for i in xrange(n):
        if i % 1000 == 0:
            print 'Sample %d/%d' % (i, n)

        x, cur_ll = slice_sample(x, mvn.logprob)
        x_samples[:, i] = x.copy()

    mu_samp = np.mean(x_samples, axis=1)
    print '2D Gaussian:'
    print 'Actual mean:     [%f,%f]' % (mu[0], mu[1])
    print 'Mean of samples: [%f,%f]' % (mu_samp[0], mu_samp[1])
    print 'Actual Cov:'
    print str(cov)
    print 'Cov of samples'
    print str(np.cov(x_samples))

    # plt.figure(1)
    # plt.clf()
    # plt.hist(x_samples, 40)
    # plt.savefig('slice_sampler_test.pdf')
Example #37
from numpy import array
from numpy import mean
from numpy import cov
from numpy.linalg import eig
# define a matrix
A = array([[1, 2], [3, 4], [5, 6]])
print(A)
# calculate the mean of each column
M = mean(A.T, axis=1)
print(M)
# center columns by subtracting column means
C = A - M
print(C)
# calculate covariance matrix of centered matrix
V = cov(C.T)
print(V)
# eigendecomposition of covariance matrix
values, vectors = eig(V)
print(vectors)
print(values)
# project data
P = vectors.T.dot(C.T)
print(P.T)
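Continuing with the names defined in the script above, a quick sanity check: the covariance of the projected components is diagonal, with the eigenvalues of V on the diagonal (in the order returned by eig).

# covariance of the projected data: diagonal, eigenvalues of V on the diagonal
print(cov(P))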
Example #38
         vale_closing_prices,
         label='AAPL Closing Prices',
         linewidth=2,
         color='orangered',
         linestyle='-')

# compute the covariance of the two stocks by hand
bhp_mean = np.mean(bhp_closing_prices)
vale_mean = np.mean(vale_closing_prices)
# deviations from the mean
d1 = bhp_closing_prices - bhp_mean
d2 = vale_closing_prices - vale_mean
cov = np.mean(d1 * d2)
print(cov)

# compute the correlation coefficient
s = cov / (np.std(bhp_closing_prices) * \
           np.std(vale_closing_prices))
print(s)

# get the correlation matrix
m = np.corrcoef(bhp_closing_prices, vale_closing_prices)
print(m)
# get the covariance matrix
cm = np.cov(bhp_closing_prices, vale_closing_prices)
print(cm)

plt.legend()
plt.gcf().autofmt_xdate()
plt.show()
Example #39
def get_gaussian_cov_matrix(df: pd.DataFrame,
                            four_momentum_columns: List[str]) -> np.ndarray:
    return np.cov(df.loc[:, four_momentum_columns], rowvar=False)
Example #40
import math
import pylab

mu_1 = 4
variance_1 = 4
sigma_1 = math.sqrt(variance_1)
X_1 = np.random.normal(mu_1, sigma_1, 100)
print "the mean of X_1 is %s" %(np.mean(X_1))

mu_2 = 3
variance_2 = 9
sigma_2 = math.sqrt(variance_2)
X_2 = X_1/2 + np.random.normal(mu_2, sigma_2, 100)
print "the mean of X_2 is %s" %(np.mean(X_2))

cov_matrix=np.cov(X_1,X_2)
print "the covarience matrix is %s" %(cov_matrix)
val, vec = np.linalg.eig(cov_matrix)
print "the eigenvalues are %s" %(val)
print "the eigenvectors are %s" %(vec)

plt.figure(1)
pylab.ylim([-15,15])
pylab.xlim([-15,15])
plt.scatter(X_1,X_2)


mu = (np.mean(X_1),np.mean(X_2))
# eigenvectors returned by np.linalg.eig are the columns of vec, so each direction is vec[:, i]
Vec_A = [np.mean(X_1),np.mean(X_2),vec[0][0], vec[1][0]]
Vec_B = [np.mean(X_1),np.mean(X_2),vec[0][1], vec[1][1]]
Example #41
nData = X_train.shape[0]
nDimension = X_train.shape[1]
# Applying BIC to find optimal nCluster: 7
# nCluster = Optimize_nCluster.optimal_number_of_components(X_train)
nCluster = 7
# Applying K-means to initialize parameters
kmeans = KMeans(n_clusters=nCluster, random_state=0).fit(X_train)
means = kmeans.cluster_centers_  # init mu
priors = np.zeros(nCluster)
covariances = np.zeros((nCluster, nDimension, nDimension))      # using "full" covariance_type

for k in range(nCluster):
    Xk = X_train[np.where(kmeans.labels_ == k)[0]]
    priors[k] = float(Xk.shape[0]) / nData
    if np.size(Xk):
        covariances[k] = np.cov(Xk.T)       #Initialzie covariance matrices via points in each KMeans-cluster
    else:
        covariances[k] = np.cov(X_train.T)

# part2.2: Expectation-Maximization
def calculate_probability_density(X, means, covariances):
    probability_density = np.zeros((nData, nCluster))
    for i in range(X.shape[0]):
        for k in range(nCluster):
            vector_2d = np.reshape((X[i] - means[k]), (nDimension, 1))
            a = np.exp(-0.5 * np.dot(np.dot(vector_2d.T, np.linalg.inv(covariances[k])), vector_2d)[0][0])
            b = np.sqrt(np.power(2 * np.pi, nDimension) * np.linalg.det(covariances[k]))
            #if (i == 0 and k == 0) : print(np.linalg.det(covariances[0]))
            #print(i, np.power(2 * np.pi, nDimension) * np.linalg.det(covariances[k]))
            #print(i, np.linalg.det(covariances[k]))
            probability_density[i][k] = a / b
Example #42
def estimate_full_gradient_var(data):
    var_4_fg = np.cov(data, rowvar=False)
    var_fg = var_4_fg / (N)  # N (number of samples) is assumed to be defined at module level
    return var_fg
Example #43
def random_covariance(n):
    pts = np.random.rand(n,n+1)
    return np.cov(pts)
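With n variables and n+1 observations, the covariance returned above is full rank almost surely; a quick check (assuming the function above is in scope):

import numpy as np

np.random.seed(0)
C = random_covariance(5)
print(C.shape)                            # (5, 5)
print(np.linalg.matrix_rank(C))           # 5
print(np.all(np.linalg.eigvalsh(C) > 0))  # True: symmetric positive definite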
Example #44
def cem_evolution_step(mean, cov, params, sorted_ids):
    best_ids = sorted_ids[:ELITE_SIZE]
    new_mean = (1-ALPHA) * mean + ALPHA * np.mean(params[best_ids], 0)
    new_cov = (1-ALPHA) * cov + ALPHA * np.cov(params[best_ids], rowvar=False)
    return (new_mean, new_cov)
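A hypothetical driver for the CEM update above (ELITE_SIZE and ALPHA are module-level constants the function relies on; the values and the toy objective here are made up):

import numpy as np

ELITE_SIZE = 10   # assumed constants used by cem_evolution_step
ALPHA = 0.7

rng = np.random.default_rng(6)
mean = np.full(3, 2.0)
cov = np.eye(3)
params = rng.multivariate_normal(mean, cov, size=50)   # candidate parameter vectors
scores = -np.sum(params ** 2, axis=1)                  # toy objective: prefer the origin
sorted_ids = np.argsort(-scores)                       # best (highest score) first

mean, cov = cem_evolution_step(mean, cov, params, sorted_ids)
print(mean)   # moved from [2, 2, 2] toward the elite candidates near the origin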
Example #45
    return matrix


if __name__ == '__main__':
    X = [1, -1, 4]
    Y = [2, 1, 3]
    Z = [1, 3, -1]

    dat = np.column_stack([X, Y, Z]).T
    print dat

    print 'covs'
    print 'cov(X, X): {:.2f} {:.2f}'.format(cov(X, X), var(X))
    print 'cov(Y, Y): {:.2f} {:.2f}'.format(cov(Y, Y), var(Y))
    print 'cov(Z, Z): {:.2f} {:.2f}'.format(cov(Z, Z), var(Z))

    print 'mine'
    for r in covmat(X, Y, Z):
        print(' {:> 3.2f}' * len(r)).format(*r)

    print 'numpy'
    for r in np.cov(dat):
        print(' {:> 3.2f}' * len(r)).format(*r)

    'eigenvectors'
    print np.linalg.eig(np.cov(dat))
    for r in np.linalg.eig(np.cov(dat)):
        print r
        # print (' {:> 3.2f}'*len(r)).format(*r)
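The helpers cov, var and covmat used by the __main__ block above are not shown; a plausible sketch (an assumption, chosen to match the printed comparison against np.cov(dat), which uses ddof=1) would be:

import numpy as np

def cov(a, b):
    # sample covariance of two sequences (ddof=1, matching np.cov's default)
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return np.sum((a - a.mean()) * (b - b.mean())) / (len(a) - 1)

def var(a):
    return cov(a, a)

def covmat(*series):
    # pairwise covariance matrix of the given sequences
    n = len(series)
    matrix = [[cov(series[i], series[j]) for j in range(n)] for i in range(n)]
    return matrix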
Example #46
plt.title(target)
plt.xlabel('Log(concentration)')

# root mean squared error
# incorporate retention time
# spearman rank coefficient

# %%
# manuelly check r_value
# pearson correlation coefficient
from numpy import cov
from scipy.stats import pearsonr

target = 'c8'
mask = new_data_melt['compound'] == target
covariance = cov(new_data_melt[mask]['concentration'],
                 new_data_melt[mask]['area'])

p_r_value = pearsonr(new_data_melt[mask]['concentration'],
                     new_data_melt[mask]['area'])
print(p_r_value)

# %%
# now I need to remake the linear equation for each regression line
# and plug in the respective area values for y and find the concentration

# store all dataframes in a dictionary
dict = {compound: pd.DataFrame() for compound in reg_df.index}

# the range of x values only having the concentrations of interest
new_x = [1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000]
new_x = np.log10(new_x)
Example #47
def main():
    input_gps_csv_file = "data/gps.csv"  # define the file name
    input_acc_csv_file = "data/data.csv"
    input_dist_csv_file = "data/distance.csv"

    # read the file and extract the required data
    lat, lon, time_gps = CSVReadGPS(input_gps_csv_file)
    accX, accY, time_acc = CSVReadAcc(input_acc_csv_file)
    dist, time_ultra = CSVReadUltra(input_dist_csv_file)

    # loop in the lists to estimate the velocities
    num_gps_val = len(lat)  # Number of GPS data points captured
    num_acc_val = len(accX)  # Number of Accelerometer data points captured
    num_dist_val = len(dist)  # number of ultrasound readings collected

    #### Calculate the covariances of data

    # GPS based velocity estimate
    velNorth = []
    velWest = []
    velNorth.append(0)  # intial Velocity
    velWest.append(0)  # intial Velocity
    for i in range(1, num_gps_val):
        y_t = GPSVel(time_gps[i], lat[i], lon[i], time_gps[i - 1], lat[i - 1],
                     lon[i - 1])
        velNorth.append(y_t[0])
        velWest.append(y_t[1])

    # Accelerometer based Velocity Estimate
    velX = []
    velY = []
    velX.append(0)  # intial Velocity
    velY.append(0)  # intial Velocity
    for i in range(1, num_acc_val):
        acc_time_prev = time_acc[i - 1]
        acc_time_cur = time_acc[i]

        acc_time_cur_temp = acc_time_cur.split(":")
        acc_time_cur_temp = [float(item) for item in acc_time_cur_temp]
        acc_time_prev_temp = acc_time_prev.split(":")
        acc_time_prev_temp = [float(item) for item in acc_time_prev_temp]
        del_acc_t = (acc_time_cur_temp[0] - acc_time_prev_temp[0]) * 3600 + (
            acc_time_cur_temp[1] - acc_time_prev_temp[1]) * 60 + (
                acc_time_cur_temp[2] - acc_time_prev_temp[2])

        # Measurements from the accelerometer
        a_t = np.array([2, 1], dtype=np.float32)
        a_t[0] = accX[i]  # X-direction
        a_t[1] = accY[i]  # Y-direction

        velX.append(a_t[0] * del_acc_t + velX[i - 1])  # V = U + a*t
        velY.append(a_t[1] * del_acc_t + velY[i - 1])

    cov_gps_velN = np.cov(velNorth)
    cov_gps_velW = np.cov(velWest)
    cov_acc_velX = np.cov(velX)
    cov_acc_velY = np.cov(velY)
    cov_dist = np.cov(dist)

    print("Covariances Calculated")

    # Accelerometer Data acquisition frequency is higher than GPS. GPS: update, Accelerometer: Prediction
    # Intialize the filter parameters:
    # Velocity Estimate
    x_init = np.array([velX[0], velY[0]])
    p_init = np.array([1, 0, 1, 0])  # initialize with high covariance value
    q_matrix = np.array([cov_acc_velX, 0, 0, cov_acc_velY], dtype=np.float32)
    r_matrix = np.array([cov_gps_velN, 0, 0, cov_gps_velW], dtype=np.float32)
    acc_data_count = 1  # counter for data index in accelermeter
    ultra_data_count = 1  # counter for data index in ultrasound prediction step
    # intialize temporary variables
    acc_data_count_temp = 0
    ultra_data_count_temp = 0

    xd_init = dist[0]
    pd_init = 1e10
    r_ultra = cov_dist

    x_prior = np.empty([2, 1])
    p_prior = np.empty([2, 2])
    xd_prior = 0
    pd_prior = 0

    x_post = np.empty([2, 1])
    p_post = np.empty([2, 2])
    xd_post = 0
    pd_post = 0

    u_t = np.empty([2, 1])
    y_t = np.empty([2, 1])

    # store the state vector and covariance matrix
    #x_post_vec = np.empty([num_gps_val,2], dtype = np.float32)
    x1_post_vec = []
    p1_post_vec = []
    x2_post_vec = []
    p2_post_vec = []

    x_post_arr = np.zeros((24, 4))
    p_post_arr = np.zeros((24, 4))
    time_arr = np.zeros((24, 1))

    xd_post_arr = np.zeros((3, 1))
    pd_post_arr = np.zeros((3, 1))
    d_time_arr = np.zeros((3, 1))

    print("Q_matrix: ", q_matrix.reshape(2, 2))
    print("R_matrix: ", r_matrix.reshape(2, 2))

    # Filter Implementation
    for i in range(1, num_gps_val):

        y_t = GPSVel(time_gps[i], lat[i], lon[i], time_gps[i - 1], lat[i - 1],
                     lon[i - 1])

        gps_time_cur_temp = time_gps[i].split(":")
        gps_time_cur_temp = [float(item) for item in gps_time_cur_temp]
        gps_time_cur_temp = GMTCDTconv(gps_time_cur_temp)
        gps_time_prev_temp = time_gps[i - 1].split(":")
        gps_time_prev_temp = [float(item) for item in gps_time_prev_temp]
        gps_time_prev_temp = GMTCDTconv(gps_time_prev_temp)
        gps_del_t = (gps_time_cur_temp[0] - gps_time_prev_temp[0]) * 3600 + (
            gps_time_cur_temp[1] - gps_time_prev_temp[1]) * 60 + (
                gps_time_cur_temp[2] - gps_time_prev_temp[2])

        # accumulating previous estimates
        if i == 1:
            x_prev_est = x_init
            p_prev = p_init
        else:
            x_prev_est = x_post
            p_prev = p_post
        acc_data_count = acc_data_count_temp + acc_data_count
        acc_data_count_temp = 0
        # Prediction Steps
        for j in range(acc_data_count, num_acc_val):
            if acc_data_count_temp > 0:  # Account for multiple prediction steps
                x_prev_est = x_prior
                p_prev = p_prior

            acc_data_count_temp = acc_data_count_temp + 1  # update the counter
            acc_time_cur = time_acc[j]
            acc_time_cur_temp = acc_time_cur.split(":")
            acc_time_cur_temp = [float(item) for item in acc_time_cur_temp]
            time_diff = (
                acc_time_cur_temp[0] - gps_time_cur_temp[0]) * 3600 + (
                    acc_time_cur_temp[1] - gps_time_cur_temp[1]) * 60 + (
                        acc_time_cur_temp[2] - gps_time_cur_temp[2])

            if time_diff < 0:  # run the prediction step for every accelerometer sample recorded before the current GPS reading
                acc_time_prev = time_acc[j - 1]
                #time difference
                acc_time_prev_temp = acc_time_prev.split(":")
                acc_time_prev_temp = [
                    float(item) for item in acc_time_prev_temp
                ]
                del_acc_t = (
                    acc_time_cur_temp[0] - acc_time_prev_temp[0]) * 3600 + (
                        acc_time_cur_temp[1] - acc_time_prev_temp[1]) * 60 + (
                            acc_time_cur_temp[2] - acc_time_prev_temp[2])

                # Measurements from the accelerometer
                u_t = np.zeros(2, dtype=np.float32)  # 2-element buffer for the accelerometer sample
                u_t[0] = accX[j]  # X-direction
                u_t[1] = accY[j]  # Y-direction

                # Kalman Filter Prediction
                x_prior, p_prior = KF(x_prev_est, u_t, del_acc_t, 0, q_matrix,
                                      0, p_prev, True, False)
                acc_data_count = acc_data_count + 1

            else:
                break  # end of prediction loop

        # run the update only if there was at least one prediction step
        if acc_data_count_temp > 0:
            # call the KF update
            x_post, p_post = KF(x_prior, u_t, del_acc_t, y_t, 0, r_matrix,
                                p_prior, False, True)

            print("x_post: ", x_post)

            #print(p_post)
            x1_post_vec.append(x_post[0][0])
            p1_post_vec.append(p_post[0])
            x2_post_vec.append(x_post[0][1])
            p2_post_vec.append(p_post[3])

            if i < 24:
                cur_time = (gps_time_cur_temp[0]) * 3600 + (
                    gps_time_cur_temp[1]) * 60 + (gps_time_cur_temp[2])
                time_arr[i] = 84600 - cur_time

                x_post_new = x_post.flatten()

                if x_post_new.shape[0] == 2:
                    x_post_arr[i, :2] = x_post_new
                    p_post_arr[i, :] = p_post.flatten()
                else:
                    x_post_arr[i, :] = x_post.flatten()
                    p_post_arr[i, :] = p_post.flatten()

            ####
            # Distance Estimation
            if i == 1:
                xd_prev_est = xd_init
                pd_prev = pd_init
                kfd = kalman(pd_init, r_ultra, xd_prev_est,
                             pd_prev)  # filter initialization
            else:
                xd_prev_est = xd_post
                pd_prev = pd_post
                kfd = kalman(p_post[3], r_ultra, xd_prev_est,
                             pd_prev)  # filter initialization

            # prediction using calculated velocity

            xd_prior, pd_prior = kfd.update_meas(0, gps_del_t, x_post[0][1],
                                                 False)

            # Update step: iterate over the different measurement data and use the value which is recorded closest to the current GPS time step
            ultra_data_count_temp = 0
            gps_ultra_diff = 0
            for j in range(ultra_data_count + 1, num_dist_val):
                # Time of the measurement
                dis_time_cur = time_ultra[j]
                dis_time_cur_temp = dis_time_cur.split(":")
                dis_time_cur_temp = [float(item) for item in dis_time_cur_temp]
                #print(dis_time_cur_temp)
                #print(gps_time_cur_temp)
                # keep iterating until the difference is positive
                gps_ultra_diff = (
                    dis_time_cur_temp[0] - gps_time_cur_temp[0]) * 3600 + (
                        dis_time_cur_temp[1] - gps_time_cur_temp[1]) * 60 + (
                            dis_time_cur_temp[2] - gps_time_cur_temp[2])
                if gps_ultra_diff < 0:
                    ultra_data_count_temp = ultra_data_count_temp + 1
                else:
                    break
                    # break out of the loop once the measurement is ahead of the prediction

            # end of for loop
            ultra_data_count_prev = ultra_data_count
            ultra_data_count = ultra_data_count + ultra_data_count_temp - 1

            if ultra_data_count_prev == ultra_data_count:
                continue
            if ultra_data_count_temp > 0:  # only perform update if a measurement is detected in the given range
                #print(ultra_data_count)
                xd_post, pd_post = kfd.update_meas(dist[ultra_data_count],
                                                   gps_del_t, x_post[0][1],
                                                   True)

                if i < 3:
                    time_curr = (gps_time_cur_temp[0]) * 3600 + (
                        gps_time_cur_temp[1]) * 60 + (gps_time_cur_temp[2])
                    d_time_arr[i] = 84600 - time_curr
                    xd_post_arr[i] = xd_post
                    pd_post_arr[i] = pd_post

            mass = 1.81  # kg (~ 4 pounds)
            radius = 0.062 / 2  # meters
            height = 0.033  # meters (bump height)
            wheelbumpdynamics(mass, radius, x_post_arr[0, 0], height)
            print('================================================')
        # End of Distance Estimation
        else:
            continue

    # end of for loop
    #print(p1_post_vec)
    #print("R_ultra: ", r_ultra)
    #print(pd_post)
    #print(p2_post_vec)
    plot_results(x_post_arr, p_post_arr, time_arr, xd_post_arr, pd_post_arr,
                 d_time_arr)
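
# Illustrative sketch (not part of the original script): the "HH:MM:SS"
# timestamps above are parsed inline in several places; a small helper such as
# the hypothetical hms_to_seconds below could replace that repetition.
def hms_to_seconds(stamp):
    """Convert an 'HH:MM:SS[.fff]' string to seconds since midnight."""
    hours, minutes, seconds = (float(part) for part in stamp.split(":"))
    return hours * 3600 + minutes * 60 + seconds


# Example: elapsed time between two consecutive timestamps.
del_t_example = hms_to_seconds("10:15:30.50") - hms_to_seconds("10:15:30.25")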
Пример #48
0
    def plot_missing_pattern(self,
                             ax=None,
                             row_order="pattern",
                             column_order="pattern",
                             hide_complete_rows=False,
                             hide_complete_columns=False,
                             color_row_patterns=True):
        """
        Generate an image showing the missing data pattern.

        Parameters
        ----------
        ax : matplotlib axes
            Axes on which to draw the plot.
        row_order : string
            The method for ordering the rows.  Must be one of 'pattern',
            'proportion', or 'raw'.
        column_order : string
            The method for ordering the columns.  Must be one of 'pattern',
            'proportion', or 'raw'.
        hide_complete_rows : boolean
            If True, rows with no missing values are not drawn.
        hide_complete_columns : boolean
            If True, columns with no missing values are not drawn.
        color_row_patterns : boolean
            If True, color the unique row patterns, otherwise use grey
            and white as colors.

        Returns
        -------
        A figure containing a plot of the missing data pattern.
        """

        # Create an indicator matrix for missing values.
        miss = np.zeros(self.data.shape)
        cols = self.data.columns
        for j, col in enumerate(cols):
            ix = self.ix_miss[col]
            miss[ix, j] = 1

        # Order the columns as requested
        if column_order == "proportion":
            ix = np.argsort(miss.mean(0))
        elif column_order == "pattern":
            cv = np.cov(miss.T)
            u, s, vt = np.linalg.svd(cv, 0)
            ix = np.argsort(cv[:, 0])
        elif column_order == "raw":
            ix = np.arange(len(cols))
        else:
            raise ValueError(column_order +
                             " is not an allowed value for `column_order`.")
        miss = miss[:, ix]
        cols = [cols[i] for i in ix]

        # Order the rows as requested
        if row_order == "proportion":
            ix = np.argsort(miss.mean(1))
        elif row_order == "pattern":
            x = 2**np.arange(miss.shape[1])
            rky = np.dot(miss, x)
            ix = np.argsort(rky)
        elif row_order == "raw":
            ix = np.arange(miss.shape[0])
        else:
            raise ValueError(row_order +
                             " is not an allowed value for `row_order`.")
        miss = miss[ix, :]

        if hide_complete_rows:
            ix = np.flatnonzero((miss == 1).any(1))
            miss = miss[ix, :]

        if hide_complete_columns:
            ix = np.flatnonzero((miss == 1).any(0))
            miss = miss[:, ix]
            cols = [cols[i] for i in ix]

        from statsmodels.graphics import utils as gutils
        from matplotlib.colors import LinearSegmentedColormap

        if ax is None:
            fig, ax = gutils.create_mpl_ax(ax)
        else:
            fig = ax.get_figure()

        if color_row_patterns:
            x = 2**np.arange(miss.shape[1])
            rky = np.dot(miss, x)
            _, rcol = np.unique(rky, return_inverse=True)
            miss *= 1 + rcol[:, None]
            ax.imshow(miss,
                      aspect="auto",
                      interpolation="nearest",
                      cmap='gist_ncar_r')
        else:
            cmap = LinearSegmentedColormap.from_list("_",
                                                     ["white", "darkgrey"])
            ax.imshow(miss, aspect="auto", interpolation="nearest", cmap=cmap)

        ax.set_ylabel("Cases")
        ax.set_xticks(range(len(cols)))
        ax.set_xticklabels(cols, rotation=90)

        return fig
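
# Self-contained sketch of the "pattern" row-ordering trick used above: each
# row of the missing-value indicator matrix is encoded as an integer by
# reading it as a binary number, so rows with identical missingness patterns
# end up adjacent after sorting.
import numpy as np

miss = np.array([[1, 0, 0],
                 [0, 1, 0],
                 [1, 0, 0],
                 [0, 0, 0]])
codes = np.dot(miss, 2 ** np.arange(miss.shape[1]))  # [1, 2, 1, 0]
order = np.argsort(codes)                            # groups identical patterns
print(miss[order])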
Пример #49
0
def get_events_derivative(dff_trace, k_min=0, k_max=10, delta=3, smooth_window=5, smooth_weight=0.3, plot=False):
    '''
    Detect candidate events in a dF/F trace from its local derivative; this seems to work ok.
    :param dff_trace: dF/F trace to analyze
    :param k_min: start offset of the local window around each sample
    :param k_max: end offset of the local window around each sample
    :param delta: lag (in samples) used for the derivative-like difference
    :param smooth_window: window length passed to smooth()
    :param smooth_weight: weight for the (currently disabled) TV-denoising step
    :param plot: if True, scatter-plot accepted (blue) and rejected (red) points
    :return: array of event indices and array of event sizes
    '''
  
    dff_trace = smooth(dff_trace, smooth_window)
    # if smooth_weight > 0:
    #     dff_trace = denoise_tv_chambolle(dff_trace, weight=smooth_weight)
    var_dict = {}

    for ii in range(len(dff_trace)):

        if ii + k_min >= 0 and ii + k_max <= len(dff_trace):
            trace = dff_trace[ii + k_min:ii + k_max]

            xx = (trace - trace[0])[delta] - (trace - trace[0])[0]
            # yy = (trace - trace[0])[delta + 2] - (trace - trace[0])[0 + 2]
            yy = max((trace - trace[0])[delta + 2] - (trace - trace[0])[0 + 2],
                     (trace - trace[0])[delta + 3] - (trace - trace[0])[0 + 3],
                     (trace - trace[0])[delta + 4] - (trace - trace[0])[0 + 4])

            var_dict[ii] = (trace[0], trace[-1], xx, yy)

    xx_list, yy_list = [], []
    for _, _, xx, yy in var_dict.values():
        xx_list.append(xx)
        yy_list.append(yy)

    mu_x = np.median(xx_list)
    mu_y = np.median(yy_list)

    xx_centered = np.array(xx_list) - mu_x
    yy_centered = np.array(yy_list) - mu_y

    std_factor = 1
    std_x = 1. / std_factor * np.percentile(np.abs(xx_centered), [100 * (1 - 2 * (1 - sps.norm.cdf(std_factor)))])
    std_y = 1. / std_factor * np.percentile(np.abs(yy_centered), [100 * (1 - 2 * (1 - sps.norm.cdf(std_factor)))])

    curr_inds = []
    allowed_sigma = 4
    for ii, (xi, yi) in enumerate(zip(xx_centered, yy_centered)):
        if np.sqrt(((xi) / std_x) ** 2 + ((yi) / std_y) ** 2) < allowed_sigma:
            curr_inds.append(True)
        else:
            curr_inds.append(False)

    curr_inds = np.array(curr_inds)
    data_x = xx_centered[curr_inds]
    data_y = yy_centered[curr_inds]
    Cov = np.cov(data_x, data_y)
    Cov_Factor = np.linalg.cholesky(Cov)
    Cov_Factor_Inv = np.linalg.inv(Cov_Factor)

    # ===================================================================================================================

    # fig_dff, ax_dff = plt.subplots()
    # ax_dff.plot(dff_trace, 'k')

    # fig, ax = plt.subplots()
    noise_threshold = max(allowed_sigma * std_x + mu_x, allowed_sigma * std_y + mu_y)
    mu_array = np.array([mu_x, mu_y])
    yes_list, no_list, size_list = [], [], []

    for ii, (t0, tf, xx, yy) in var_dict.items():

        xi_z, yi_z = Cov_Factor_Inv.dot((np.array([xx, yy]) - mu_array))

        # # Conditions in order:
        # # 1) Outside noise blob
        # # 2) Minimum change in df/f
        # # 3) Change evoked by this trial, not previous
        # # 4) At end of trace, ended up outside of noise floor
        #
        # if np.sqrt(xi_z ** 2 + yi_z ** 2) > 4 and yy > .05 and xx < yy and tf > noise_threshold / 2:

        # Conditions in order:
        # 1) outside noise blob
        # 2) positive transient
        # 3) change evoked by this trial, not next

        if np.sqrt(xi_z**2 + yi_z**2) > 4 and xx > 0:
        # if np.sqrt(xi_z ** 2 + yi_z ** 2) > 4 and yy > .05 and xx < yy and tf > noise_threshold / 2:

            yes_list.append(ii)
            size_list.append(xx)

            # ax.plot([xx], [yy], 'b.')
            # ax_dff.plot(ii, 2., 'b')
        else:
            no_list.append(ii)
            # ax.plot([xx], [yy], 'r.')

    # events_temp[yes_list] = 1

    if plot:
        xx_arr = np.array(xx_list)
        yy_arr = np.array(yy_list)
        plt.figure()
        plt.plot(xx_arr[yes_list], yy_arr[yes_list], 'b.')
        plt.plot(xx_arr[no_list], yy_arr[no_list], 'r.')

    yes_array = np.array(yes_list)
    size_array = np.array(size_list)

    return yes_array, size_array
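
# Minimal standalone sketch of the whitening step used above: the inverse of
# the Cholesky factor of the covariance matrix maps centred (xx, yy) pairs
# into units of standard deviations, so sqrt(xi_z**2 + yi_z**2) behaves like a
# Mahalanobis distance from the noise blob.
import numpy as np

rng = np.random.default_rng(0)
pts = rng.multivariate_normal([0, 0], [[2.0, 0.8], [0.8, 1.0]], size=500)
cov = np.cov(pts[:, 0], pts[:, 1])
whiten = np.linalg.inv(np.linalg.cholesky(cov))
z = whiten.dot(pts.T)                     # whitened coordinates
dist = np.sqrt((z ** 2).sum(axis=0))      # Mahalanobis-style distance per point
print(dist.mean())                        # ~1.25 for 2-D Gaussian noise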
Пример #50
0
print(np.quantile(rest_med, [0.025, 0.5, 0.975]))
print(np.mean([1 if i > 0 else 0 for i in rest_med]))

#----------------------------------------------------------------------------#
# Graphical analysis - first plot
y1_1_grid = np.linspace(38, 62, num=1000)
y2_1_grid = np.linspace(38, 68, num=1000)

X, Y = np.meshgrid(y1_1_grid, y2_1_grid)
pos = np.empty(X.shape + (2, ))
pos[:, :, 0] = X
pos[:, :, 1] = Y

thetas1 = [i[0][0] for i in param]
thetas2 = [i[0][1] for i in param]
cov1 = np.cov(thetas1, thetas2)

rv = multivariate_normal([np.mean(thetas1), np.mean(thetas2)], cov1).pdf(pos)
plt.contour(X, Y, rv)
plt.plot(y1_1_grid, y2_1_grid)

# Second plot
y1_2_grid = np.linspace(0, 100, num=1000)
y2_2_grid = np.linspace(0, 100, num=1000)

X2, Y2 = np.meshgrid(y1_2_grid, y2_2_grid)
pos2 = np.empty(X2.shape + (2, ))
pos2[:, :, 0] = X2
pos2[:, :, 1] = Y2

ys1 = [i[0] for i in data_pred]
    def test_sample_paths_wiener(self, watch_params, use_time_step,
                                 use_time_grid, supply_normal_draws):
        """Tests paths properties for Wiener process (dX = dW)."""
        dtype = tf.float64

        def drift_fn(_, x):
            return tf.zeros_like(x)

        def vol_fn(_, x):
            return tf.expand_dims(tf.ones_like(x), -1)

        times = np.array([0.1, 0.2, 0.3])
        num_samples = 10000
        if watch_params:
            watch_params = []
        else:
            watch_params = None
        if use_time_step:
            time_step = 0.01
            num_time_steps = None
        else:
            time_step = None
            num_time_steps = 30
        if use_time_grid:
            time_step = None
            times_grid = tf.linspace(tf.constant(0.0, dtype=dtype), 0.3, 31)
        else:
            times_grid = None
        if supply_normal_draws:
            num_samples = 1
            # Use antithetic sampling
            normal_draws = tf.random.stateless_normal(shape=[5000, 30, 1],
                                                      seed=[1, 42],
                                                      dtype=dtype)
            normal_draws = tf.concat([normal_draws, -normal_draws], axis=0)
        else:
            normal_draws = None
        paths = euler_sampling.sample(
            dim=1,
            drift_fn=drift_fn,
            volatility_fn=vol_fn,
            times=times,
            num_samples=num_samples,
            time_step=time_step,
            num_time_steps=num_time_steps,
            watch_params=watch_params,
            normal_draws=normal_draws,
            times_grid=times_grid,
            random_type=random.RandomType.STATELESS_ANTITHETIC,
            seed=[1, 42])

        # The correct number of samples
        num_samples = 10000
        with self.subTest('Shape'):
            self.assertAllEqual(paths.shape.as_list(), [num_samples, 3, 1])
        paths = self.evaluate(paths)
        means = np.mean(paths, axis=0).reshape([-1])
        covars = np.cov(paths.reshape([num_samples, -1]), rowvar=False)
        expected_means = np.zeros((3, ))
        expected_covars = np.minimum(times.reshape([-1, 1]),
                                     times.reshape([1, -1]))
        with self.subTest('Means'):
            self.assertAllClose(means, expected_means, rtol=1e-2, atol=1e-2)
        with self.subTest('Covariance'):
            self.assertAllClose(covars, expected_covars, rtol=1e-2, atol=1e-2)
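
# numpy-only sketch of the property the test above checks: for a standard
# Wiener process, Cov(W_s, W_t) = min(s, t).  Cumulative sums of independent
# Gaussian increments reproduce this up to Monte Carlo error.
import numpy as np

rng = np.random.default_rng(42)
dt, n_steps, n_paths = 0.01, 30, 20000
increments = rng.normal(scale=np.sqrt(dt), size=(n_paths, n_steps))
paths = np.cumsum(increments, axis=1)
times = dt * np.arange(1, n_steps + 1)
idx = [9, 19, 29]                                     # t = 0.1, 0.2, 0.3
sample_cov = np.cov(paths[:, idx], rowvar=False)
expected = np.minimum(times[idx][:, None], times[idx][None, :])
print(np.max(np.abs(sample_cov - expected)))          # small (Monte Carlo error)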
Пример #52
0
def get_cca_similarity(acts1,
                       acts2,
                       threshold=0.98,
                       compute_dirns=True,
                       verbose=True):
    """The main function for computing cca similarities.

  This function computes the cca similarity between two sets of activations,
  returning a dict with the cca coefficients, a few statistics of the cca
  coefficients, and (optionally) the actual directions.

  Args:
            acts1: (num_neurons1, data_points) a 2d numpy array of neurons by
                   datapoints where entry (i,j) is the output of neuron i on
                   datapoint j.
            acts2: (num_neurons2, data_points) same as above, but (potentially)
                   for a different set of neurons. Note that acts1 and acts2
                   can have different numbers of neurons, but must agree on the
                   number of datapoints

            threshold: float between 0, 1 used to get rid of trailing zeros in
                       the cca correlation coefficients to output more accurate
                       summary statistics of correlations.

            compute_dirns: boolean value determining whether actual cca
                           directions are computed. (For very large neurons and
                           datasets, may be better to compute these on the fly
                           instead of store in memory.)

            verbose: Boolean, whether info about intermediate outputs printed

  Returns:
            return_dict: A dictionary with outputs from the cca computations.
                         Contains neuron coefficients (combinations of neurons
                         that correspond to cca directions), the cca correlation
                         coefficients (how well aligned directions correlate),
                         x and y idxs (for computing cca directions on the fly
                         if compute_dirns=False), and summary statistics. If
                         compute_dirns=True, the cca directions are also
                         computed.
  """

    # assert dimensionality equal
    assert acts1.shape[1] == acts2.shape[1], "dimensions don't match"
    # check that acts1, acts2 are oriented as (num_neurons, num_datapoints)
    assert acts1.shape[0] < acts1.shape[1], ("input must be number of neurons "
                                             "by datapoints")
    return_dict = {}

    # compute covariance with numpy function for extra stability
    numx = acts1.shape[0]

    covariance = np.cov(acts1, acts2)
    sigmaxx = covariance[:numx, :numx]
    sigmaxy = covariance[:numx, numx:]
    sigmayx = covariance[numx:, :numx]
    sigmayy = covariance[numx:, numx:]

    # rescale covariance to make cca computation more stable
    xmax = np.max(np.abs(sigmaxx))
    ymax = np.max(np.abs(sigmayy))
    sigmaxx /= xmax
    sigmayy /= ymax
    sigmaxy /= np.sqrt(xmax * ymax)
    sigmayx /= np.sqrt(xmax * ymax)

    ([_, sx, vx], [_, sy, vy], invsqrt_xx, invsqrt_yy, x_idxs,
     y_idxs) = compute_ccas(sigmaxx, sigmaxy, sigmayx, sigmayy, verbose)

    # if x_idxs or y_idxs is all false, return_dict has zero entries
    if (not np.any(x_idxs)) or (not np.any(y_idxs)):
        return create_zero_dict(compute_dirns, acts1.shape[1])

    if compute_dirns:
        # orthonormal directions that are CCA directions
        cca_dirns1 = np.dot(vx, np.dot(invsqrt_xx, acts1[x_idxs]))
        cca_dirns2 = np.dot(vy, np.dot(invsqrt_yy, acts2[y_idxs]))

    # get rid of trailing zeros in the cca coefficients
    idx1 = sum_threshold(sx, threshold)
    idx2 = sum_threshold(sy, threshold)

    return_dict["neuron_coeffs1"] = np.dot(vx, invsqrt_xx)
    return_dict["neuron_coeffs2"] = np.dot(vy, invsqrt_yy)
    return_dict["cca_coef1"] = sx
    return_dict["cca_coef2"] = sy
    return_dict["x_idxs"] = x_idxs
    return_dict["y_idxs"] = y_idxs
    # summary statistics
    return_dict["mean"] = (np.mean(sx[:idx1]), np.mean(sy[:idx2]))
    return_dict["sum"] = (np.sum(sx), np.sum(sy))

    if compute_dirns:
        return_dict["cca_dirns1"] = cca_dirns1
        return_dict["cca_dirns2"] = cca_dirns2

    return return_dict
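
# Hedged usage sketch of get_cca_similarity as defined above (it relies on the
# helpers compute_ccas, sum_threshold and create_zero_dict from the same
# module).  Inputs are (num_neurons, num_datapoints) arrays, and the number of
# datapoints must exceed the number of neurons; the arrays below are
# illustrative only.
import numpy as np

acts1 = np.random.randn(16, 2000)
acts2 = 0.7 * acts1[:12] + 0.3 * np.random.randn(12, 2000)  # partially shared
result = get_cca_similarity(acts1, acts2, threshold=0.98,
                            compute_dirns=False, verbose=False)
print(result["mean"])  # average CCA correlation for each view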
data = pd.read_csv('USArrests.csv')
Murder = np.array(data['Murder'])
Murder = (Murder - Murder.mean()) / Murder.std()
Assault = np.array(data['Assault'])
Assault = (Assault - Assault.mean()) / Assault.std()
UrbanPop = np.array(data['UrbanPop'])
UrbanPop = (UrbanPop - UrbanPop.mean()) / UrbanPop.std()
Rape = np.array(data['Rape'])
Rape = (Rape - Rape.mean()) / Rape.std()
states = np.array(data['Unnamed: 0'])
matriz = np.array([Murder, Assault, UrbanPop, Rape])

# In[116]:

mcov = np.cov(matriz)
val, vect = np.linalg.eig(mcov)
vec1 = vect[:, 0]
vec2 = vect[:, 1]

# In[117]:

variables = ['Murder', 'Assault', 'UrbanPop', 'Rape']
plt.figure(figsize=(12, 11))

for i in range(len(states)):
    statesX = np.dot(vec1, matriz[:, i])
    statesY = np.dot(vec2, matriz[:, i])
    plt.annotate(states[i], (statesX, statesY), fontsize=9, c='green')
    plt.scatter(statesX, statesY, c='white')
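
# Note (sketch): np.linalg.eig does not return eigenvalues in any particular
# order, so taking vect[:, 0] and vect[:, 1] as the two leading principal
# components is only correct if the largest eigenvalues happen to come first.
# Sorting explicitly makes the choice robust:
orden = np.argsort(val)[::-1]        # eigenvalue indices, largest first
vec1 = vect[:, orden[0]]
vec2 = vect[:, orden[1]]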
Пример #54
0
        new_complex = complex(round(complex_number.real, 2),
                              round(complex_number.imag, 2))
        rounded_state_vector.append(new_complex)

    plot_bloch_multivector(statevector, title=str(rounded_state_vector))
    plt.savefig(filename)


url = "https://raw.githubusercontent.com/ibonreinoso/qiskit-hackathon-bilbao-19/master/DAX_PERFORMANCE_INDEX.csv"
data = pd.read_csv(url, sep=';')

data = data.drop(['wkn_500340'], axis=1)
data = data.loc[:, ['wkn_515100', 'wkn_575200']]
print(data)

sigma2 = np.cov(data.values.T)
rho2 = sigma2 / np.matrix.trace(sigma2)
print(rho2)
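
# Small sanity-check sketch: dividing the covariance matrix by its trace, as
# above, yields a unit-trace positive-semidefinite matrix, which is what a
# density matrix requires.
print(np.isclose(np.trace(rho2), 1.0))             # trace is 1
print(np.all(np.linalg.eigvalsh(rho2) >= -1e-12))  # eigenvalues non-negative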

eigenvalues, eigenvectors = np.linalg.eigh(rho2)
eigenvector1, eigenvector2 = eigenvectors.T  # eigenvectors are the columns of the second output
print(eigenvalues, eigenvector1, eigenvector2)

eigenvector1.dot(rho2)

eigenvector1 * eigenvalues[0]

NUM_QUBITS = 3
NUM_ITERATION = 50
SHOTS_PER_ITERATION = 8192
backend = BasicAer.get_backend('qasm_simulator')
state_vector = [1, 0]
Пример #55
0
def linregress(x, y=None):
    """
Calculate a regression line

This computes a least-squares regression for two sets of measurements.

Parameters
----------
x, y : array_like
two sets of measurements. Both arrays should have the same length.
If only x is given (and y=None), then it must be a two-dimensional
array where one dimension has length 2. The two sets of measurements
are then found by splitting the array along the length-2 dimension.

Returns
-------
slope : float
slope of the regression line
intercept : float
intercept of the regression line
sigma : float
standard deviation of the residuals about the fitted line


Examples
--------
>>> from scipy import stats
>>> import numpy as np
>>> x = np.random.random(10)
>>> y = np.random.random(10)
>>> slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)

# To get coefficient of determination (r_squared)

>>> print("r-squared:", r_value**2)
r-squared: 0.15286643777

"""
    TINY = 1.0e-20
    if y is None:  # x is a (2, N) or (N, 2) shaped array_like
        x = numpy.asarray(x)
        if x.shape[0] == 2:
            x, y = x
        elif x.shape[1] == 2:
            x, y = x.T
        else:
            msg = "If only `x` is given as input, it has to be of shape (2, N) \
or (N, 2), provided shape was %s" % str(x.shape)
            raise ValueError(msg)
    else:
        x = numpy.asarray(x)
        y = numpy.asarray(y)
    n = len(x)
    xmean = numpy.mean(x, None)
    ymean = numpy.mean(y, None)

    # average sum of squares:
    ssxm, ssxym, ssyxm, ssym = numpy.cov(x, y, bias=1).flat
    r_num = ssxym
    r_den = numpy.sqrt(ssxm * ssym)
    if r_den == 0.0:
        r = 0.0
    else:
        r = r_num / r_den
        # test for numerical error propagation
        if (r > 1.0):
            r = 1.0
        elif (r < -1.0):
            r = -1.0

    df = n - 2
    t = r * numpy.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
    #    prob = distributions.t.sf(numpy.abs(t),df)*2
    slope = r_num / ssxm
    intercept = ymean - slope * xmean
    sterrest = numpy.sqrt((1 - r * r) * ssym / ssxm / df)
    pred = intercept + slope * x
    sigma = numpy.sqrt(1. / (len(x) - 1) * numpy.sum((y - pred)**2))
    return slope, intercept, sigma
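
# Hedged usage sketch of the modified linregress above: unlike SciPy's
# version, it returns the residual standard deviation sigma as its third
# value.  The data below are illustrative only.
import numpy as np

rng = np.random.default_rng(1)
x = np.linspace(0.0, 10.0, 50)
y = 2.0 * x + 1.0 + rng.normal(scale=0.5, size=x.size)
slope, intercept, sigma = linregress(x, y)
print(slope, intercept, sigma)  # roughly 2.0, 1.0, 0.5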
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
iris = datasets.load_iris()
data = iris.data

K = 3
meu_0 = data[np.random.randint(data.shape[0], size=K)]
sigma_0 = np.array([np.cov(data.T)] * K)
alpha_0 = np.ones(K) * (1 / K)


def gaussian(x, meu_k, sigma_k):
    m = x - meu_k
    f = np.linalg.inv(sigma_k)
    norm = 1 / (np.sqrt(((2 * np.pi) ** len(x)) * (np.linalg.det(sigma_k))))  # (2*pi)^d for a d-dimensional Gaussian
    exp = np.exp(-0.5 * (m.T.dot(f.dot(m))))
    return norm * exp


def get_new_sigma(data, new_meu, w, N_k):
    sigma_tot = np.array([np.zeros((data.shape[1], data.shape[1]))] * K)
    for k in range(K):
        sum_sigma_i = 0
        for i in range(len(data)):
            x = data[i] - new_meu[k]
            sigma = w[i, k] * np.outer(x, x.T)
            sum_sigma_i += sigma
        sigma_tot[k] = sum_sigma_i / N_k[k]
    return sigma_tot
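
# Illustrative sketch (not in the original snippet) of the E-step that would
# pair with gaussian() and get_new_sigma() above in an EM loop: w[i, k] is the
# responsibility of component k for data point i.
def e_step(data, meu, sigma, alpha):
    w = np.zeros((len(data), K))
    for i in range(len(data)):
        for k in range(K):
            w[i, k] = alpha[k] * gaussian(data[i], meu[k], sigma[k])
        w[i, :] /= w[i, :].sum()
    return w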
Пример #57
0
    def calculate_songs_characteristics(self, mfcc_amount, cache_dir):
        """Calculate the songs characteristics.

        :param int mfcc_amount: The amount of mfccs to calculate.
        :param cache_dir: The directory to find and store the cache. The bpm
                          and mfcc is cached. If it is False caching is
                          disabled.
        :type cache_dir: str or ``False``
        :returns: A tuple of respectively their PCA components, a dictionary
                  for in which each song has a tuple of respectively their
                  cholesky decomposition, the mean of their mfcc and their
                  average BPM. Finally the return tuple contains the current
                  weights for calculating the covariance matrix.
        :rtype: tuple(numpy.array,
                      dict[string, tuple(numpy.array, int, int)],
                      numpy.array)
        """
        mfccs = dict()
        tempos = dict()
        average = numpy.zeros(mfcc_amount)
        song_properties = dict()

        # Calculate the average 20D feature vector for the mfccs
        for song_file in self.song_files:
            filename, _ = os.path.splitext(os.path.basename(song_file))
            l.debug("Currently loading %s.", filename)
            if cache_dir and os.path.isfile(
                    os.path.join(cache_dir, filename + "_done")):
                l.debug("Loading our song from cache.")
                mfcc = numpy.load(
                    os.path.join(cache_dir, filename + "_mfcc") + os.extsep +
                    'npy')
                tempo = numpy.load(
                    os.path.join(cache_dir, filename + "_tempo") + os.extsep +
                    'npy')
            else:
                l.debug("Song not found in cache, processing it.")
                if cache_dir:
                    mfcc, tempo = self.process_song_file(mfcc_amount,
                                                         cache_dir, song_file)
                else:
                    mfcc, tempo = self.get_mfcc_and_tempo(song_file,
                                                          mfcc_amount)

            mfccs[song_file] = mfcc
            tempos[song_file] = tempo
            average += mfcc.mean(1)

        # NOTE: We don't use the lengths of the songs as weights because we
        # prefer to weigh each song equally. We also don't know how long each
        # song will actually be played, so weighting by the full length
        # wouldn't make much sense anyway.
        average = average / len(self.song_files)
        average_covariance = numpy.array(
            [numpy.zeros(mfcc_amount) for _ in range(mfcc_amount)])

        # Now calculate the centered mfcc and covariance matrix for each song
        # and keep a running average of the average covariance matrix.
        for song_file, mfcc in mfccs.items():
            mfcc = (mfcc.T - average).T
            covariance = numpy.cov(mfcc)
            average_covariance += covariance
            props = (numpy.linalg.cholesky(covariance), numpy.mean(mfcc, 1),
                     tempos[song_file])
            song_properties[song_file] = props

        # Do PCA on the average covariance matrix
        average_covariance = average_covariance / len(self.song_files)
        pca = PCA(self.weight_amount)
        pca.fit(average_covariance.T)

        # Initialize the weights to the explained variance ratio if the weights
        # are not yet set.
        if self.weights is None:
            weights = pca.explained_variance_ratio_
        else:
            weights = self.weights

        return pca.components_.T, song_properties, weights
Пример #58
0
sharpe_ratios = np.zeros(len(good_pairs))
wealth_mat = np.zeros((len(good_pairs),len(wealth)-2))
for i in range(len(good_pairs)):
    if not i%10:
        print(i)
    wealth, q_stock_1, train_res = trading(good_pairs[i][0], good_pairs[i][1])
    sharpe_ratios[i] = sharpe_ratio(wealth[:-2])
    wealth_mat[i,:] = wealth[:-2]

print(np.mean(sharpe_ratios))
print(np.var(sharpe_ratios))


''' COMPUTE BETA '''
market = pd.read_csv(market_2010)
market['Date'] = pd.to_datetime(market['Date'],format = '%Y-%m-%d')
market = market.set_index('Date')
training_period_data = market[-len(wealth)+2:]
market_price = np.array(training_period_data['Open'])
tomorrow_price_market = market_price[1:]
today_price_market = market_price[:(len(market_price)-1)]
market_return = (tomorrow_price_market-today_price_market)/today_price_market



port = np.mean(wealth_mat, axis = 0)
port_return = (port[1:]- port[:-1])/port[:-1]

beta_mat = np.vstack((port_return,market_return))
print(np.cov(beta_mat))
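
# Sketch of how the portfolio beta would typically be read off the covariance
# matrix printed above: beta = Cov(portfolio, market) / Var(market).
cov_pm = np.cov(beta_mat)
beta = cov_pm[0, 1] / cov_pm[1, 1]
print("beta:", beta)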
Пример #59
0
def find_ellipse(prob, cl=90, projection='ARC', nest=False):
    """For a HEALPix map, find an ellipse that contains a given probability.

    The orientation is defined as the angle of the semimajor axis
    counterclockwise from west on the plane of the sky. If you think of the
    semimajor distance as the width of the ellipse, then the orientation is the
    clockwise rotation relative to the image x-axis. Equivalently, the
    orientation is the position angle of the semi-minor axis.

    These conventions match the definitions used in DS9 region files [1]_ and
    Aladin drawing commands [2]_.

    Parameters
    ----------
    prob : np.ndarray, astropy.table.Table
        The HEALPix probability map, either as a full rank explicit array
        or as a multi-order map.
    cl : float
        The desired credible level (default: 90).
    projection : str, optional
        The WCS projection (default: 'ARC', or zenithal equidistant).
        For a list of possible values, see the Astropy documentation [3]_.
    nest : bool
        HEALPix pixel ordering (default: False, or ring ordering).

    Returns
    -------
    ra : float
        The ellipse center right ascension in degrees.
    dec : float
        The ellipse center declination in degrees.
    a : float
        The length of the semimajor axis in degrees.
    b : float
        The length of the semiminor axis in degrees.
    pa : float
        The orientation of the ellipse axis on the plane of the sky in degrees.
    area : float
        The area of the ellipse in square degrees.

    Notes
    -----

    The center of the ellipse is the median a posteriori sky position. The
    length and orientation of the semi-major and semi-minor axes are measured
    as follows:

    1. The sky map is transformed to a WCS projection that may be specified by
       the caller. The default projection is ``ARC`` (zenithal equidistant), in
       which radial distances are proportional to the physical angular
       separation from the center point.
    2. A 1-sigma ellipse is estimated by calculating the covariance matrix in
       the projected image plane using three rounds of sigma clipping to reject
       distant outlier points.
    3. The 1-sigma ellipse is inflated until it encloses an integrated
       probability of ``cl`` (default: 90%).

    The function returns a tuple of the right ascension, declination,
    semi-major distance, semi-minor distance, and orientation angle, all in
    degrees.

    References
    ----------

    .. [1] http://ds9.si.edu/doc/ref/region.html
    .. [2] http://aladin.u-strasbg.fr/java/AladinScriptManual.gml#draw
    .. [3] http://docs.astropy.org/en/stable/wcs/index.html#supported-projections

    Examples
    --------

    **Example 1**

    First, we need some imports.

    >>> from astropy.io import fits
    >>> from astropy.utils.data import download_file
    >>> from astropy.wcs import WCS
    >>> import healpy as hp
    >>> from reproject import reproject_from_healpix
    >>> import subprocess

    Next, we download the BAYESTAR sky map for GW170817 from the
    LIGO Document Control Center.

    >>> url = 'https://dcc.ligo.org/public/0146/G1701985/001/bayestar.fits.gz'  # doctest: +SKIP
    >>> filename = download_file(url, cache=True, show_progress=False)  # doctest: +SKIP
    >>> _, healpix_hdu = fits.open(filename)  # doctest: +SKIP
    >>> prob = hp.read_map(healpix_hdu, verbose=False)  # doctest: +SKIP

    Then, we calculate ellipse and write it to a DS9 region file.

    >>> ra, dec, a, b, pa, area = find_ellipse(prob)  # doctest: +SKIP
    >>> print(*np.around([ra, dec, a, b, pa, area], 5))  # doctest: +SKIP
    195.03732 -19.29358 8.66545 1.1793 63.61698 32.07665
    >>> s = 'fk5;ellipse({},{},{},{},{})'.format(ra, dec, a, b, pa)  # doctest: +SKIP
    >>> open('ds9.reg', 'w').write(s)  # doctest: +SKIP

    Then, we reproject a small patch of the HEALPix map, and save it to a file.

    >>> wcs = WCS()  # doctest: +SKIP
    >>> wcs.wcs.ctype = ['RA---ARC', 'DEC--ARC']  # doctest: +SKIP
    >>> wcs.wcs.crval = [ra, dec]  # doctest: +SKIP
    >>> wcs.wcs.crpix = [128, 128]  # doctest: +SKIP
    >>> wcs.wcs.cdelt = [-0.1, 0.1]  # doctest: +SKIP
    >>> img, _ = reproject_from_healpix(healpix_hdu, wcs, [256, 256])  # doctest: +SKIP
    >>> img_hdu = fits.ImageHDU(img, wcs.to_header())  # doctest: +SKIP
    >>> img_hdu.writeto('skymap.fits')  # doctest: +SKIP

    Now open the image and region file in DS9. You should find that the ellipse
    encloses the probability hot spot. You can load the sky map and region file
    from the command line:

    .. code-block:: sh

        $ ds9 skymap.fits -region ds9.reg

    Or you can do this manually:

        1. Open DS9.
        2. Open the sky map: select "File->Open..." and choose ``skymap.fits``
           from the dialog box.
        3. Open the region file: select "Regions->Load Regions..." and choose
           ``ds9.reg`` from the dialog box.

    Now open the image and region file in Aladin.

        1. Open Aladin.
        2. Open the sky map: select "File->Load Local File..." and choose
           ``skymap.fits`` from the dialog box.
        3. Open the sky map: select "File->Load Local File..." and choose
           ``ds9.reg`` from the dialog box.

    You can also compare the original HEALPix file with the ellipse in Aladin:

        1. Open Aladin.
        2. Open the HEALPix file by pasting the URL from the top of this
           example in the Command field at the top of the window and hitting
           return, or by selecting "File->Load Direct URL...", pasting the URL,
           and clicking "Submit."
        3. Open the sky map: select "File->Load Local File..." and choose
           ``ds9.reg`` from the dialog box.

    **Example 2**

    This example shows that we get approximately the same answer for GW170817
    if we read it in as a multi-order map.

    >>> from ..io import read_sky_map  # doctest: +SKIP
    >>> skymap_moc = read_sky_map(healpix_hdu, moc=True)  # doctest: +SKIP
    >>> ellipse = find_ellipse(skymap_moc)  # doctest: +SKIP
    >>> print(*np.around(ellipse, 5))  # doctest: +SKIP
    195.03709 -19.27589 8.67611 1.18167 63.60454 32.08015

    **Example 3**

    I'm not showing the `ra` or `pa` output from the examples below because
    the right ascension is arbitrary when dec=90° and the position angle is
    arbitrary when a=b; their arbitrary values may vary depending on your math
    library. Also, I add 0.0 to the outputs because on some platforms you tend
    to get values of dec or pa that get rounded to -0.0, which is within
    numerical precision but would break the doctests (see
    https://stackoverflow.com/questions/11010683).

    This is an example sky map that is uniform in sin(theta) out to a given
    radius in degrees. The 90% credible radius should be 0.9 * radius. (There
    will be deviations for small radius due to finite resolution.)

    >>> def make_uniform_in_sin_theta(radius, nside=512):
    ...     npix = hp.nside2npix(nside)
    ...     theta, phi = hp.pix2ang(nside, np.arange(npix))
    ...     theta_max = np.deg2rad(radius)
    ...     prob = np.where(theta <= theta_max, 1 / np.sin(theta), 0)
    ...     return prob / prob.sum()
    ...

    >>> prob = make_uniform_in_sin_theta(1)
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (225.0, 89.90862520480792, 0.8703361458208101, 0.8703357768874356, 0.0, 2.3788811576269793)

    >>> prob = make_uniform_in_sin_theta(10)
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (225.0, 89.90827657529562, 9.024846562072119, 9.024842703023802, 0.0, 255.11972196535515)

    >>> prob = make_uniform_in_sin_theta(120)
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (179.99995257991023, 90.0, 107.9745037610576, 107.97450376105758, 0.0, 26988.70467497216)

    **Example 4**

    These are approximately Gaussian distributions.

    >>> from scipy import stats
    >>> def make_gaussian(mean, cov, nside=512):
    ...     npix = hp.nside2npix(nside)
    ...     xyz = np.transpose(hp.pix2vec(nside, np.arange(npix)))
    ...     dist = stats.multivariate_normal(mean, cov)
    ...     prob = dist.pdf(xyz)
    ...     return prob / prob.sum()
    ...

    This one is centered at RA=45°, Dec=0° and has a standard deviation of ~1°.

    >>> prob = make_gaussian(
    ...     [1/np.sqrt(2), 1/np.sqrt(2), 0],
    ...     np.square(np.deg2rad(1)))
    ...
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (45.0, 0.0, 2.1424077148886744, 2.1420790721225518, 90.0, 14.467701995920123)

    This one is centered at RA=45°, Dec=0°, and is elongated in the north-south
    direction.

    >>> prob = make_gaussian(
    ...     [1/np.sqrt(2), 1/np.sqrt(2), 0],
    ...     np.diag(np.square(np.deg2rad([1, 1, 10]))))
    ...
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (44.99999999999999, 0.0, 13.58768882719899, 2.0829846178241853, 90.0, 88.57796576937031)

    This one is centered at RA=0°, Dec=0°, and is elongated in the east-west
    direction.

    >>> prob = make_gaussian(
    ...     [1, 0, 0],
    ...     np.diag(np.square(np.deg2rad([1, 10, 1]))))
    ...
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (0.0, 0.0, 13.583918022027149, 2.0823769912401433, 0.0, 88.54622940628761)

    This one is centered at RA=0°, Dec=0°, and has its long axis tilted about
    10° to the west of north.

    >>> prob = make_gaussian(
    ...     [1, 0, 0],
    ...     [[0.1, 0, 0],
    ...      [0, 0.1, -0.15],
    ...      [0, -0.15, 1]])
    ...
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (0.0, 0.0, 64.7713312709293, 33.50754131182681, 80.78231196786838, 6372.344658663038)

    This one is centered at RA=0°, Dec=0°, and has its long axis tilted about
    10° to the east of north.

    >>> prob = make_gaussian(
    ...     [1, 0, 0],
    ...     [[0.1, 0, 0],
    ...      [0, 0.1, 0.15],
    ...      [0, 0.15, 1]])
    ...
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (0.0, 0.0, 64.77133127093047, 33.50754131182745, 99.21768803213159, 6372.344658663096)

    This one is centered at RA=0°, Dec=0°, and has its long axis tilted about
    80° to the east of north.

    >>> prob = make_gaussian(
    ...     [1, 0, 0],
    ...     [[0.1, 0, 0],
    ...      [0, 1, 0.15],
    ...      [0, 0.15, 0.1]])
    ...
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (0.0, 0.0, 64.7756448603915, 33.509863018519894, 170.78252287327365, 6372.425731592412)

    This one is centered at RA=0°, Dec=0°, and has its long axis tilted about
    80° to the west of north.

    >>> prob = make_gaussian(
    ...     [1, 0, 0],
    ...     [[0.1, 0, 0],
    ...      [0, 1, -0.15],
    ...      [0, -0.15, 0.1]])
    ...
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (0.0, 0.0, 64.77564486039148, 33.50986301851987, 9.217477126726322, 6372.42573159241)
    """  # noqa: E501
    try:
        prob['UNIQ']
    except (IndexError, KeyError, ValueError):
        npix = len(prob)
        nside = hp.npix2nside(npix)
        ipix = range(npix)
        area = hp.nside2pixarea(nside, degrees=True)
    else:
        order, ipix = moc.uniq2nest(prob['UNIQ'])
        nside = 1 << order.astype(int)
        ipix = ipix.astype(int)
        area = hp.nside2pixarea(nside)
        prob = prob['PROBDENSITY'] * area
        area *= np.square(180 / np.pi)
        nest = True

    # Find median a posteriori sky position.
    xyz0 = [
        quantile(x, 0.5, weights=prob)
        for x in hp.pix2vec(nside, ipix, nest=nest)
    ]
    (ra, ), (dec, ) = hp.vec2ang(np.asarray(xyz0), lonlat=True)

    # Construct WCS with the specified projection
    # and centered on mean direction.
    w = WCS()
    w.wcs.crval = [ra, dec]
    w.wcs.ctype = ['RA---' + projection, 'DEC--' + projection]

    # Transform HEALPix to zenithal equidistant coordinates.
    xy = w.wcs_world2pix(
        np.transpose(hp.pix2ang(nside, ipix, nest=nest, lonlat=True)), 1)

    # Keep only values that were inside the projection.
    keep = np.logical_and.reduce(np.isfinite(xy), axis=1)
    xy = xy[keep]
    prob = prob[keep]
    if not np.isscalar(area):
        area = area[keep]

    # Find covariance matrix, performing three rounds of sigma-clipping
    # to reject outliers.
    keep = np.ones(len(xy), dtype=bool)
    for _ in range(3):
        c = np.cov(xy[keep], aweights=prob[keep], rowvar=False)
        nsigmas = np.sqrt(np.sum(xy.T * np.linalg.solve(c, xy.T), axis=0))
        keep &= (nsigmas < 3)

    # Find the number of sigma that enclose the cl% credible level.
    i = np.argsort(nsigmas)
    nsigmas = nsigmas[i]
    cls = np.cumsum(prob[i])
    if np.isscalar(area):
        careas = np.arange(1, len(i) + 1) * area
    else:
        careas = np.cumsum(area[i])
    nsigma = np.interp(1e-2 * cl, cls, nsigmas)
    area = np.interp(1e-2 * cl, cls, careas)

    # If the credible level is not within the projection,
    # then stop here and return all nans.
    if 1e-2 * cl > cls[-1]:
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

    # Find the eigendecomposition of the covariance matrix.
    w, v = np.linalg.eigh(c)

    # Find the semi-minor and semi-major axes.
    b, a = nsigma * np.sqrt(w)

    # Find the position angle.
    pa = np.rad2deg(np.arctan2(*v[0]))

    # An ellipse is symmetric under rotations of 180°.
    # Return the smallest possible positive position angle.
    pa %= 180

    # Done!
    return ra, dec, a, b, pa, area
Пример #60
0
def MC(x,
       Ux,
       b,
       a,
       Uab,
       runs=1000,
       blow=None,
       alow=None,
       return_samples=False,
       shift=0,
       verbose=True):
    r"""Standard Monte Carlo method

    Monte Carlo based propagation of uncertainties for a digital filter (b,a)
    with uncertainty matrix :math:`U_{\theta}` for
    :math:`\theta=(a_1,\ldots,a_{N_a},b_0,\ldots,b_{N_b})^T`

    Parameters
    ----------
        x: np.ndarray
            filter input signal
        Ux: float or np.ndarray
            standard deviation of signal noise (float), point-wise standard
            uncertainties or covariance matrix associated with x
        b: np.ndarray
            filter numerator coefficients
        a: np.ndarray
            filter denominator coefficients
        Uab: np.ndarray
            uncertainty matrix :math:`U_\theta`
        runs: int,optional
            number of Monte Carlo runs
        return_samples: bool, optional
            whether samples or mean and std are returned

    If ``return_samples`` is ``False``, the method returns:

    Returns
    -------
        y: np.ndarray
            filter output signal
        Uy: np.ndarray
            uncertainty associated with y

    Otherwise the method returns:

    Returns
    -------
        Y: np.ndarray
            array of Monte Carlo results

    References
    ----------
        * Eichstädt, Link, Harris and Elster [Eichst2012]_
    """

    Na = len(a)
    runs = int(runs)

    Y = np.zeros((runs, len(x)))  # set up matrix of MC results
    # create the parameter vector from the filter coefficients
    theta = np.hstack((a[1:], b))
    # Theta is small, so the full matrix of parameter samples can be drawn at once
    Theta = np.random.multivariate_normal(theta, Uab, runs)

    if isinstance(Ux, np.ndarray):
        if len(Ux.shape) == 1:
            dist = Normal_ZeroCorr(loc=x,
                                   scale=Ux)  # non-iid noise w/o correlation
        else:
            dist = stats.multivariate_normal(x, Ux)  # colored noise
    elif isinstance(Ux, float):
        dist = Normal_ZeroCorr(loc=x, scale=Ux)  # iid noise
    else:
        raise NotImplementedError(
            "The supplied type of uncertainty is not implemented")

    unst_count = 0  # Count how often in the MC runs the IIR filter is unstable.
    st_inds = list()
    if verbose:
        sys.stdout.write("MC progress: ")
    for k in range(runs):
        xn = dist.rvs()  # draw filter input signal
        if blow is not None:
            if alow is None:
                alow = 1.0  # FIR low-pass filter
            xn = lfilter(blow, alow, xn)  # low-pass filtered input signal
        bb = Theta[k, Na - 1:]
        aa = np.hstack((1.0, Theta[k, :Na - 1]))
        if isstable(bb, aa):
            Y[k, :] = lfilter(bb, aa, xn)
            st_inds.append(k)
        else:
            unst_count += 1  # don't apply the IIR filter if it's unstable
        if np.mod(k, 0.1 * runs) == 0 and verbose:
            sys.stdout.write(" %d%%" % (np.round(100.0 * k / runs)))
    if verbose:
        sys.stdout.write(" 100%\n")

    if unst_count > 0:
        print("In %d Monte Carlo runs, %d filters were unstable" %
              (runs, unst_count))
        print("These runs are not considered for the calculation of the mean "
              "and std.")
        print("However, if return_samples is True then ALL samples are "
              "returned.")

    Y = np.roll(Y, int(shift), axis=1)  # correct for the (known) sample delay

    if return_samples:
        return Y
    else:
        y = np.mean(Y[st_inds, :], axis=0)
        uy = np.cov(Y[st_inds, :], rowvar=False)
        return y, uy
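
# Hedged usage sketch of MC as defined above, assuming the surrounding module
# (PyDynamic-style) already provides lfilter, isstable and Normal_ZeroCorr.
# The filter and the coefficient covariance Uab below are purely illustrative.
import numpy as np
from scipy.signal import butter

b, a = butter(2, 0.25)                  # a small, stable IIR filter
theta_dim = (len(a) - 1) + len(b)       # theta = (a_1..a_Na, b_0..b_Nb)
Uab = 1e-8 * np.eye(theta_dim)          # tiny coefficient uncertainty
x = np.sin(2 * np.pi * 0.05 * np.arange(200))
y, Uy = MC(x, Ux=0.01, b=b, a=a, Uab=Uab, runs=200, verbose=False)
print(y.shape, Uy.shape)                # (200,), (200, 200)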