Пример #1
0
    def make_network(self, algo = 'crude', **kwargs):
        '''
        Build an OTUnetwork from an association matrix computed with the requested algorithm.
        Inputs:
            algo = [str] how the association matrix and its p-values are computed:
                'crude'  : pearsonr correlation of the log10 of self.to_Surveymat(**kwargs).
                'sparse', 'csi', 'clr' : second value returned by self.basis_corr(method = algo, **kwargs).
                           No p-values are computed; the matrix itself is passed in their place.
                'basis'  : pearsonr correlation of the transposed np.log(self.basis).
                'pearsonr', 'kendalltau', 'spearmanr' : that correlation of self.to_Surveymat().
        Optional inputs:
            row_labels = [list] row labels of the matrices built here. Default to range(k), where n, k = self.shape.
            col_labels = [list] col labels of the matrices built here. Default to range(n).
            All kwargs are also forwarded to self.to_Surveymat and self.basis_corr.
        Return the OTUnetwork built from the association matrix.
        Raise ValueError if algo is not one of the values listed above. This includes 'MI':
        no branch ever computed a matrix for it, so it used to fail with a NameError.
        '''
        import SurveyStructures.OTUnetwork as ONet
        n, k       = self.shape
        row_labels = kwargs.get('row_labels', range(k))
        col_labels = kwargs.get('col_labels', range(n))

        # NOTE(review): a method spelled to_SurveyMat exists elsewhere in this code base;
        # confirm that to_Surveymat is really defined on this class.
        md = self.to_Surveymat(**kwargs)
        if algo == 'crude':
            mat, pval = (md.log10()).correlation(type = 'pearsonr')
        elif algo in ('sparse', 'csi', 'clr'):
            v_sparse, c_sparse, Cov_sparse = self.basis_corr(method = algo, **kwargs)
            mat = MatrixDictionary()
            mat.from_matrix(c_sparse, row_labels, row_labels)
            pval = mat  # placeholder: this branch produces no p-values
        elif algo == 'basis':
            md = Survey_matrix()
            md.from_matrix(np.log(self.basis).transpose(), row_labels, col_labels)
            mat, pval = md.correlation(type = 'pearsonr')
        elif algo in ('pearsonr', 'kendalltau', 'spearmanr'):
            abunds = self.to_Surveymat()
            mat, pval = abunds.correlation(type = algo)
        else:
            # Fail loudly instead of reaching the network construction with
            # mat / pval unbound.
            raise ValueError('unsupported algo for make_network: %r' % (algo,))

        ## make network
        return ONet.OTUnetwork(node_ids = mat.row_labels(), matrix = mat, p_vals = {'direct':pval}, algo = algo, lag = 0)
Пример #2
0
def dirichlet_multi(a , n, N, **kwargs):
    '''
    Each sample is a sample from a multinomial with probabilities drawn from a dirichlet.
    Inputs:
        a = [array] alpha parameters of dirichlet. length determines the number of otus.
        n = [int] number of samples. Converted with int().
        N = [int/array] number of reads to take for each sample.
            If a scalar (python or numpy), take the same number of reads from each sample.
            If list, length must be = n.
    Optional inputs:
        otu_labels    = [list] row labels for Survey_matrix. Default to ['otu_1','otu_2',...,'otu_k'].
        sample_labels = [list] col labels for Survey_matrix. Default to ['sample_1','sample_2',...,'sample_n'].
    Return Survey_matrix of simulated data.
    '''
    k = len(a)              # number of otus
    n = int(n)              # use the same integer for dirichlet, the matrix shape and the loop
    if 'otu_labels' in kwargs:
        otu_labels = kwargs['otu_labels']
    else:
        # real lists, so the labels can be traversed more than once
        otu_labels = ['otu_' + str(i) for i in range(1, 1 + k)]
    if 'sample_labels' in kwargs:
        sample_labels = kwargs['sample_labels']
    else:
        sample_labels = ['sample_' + str(i) for i in range(1, 1 + n)]
    # Any 0-dimensional value (int, long, numpy integer) is one read depth
    # shared by all samples; type(N) == int missed the non-builtin ones.
    if np.ndim(N) == 0: N = [N] * n

    probs = dirichlet(a, n) # each row gives the probabilities of different otus in 1 sample.
    mat   = np.zeros((k, n))
    for i in range(n):
        mat[:,i] = multinomial(N[i], probs[i,:])
    sim_data = Survey_matrix()
    sim_data.from_matrix(mat, row_labels = otu_labels, col_labels = sample_labels)
    return sim_data
Пример #3
0
def dirichlet_multi(a, n, N, **kwargs):
    '''
    Each sample is a sample from a multinomial with probabilities drawn from a dirichlet.
    Inputs:
        a = [array] alpha parameters of dirichlet. length determines the number of otus.
        n = [int] number of samples. Converted with int().
        N = [int/array] number of reads to take for each sample.
            If a scalar (python or numpy), take the same number of reads from each sample.
            If list, length must be = n.
    Optional inputs:
        otu_labels    = [list] row labels for Survey_matrix. Default to ['otu_1','otu_2',...,'otu_k'].
        sample_labels = [list] col labels for Survey_matrix. Default to ['sample_1','sample_2',...,'sample_n'].
    Return Survey_matrix of simulated data.
    '''
    k = len(a)  # number of otus
    n = int(n)  # one integer for dirichlet, the matrix shape and the loop

    # Default labels are built as lists (not map objects) so that they can be
    # iterated repeatedly by whatever consumes them.
    otu_labels = (kwargs['otu_labels'] if 'otu_labels' in kwargs
                  else ['otu_%d' % i for i in range(1, 1 + k)])
    sample_labels = (kwargs['sample_labels'] if 'sample_labels' in kwargs
                     else ['sample_%d' % i for i in range(1, 1 + n)])

    # A scalar read depth of any integer flavour (int, long, numpy integer)
    # is shared by every sample; type(N) == int only caught builtin ints.
    if np.ndim(N) == 0:
        N = [N] * n

    # each row gives the probabilities of different otus in 1 sample.
    probs = dirichlet(a, n)
    mat = np.zeros((k, n))
    for i in range(n):
        mat[:, i] = multinomial(N[i], probs[i, :])
    sim_data = Survey_matrix()
    sim_data.from_matrix(mat, row_labels=otu_labels, col_labels=sample_labels)
    return sim_data
Пример #4
0
def independent_kde(site, original):
    '''
    Simulate a dataset in which each OTU is drawn independently from a pickled, per-OTU cdf.
    Inputs:
        site     = [str] prefix of the pickle file '../data/otu_dist/<site>_filtered_kde.pick'.
        original = object whose to_matrix() returns (counts, otus, samples).
                   Only the otu and sample labels are used.
    Return Survey_matrix of simulated data, labeled like original.
    Raise IndexError if a random draw is not exceeded by any entry of an OTU's cdf.
    '''
    from numpy.random import rand
    _, otus, samples = original.to_matrix()
    k = len(otus)
    n = len(samples)
    # Every sample uses a fixed 1000 as the first argument of binomial.
    N = [1000] * n
    ## load kde of each OTU
    path = '../data/otu_dist/'
    fname = site + '_filtered_kde.pick'
    # 'with' closes the file even if unpickling fails.
    # NOTE(review): text mode only suits ASCII (protocol 0) pickles read on
    # Python 2 - confirm how the file was written before switching to 'rb'.
    # pickle must only be used on trusted files.
    with open(path + fname, 'r') as f:
        _kdes, cdfs = pickle.load(f)
    ## simulate each OTU
    x = np.linspace(0, 1, 100)
    simulated = np.zeros((k, n))
    for i, otu in enumerate(otus):
        cdf = cdfs[otu]  # presumably the cdf evaluated on the grid x - TODO confirm
        for j in range(n):
            # first grid point whose cdf value exceeds a uniform draw
            p = x[cdf > rand()][0]
            simulated[i, j] = binomial(N[j], p)
    ## convert to Survey_matrix
    sim = Survey_matrix()
    sim.from_matrix(simulated, otus, samples)
    return sim
Пример #5
0
 def to_SurveyMat(self, **kwargs):
     '''
     Return a Survey_matrix built from the transpose of this object.
     Optional inputs:
         row_labels = [list] row labels of the result. Default to range(k), where n, k = self.shape.
         col_labels = [list] col labels of the result. Default to range(n).
     '''
     n_first, n_second = self.shape
     labels_for_rows = kwargs.get('row_labels', range(n_second))
     labels_for_cols = kwargs.get('col_labels', range(n_first))
     survey = Survey_matrix()
     survey.from_matrix(self.transpose(), labels_for_rows, labels_for_cols)
     return survey
Пример #6
0
def independent_kde(site, original):
    '''
    Simulate a dataset in which each OTU is drawn independently from a pickled, per-OTU cdf.
    Inputs:
        site     = [str] prefix of the pickle file '../data/otu_dist/<site>_filtered_kde.pick'.
        original = object whose to_matrix() returns (counts, otus, samples).
                   Only the otu and sample labels are used.
    Return Survey_matrix of simulated data, labeled like original.
    Raise IndexError if a random draw is not exceeded by any entry of an OTU's cdf.
    '''
    from numpy.random import rand
    _, otus, samples = original.to_matrix()
    k = len(otus)
    n = len(samples)
    N = [1000] * n   # fixed first argument of binomial for every sample
    ## load kde of each OTU
    path     = '../data/otu_dist/'
    filename = site + '_filtered_kde.pick'
    # The context manager guarantees the handle is closed when pickle.load raises.
    # NOTE(review): mode 'r' only suits ASCII (protocol 0) pickles on Python 2;
    # confirm the writer before changing it. Only unpickle trusted files.
    with open(path + filename,'r') as f:
        _kdes, cdfs = pickle.load(f)
    ## simulate each OTU
    x = np.linspace(0,1,100)
    simulated = np.zeros((k,n))
    for i,otu in enumerate(otus):
        cdf = cdfs[otu]   # presumably the cdf evaluated on the grid x - TODO confirm
        for j in range(n):
            p = x[cdf>rand()][0]   # first grid point whose cdf value exceeds a uniform draw
            simulated[i,j] = binomial(N[j],p)
    ## convert to Survey_matrix
    sim = Survey_matrix()
    sim.from_matrix(simulated, otus, samples)
    return sim
Пример #7
0
 def to_SurveyMat(self, **kwargs):
     '''
     Wrap the transpose of this object in a new Survey_matrix.
     Optional inputs:
         row_labels = [list] row labels of the new matrix. Default to range(k), where n, k = self.shape.
         col_labels = [list] col labels of the new matrix. Default to range(n).
     Return the new Survey_matrix.
     '''
     result     = Survey_matrix()
     dims       = self.shape
     n, k       = dims
     rows       = kwargs.get('row_labels', range(k))
     cols       = kwargs.get('col_labels', range(n))
     transposed = self.transpose()
     result.from_matrix(transposed, rows, cols)
     return result
Пример #8
0
def test_simulate():
    '''
    Smoke test: run simulate() with dirichlet_multi on a 2x2 Survey_matrix and print the result.
    '''
    mat = Survey_matrix()
    x = np.array([[20,20],[20,40]])
    r_lab = ['r0','r1']
    c_lab = ['c0','c1']
    mat.from_matrix(x,r_lab,c_lab)
    sims = simulate(mat, dirichlet_multi, num_sim = 10)
    # Parenthesised form prints the same on Python 2 and also parses on Python 3.
    print(sims)
Пример #9
0
def test_simulate():
    '''
    Smoke test for simulate(): build a 2x2 Survey_matrix, request 10
    simulations using dirichlet_multi and print whatever is returned.
    '''
    mat = Survey_matrix()
    x = np.array([[20, 20], [20, 40]])
    r_lab = ['r0', 'r1']
    c_lab = ['c0', 'c1']
    mat.from_matrix(x, r_lab, c_lab)
    sims = simulate(mat, dirichlet_multi, num_sim=10)
    # print(...) with one argument is valid and equivalent on Python 2,
    # and unlike the print statement it is not a SyntaxError on Python 3.
    print(sims)
Пример #10
0
def test_permute():
    '''
    Smoke test: print a 2x5 count matrix followed by the output of permute_w_replacement on it.
    '''
    mat = Survey_matrix()
    x = np.array([[1,2,3,4,5],[6,7,8,9,10]])
    r_lab = ['r0','r1']
    c_lab = ['c0','c1','c2','c3','c4']
    mat.from_matrix(x,r_lab,c_lab)
    sims = permute_w_replacement(mat)
    # Parenthesised form prints the same on Python 2 and also parses on Python 3.
    print(x)
    print(sims.to_matrix())
Пример #11
0
def test_permute():
    '''
    Smoke test for permute_w_replacement(): print the 2x5 input counts and
    then the to_matrix() output of the permuted Survey_matrix.
    '''
    mat = Survey_matrix()
    x = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
    r_lab = ['r0', 'r1']
    c_lab = ['c0', 'c1', 'c2', 'c3', 'c4']
    mat.from_matrix(x, r_lab, c_lab)
    sims = permute_w_replacement(mat)
    # print(...) with one argument is valid and equivalent on Python 2,
    # and unlike the print statement it is not a SyntaxError on Python 3.
    print(x)
    print(sims.to_matrix())
Пример #12
0
def permute_w_replacement(original):
    '''
    Create a simulated dataset where the count of each otu in each sample is
    randomly sampled, with replacement, from the counts of that otu in all samples.
    Inputs:
        original = object whose to_matrix() returns (counts, otus, samples).
    Return Survey_matrix of resampled counts with the labels of original.
    '''
    from numpy.random import randint
    counts, otus, samples = original.to_matrix()
    n_otus, n_samples = counts.shape
    resampled = np.zeros(counts.shape)
    for otu_idx in range(n_otus):
        # one independently drawn column index per sample
        picks = randint(0, n_samples, (1, n_samples))
        resampled[otu_idx] = counts[otu_idx, picks]
    result = Survey_matrix()
    result.from_matrix(resampled, otus, samples)
    return result
Пример #13
0
def permute_w_replacement(original):
    '''
    Resample every row of the count matrix with replacement.

    Each entry of a simulated row is one of the counts of the same row of
    original.to_matrix(), taken at a uniformly drawn column index.
    Return Survey_matrix of the resampled counts, labeled like original.
    '''
    from numpy.random import randint

    counts, otus, samples = original.to_matrix()
    num_rows, num_cols = counts.shape
    shuffled = np.zeros((num_rows, num_cols))
    draw_shape = (1, num_cols)
    for r in range(num_rows):
        col_idx = randint(0, num_cols, draw_shape)
        shuffled[r] = counts[r, col_idx]

    permuted = Survey_matrix()
    permuted.from_matrix(shuffled, otus, samples)
    return permuted
Пример #14
0
    def make_network(self, algo='crude', **kwargs):
        '''
        Build an OTUnetwork from an association matrix computed with the requested algorithm.
        Inputs:
            algo = [str] how the association matrix and its p-values are computed:
                'crude'  : pearsonr correlation of the log10 of self.to_Surveymat(**kwargs).
                'sparse', 'csi', 'clr' : second value returned by self.basis_corr(method=algo, **kwargs).
                           No p-values are computed; the matrix itself is passed in their place.
                'basis'  : pearsonr correlation of the transposed np.log(self.basis).
                'pearsonr', 'kendalltau', 'spearmanr' : that correlation of self.to_Surveymat().
        Optional inputs:
            row_labels = [list] row labels of the matrices built here. Default to range(k), where n, k = self.shape.
            col_labels = [list] col labels of the matrices built here. Default to range(n).
            All kwargs are also forwarded to self.to_Surveymat and self.basis_corr.
        Return the OTUnetwork built from the association matrix.
        Raise ValueError if algo is not one of the values listed above. This includes 'MI':
        no branch ever computed a matrix for it, so it used to fail with a NameError.
        '''
        import SurveyStructures.OTUnetwork as ONet
        n, k = self.shape
        row_labels = kwargs.get('row_labels', range(k))
        col_labels = kwargs.get('col_labels', range(n))

        # NOTE(review): a method spelled to_SurveyMat exists elsewhere in this
        # code base; confirm that to_Surveymat is really defined on this class.
        md = self.to_Surveymat(**kwargs)
        if algo == 'crude':
            mat, pval = (md.log10()).correlation(type='pearsonr')
        elif algo in ('sparse', 'csi', 'clr'):
            v_sparse, c_sparse, Cov_sparse = self.basis_corr(method=algo,
                                                             **kwargs)
            mat = MatrixDictionary()
            mat.from_matrix(c_sparse, row_labels, row_labels)
            # placeholder: this branch produces no p-values
            pval = mat
        elif algo == 'basis':
            md = Survey_matrix()
            log_basis = np.log(self.basis)
            md.from_matrix(log_basis.transpose(), row_labels, col_labels)
            mat, pval = md.correlation(type='pearsonr')
        elif algo in ('pearsonr', 'kendalltau', 'spearmanr'):
            abunds = self.to_Surveymat()
            mat, pval = abunds.correlation(type=algo)
        else:
            # Without this guard an unknown algo reached the network
            # construction below with mat / pval unbound.
            raise ValueError('unsupported algo for make_network: %r' %
                             (algo,))

        ## make network
        net = ONet.OTUnetwork(node_ids=mat.row_labels(),
                              matrix=mat,
                              p_vals={'direct': pval},
                              algo=algo,
                              lag=0)
        return net
Пример #15
0
def test_independent_betabinom():
    '''
    Smoke test for independent_betabinom(): print pearsonr correlations and
    p-values of a random 5x50 dataset (row 1 is 100 minus row 0), of its
    simulation, and of the simulation after normalize(rows=False).
    '''
    n = 5
    m = 50
    mat = np.floor(100 * np.random.rand(n, m))
    mat[1, :] = 100 - mat[0, :]
    row_labels = range(n)
    col_labels = range(m)
    data = Survey_matrix()
    data.from_matrix(mat, row_labels, col_labels)
    # print(...) with one argument is valid and equivalent on Python 2,
    # and unlike the print statement it is not a SyntaxError on Python 3.
    c, p = data.correlation(type='pearsonr')
    print(c.to_matrix())
    print(p.to_matrix())
    sim = independent_betabinom(data)
    c_sim, p = sim.correlation(type='pearsonr')
    print(c_sim.to_matrix())
    print(p.to_matrix())
    sim_norm = sim.normalize(rows=False)
    c_sim_norm, p = sim_norm.correlation(type='pearsonr')
    print(c_sim_norm.to_matrix())
    print(p.to_matrix())
Пример #16
0
def test_independent_betabinom():
    '''
    Smoke test: print correlations and p-values of a random 5x50 dataset whose row 1 is
    100 minus row 0, of its independent_betabinom simulation, and of the normalized simulation.
    '''
    n = 5
    m = 50
    mat = np.floor(100*np.random.rand(n,m))
    mat[1,:] = 100-mat[0,:]
    row_labels = range(n)
    col_labels = range(m)
    data = Survey_matrix()
    data.from_matrix(mat, row_labels, col_labels)
    # Parenthesised form prints the same on Python 2 and also parses on Python 3.
    c,p = data.correlation(type = 'pearsonr')
    print(c.to_matrix())
    print(p.to_matrix())
    sim = independent_betabinom(data)
    c_sim, p = sim.correlation(type = 'pearsonr')
    print(c_sim.to_matrix())
    print(p.to_matrix())
    sim_norm = sim.normalize(rows = False)
    c_sim_norm, p = sim_norm.correlation(type = 'pearsonr')
    print(c_sim_norm.to_matrix())
    print(p.to_matrix())
Пример #17
0
def independent_betabinom(original):
    '''
    Simulate a dataset in which each OTU is drawn independently of the others.
    For each OTU, estimate.dirmulti is fit to the two-column table
    [count of the OTU, rest of the sample] over all samples. Each simulated count is the
    first component of a multinomial draw with probabilities drawn from a dirichlet
    with the fitted parameters.
    Inputs:
        original = object providing to_matrix() -> (counts, otus, samples) and
                   total_counts(rows = False) -> mapping whose values are averaged
                   to get the number of reads simulated for every sample.
    Return Survey_matrix of simulated data, labeled like original.
    '''
    counts , otus, samples = original.to_matrix()
    k = len(otus)
    n = len(samples)
    # list() so that np.mean also accepts the dict view returned on Python 3
    N = list(original.total_counts(rows = False).values())
    N_avg = np.mean(N)
    ## estimate beta-dist params for each OTU
    a = []
    tot = counts.sum(axis = 0)
    for i in range(k):
        c = np.c_[counts[i,:], tot- counts[i,:]]
        a_opt, _ = estimate.dirmulti(c, symmetric = False)
        a.append(a_opt)
    ## simulate each OTU
    simulated = np.zeros((k,n))
    for i in range(k):
        probs = dirichlet(a[i],n)
        for j,p in enumerate(probs):
            # multinomial needs an integer number of trials; int() truncates the
            # float mean, as the implicit conversion of older numpy presumably did.
            simulated[i,j] = multinomial(int(N_avg),p)[0]
    ## convert to Survey_matrix
    sim = Survey_matrix()
    sim.from_matrix(simulated, otus, samples)
    return sim
Пример #18
0
def independent_betabinom(original):
    '''
    Simulate a dataset in which each OTU is drawn independently of the others.

    For each OTU, estimate.dirmulti is fit to the two-column table
    [count of the OTU, rest of the sample] over all samples. Each simulated
    count is the first component of a multinomial draw whose probabilities
    are drawn from a dirichlet with the fitted parameters.
    Inputs:
        original = object providing to_matrix() -> (counts, otus, samples)
                   and total_counts(rows=False) -> mapping whose values are
                   averaged to get the number of reads simulated per sample.
    Return Survey_matrix of simulated data, labeled like original.
    '''
    counts, otus, samples = original.to_matrix()
    k = len(otus)
    n = len(samples)
    # Materialise the values: np.mean cannot average a Python 3 dict view.
    N = list(original.total_counts(rows=False).values())
    N_avg = np.mean(N)
    ## estimate beta-dist params for each OTU
    a = []
    tot = counts.sum(axis=0)
    for i in range(k):
        c = np.c_[counts[i, :], tot - counts[i, :]]
        a_opt, _ = estimate.dirmulti(c, symmetric=False)
        a.append(a_opt)
    ## simulate each OTU
    simulated = np.zeros((k, n))
    for i in range(k):
        probs = dirichlet(a[i], n)
        for j, p in enumerate(probs):
            # The number of trials must be an integer; int() truncates the
            # float mean, presumably matching what older numpy did implicitly.
            simulated[i, j] = multinomial(int(N_avg), p)[0]
    ## convert to Survey_matrix
    sim = Survey_matrix()
    sim.from_matrix(simulated, otus, samples)
    return sim