def main(cor_file, perm_template, nperm, test_type='two_sided', outfile=None):
    '''
    Compute pseudo p-values for a correlation file and write them to disk.

    A pseudo p-value is the fraction of simulated (permuted) datasets in
    which a correlation at least as extreme as the "real" one was observed.
    The actual computation is delegated to get_pvalues.

    Parameters
    ----------
    cor_file : str
        File holding the correlations inferred from the real data.
    perm_template : str
        Naming template of the files holding the correlations of the
        permuted data; passed unchanged to get_pvalues.
    nperm : int
        Number of permutations; also embedded in the default output name.
    test_type : str (default 'two_sided')
        Passed unchanged to get_pvalues.
    outfile : str or None (default None)
        Destination file. When None, the name
        ``cor_file + '.nperm_<nperm>.pvals'`` is used.
    '''
    observed = read_txt(cor_file)
    pseudo_p = get_pvalues(observed, perm_template, nperm, test_type)
    # Fall back to a name derived from the input file and permutation count.
    target = outfile if outfile is not None else cor_file + '.nperm_%d.pvals' % nperm
    write_txt(pseudo_p, target)
def main(counts_file, nperm, perm_template, outpath='./'):
    '''
    Generate the simulated datasets used to obtain pseudo p-values.

    The counts table is read from ``counts_file`` and handed to
    make_bootstraps, which produces ``nperm`` simulated datasets. Per the
    original description, each OTU in each sample is assigned an abundance
    drawn at random (with replacement) from the abundances of that OTU
    across all samples, and the simulated datasets are written out as txt
    files.

    Parameters
    ----------
    counts_file : str
        File holding the observed counts.
    nperm : int
        Number of simulated datasets to make.
    perm_template : str or None
        Naming template for the simulated datasets. When None, the template
        ``counts_file + '.permuted_#.txt'`` is used.
    outpath : str (default './')
        Forwarded to make_bootstraps as ``outpath``.
    '''
    template = perm_template
    if template is None:
        template = counts_file + '.permuted_#.txt'
    # Load the observed counts, then produce the permuted datasets.
    table = read_txt(counts_file)
    make_bootstraps(table, nperm, template, outpath=outpath)
def get_pvalues(cor, perm_template, nperm, test_type='two_sided', iprint=0):
    '''
    Compute pseudo p-vals from a set of correlations obtained from
    permuted data.

    Pseudo p-vals are the fraction of times a correlation at least as
    extreme as the "real" one was observed in simulated datasets.

    Files containing the permuted correlations should be named with a
    consistent template, and these file names cannot contain any "#"
    characters other than the iteration placeholder.

    Parameters
    ----------
    cor : DataFrame
        Inferred correlations whose p-values are to be computed.
    perm_template : str
        The template used for naming the correlation files of the
        permuted data. The iteration number is indicated with a "#".
        For example: 'permuted/cor.sparcc.permuted_#.txt'
    nperm : int
        Number of permutations available. Files for iterations
        0 .. nperm-1 are read.
    test_type : 'two_sided' (default) | 'one_sided'
        two-sided = considering only the correlation magnitude.
        one-sided = accounting for the sign of correlations.
    iprint : int (default = 0)
        The interval at which iteration number is printed out.
        If iprint<=0 no printouts are made.

    Returns
    -------
    p_vals: frame
        Computed pseudo p-values, indexed like ``cor``. The diagonal is
        set to 1.

    Raises
    ------
    ValueError
        If ``test_type`` is neither 'two_sided' nor 'one_sided'.
    '''
    if test_type == 'two_sided':
        cmpfun = compare2sided
    elif test_type == 'one_sided':
        cmpfun = compare1sided
    else:
        # Call-style raise is valid on both Python 2 and Python 3.
        raise ValueError('unsupported test type "%s"' % test_type)

    # Per-entry count of permutations at least as extreme as the real value.
    n_sig = DF(np.zeros(cor.shape), index=cor.index, columns=cor.columns)
    for i in range(nperm):
        if iprint > 0 and not i % iprint:
            print(i)
        permfile = perm_template.replace('#', '%d' % i)
        cor_perm = read_txt(permfile).values
        n_sig[cmpfun(cor_perm, cor)] += 1

    # Work on an owned array: writing through ``DataFrame.values`` depends on
    # it being a writable view, which pandas does not guarantee.
    fractions = np.array(1. * n_sig / nperm)
    fractions[np.diag_indices_from(fractions)] = 1
    return DF(fractions, index=cor.index, columns=cor.columns)
help="Correlation strength exclusion threshold (0.1 default).") (options, args) = parser.parse_args() counts_file = args[0] from analysis_methods import basis_corr from io_methods import read_txt, write_txt kwargs = options.__dict__ algo = kwargs.pop('algo') cor_file = kwargs.pop('cor_file') cov_file = kwargs.pop('cov_file') if cor_file is None: cor_file = 'cor_mat_' + algo + '.out' if cov_file is None: cov_file = 'cov_mat_' + algo + '.out' print 'reading data' counts = read_txt(counts_file) ## Calculate correlations between components using SparCC print 'computing correlations' cor, cov = basis_corr(counts, method=algo, **kwargs) ## write out results print 'writing results' write_txt(cor, cor_file) print 'wrote ' + cor_file if cov is not None: write_txt(cov, cov_file) print 'wrote ' + cov_file print 'Done!'
def driver():
    '''
    Command-line entry point: compute correlations between components.

    Parses the command line, reads the counts table named by the first
    positional argument, calls basis_corr with the selected algorithm, and
    writes the resulting matrices as txt files.

    Output files default to 'cor_mat_<algo>.out', 'cov_mat_<algo>.out' and
    'pval_mat_<algo>.out' when not given on the command line. The
    covariance and p-value matrices are written only when basis_corr
    returns them (i.e. they are not None). All remaining options
    (iter, xiter, th) are forwarded to basis_corr as keyword arguments.

    Exits through ``parser.error`` (usage message, exit status 2) when the
    counts file argument is missing.
    '''
    from optparse import OptionParser

    usage = (
        'Compute the correlation between components (e.g. OTUs).\n'
        'By default uses the SparCC algorithm to account for compositional effects.\n'
        'Correlation and covariance (when applies) matrices are written out as txt files. \n'
        'Counts file needs to be a tab delimited text file where columns are samples and rows are components (e.g. OTUS).\n'
        ' See example/fake_data.txt for an example file.\n'
        '\n'
        'Usage: python SparCC.py counts_file [options]\n'
        'Example: python SparCC.py example/fake_data.txt -i 20 --cor_file=example/basis_corr/cor_mat_sparcc.out'
    )
    parser = OptionParser(usage)
    parser.add_option(
        "-c", "--cor_file", dest="cor_file", type='str',
        help="File to which correlation matrix will be written.")
    parser.add_option(
        "-v", "--cov_file", dest="cov_file", type='str',
        help="File to which covariance matrix will be written.")
    parser.add_option(
        "-a", "--algo", dest="algo", default='SparCC',
        help="Name of algorithm used to compute correlations (SparCC (default) | pearson | spearman | kendall)")
    parser.add_option(
        "-i", "--iter", dest='iter', type='int', default=20,
        help="Number of inference iterations to average over (20 default).")
    parser.add_option(
        "-x", "--xiter", dest='xiter', type='int', default=10,
        help="Number of exclusion iterations to remove strongly correlated pairs (10 default).")
    # NOTE(review): "--thershold" is a misspelling of "--threshold"; it is
    # kept as is because it is the flag name existing callers use.
    parser.add_option(
        "-t", "--thershold", dest='th', type='float', default=0.1,
        help="Correlation strength exclusion threshold (0.1 default).")
    parser.add_option(
        "-p", "--pval_file", dest="pval_file", type='str',
        help="File to which pvalues will be written.")
    (options, args) = parser.parse_args()

    # Report a usage error instead of failing with an IndexError on args[0].
    if not args:
        parser.error('missing required counts_file argument')
    counts_file = args[0]

    from analysis_methods import basis_corr
    from io_methods import read_txt, write_txt

    # Options that are not consumed here are passed on to basis_corr.
    kwargs = options.__dict__
    algo = kwargs.pop('algo')
    cor_file = kwargs.pop('cor_file')
    cov_file = kwargs.pop('cov_file')
    pval_file = kwargs.pop('pval_file')
    if cor_file is None:
        cor_file = 'cor_mat_' + algo + '.out'
    if cov_file is None:
        cov_file = 'cov_mat_' + algo + '.out'
    if pval_file is None:
        pval_file = 'pval_mat_' + algo + '.out'

    print('reading data')
    counts = read_txt(counts_file)

    ## Calculate correlations between components using the selected algorithm
    print('computing correlations')
    cor, cov, pval = basis_corr(counts, method=algo, **kwargs)
    # NOTE(review): this dumps the whole counts table to stdout and looks
    # like leftover debugging output -- confirm before removing.
    print(counts)

    ## write out results
    print('writing results')
    write_txt(cor, cor_file)
    print('wrote ' + cor_file)
    if cov is not None:
        write_txt(cov, cov_file)
    if pval is not None:
        write_txt(pval, pval_file)