Exemplo n.º 1
0
def diversity(df_sv_list):
    """Use skbio to compute richness, Shannon diversity and Bray-Curtis distances.

    For each table in ``df_sv_list`` the first column is skipped; the remaining
    columns are treated as samples and the rows as sequence variants (SVs).

    Relies on two module-level names: ``allsamples`` (index of the returned
    alpha-diversity frames) and ``df_sv_list_names`` (column label used for
    each table, looked up by position).

    Parameters
    ----------
    df_sv_list : list of pandas.DataFrame
        One table per data set.

    Returns
    -------
    richness : pandas.DataFrame
        ``observed_otus`` per sample, one column per input table.
    shannon : pandas.DataFrame
        ``shannon`` per sample, one column per input table.
    bc_dm_list : list of pandas.DataFrame
        One square Bray-Curtis distance frame per input table.
    """
    richness = pd.DataFrame(index=allsamples)
    shannon = pd.DataFrame(index=allsamples)
    bc_dm_list = []
    for i, df in enumerate(df_sv_list):
        label = df_sv_list_names[i]
        samples = df.iloc[:, 1:]
        data = samples.T.values  # columns are the SVs and rows are the samples
        ids = samples.columns  # ids have the same order as the data rows

        # richness: the unnamed result Series becomes column 0, then is relabelled
        richness = richness.merge(
            pd.DataFrame(alpha_diversity("observed_otus", data, ids)),
            how="left",
            left_index=True,
            right_index=True).rename(columns={0: label})

        # shannon
        shannon = shannon.merge(
            pd.DataFrame(alpha_diversity("shannon", data, ids)),
            how="left",
            left_index=True,
            right_index=True).rename(columns={0: label})

        # bray-curtis distance matrix: build the frame straight from the matrix
        # data so it keeps a numeric dtype (filling an empty frame through
        # ``iloc`` left it object-typed); ``copy=True`` keeps it independent of
        # the distance matrix's own buffer.
        bc_dm = beta_diversity("braycurtis", data, ids)
        bc_dm_list.append(
            pd.DataFrame(bc_dm.data,
                         index=bc_dm.ids,
                         columns=bc_dm.ids,
                         copy=True))
    return richness, shannon, bc_dm_list
Exemplo n.º 2
0
 def test_optimized(self):
     # 'faith_pd' requested by name (optimized path) must match passing the
     # faith_pd callable itself (unoptimized path)
     phylo_args = dict(tree=self.tree1, otu_ids=self.oids1)
     by_name = alpha_diversity('faith_pd', self.table1, **phylo_args)
     by_callable = alpha_diversity(faith_pd, self.table1, **phylo_args)
     assert_series_almost_equal(by_name, by_callable)
Exemplo n.º 3
0
    def test_single_count_vector(self):
        # a 1-D count vector yields a one-element Series
        observed = alpha_diversity('observed_otus', np.array([1, 0, 2]))
        assert_series_almost_equal(observed, pd.Series([2]))

        # same for a phylogenetic metric
        counts = np.array([1, 3, 0, 1, 0])
        phylo = alpha_diversity('faith_pd', counts,
                                tree=self.tree1, otu_ids=self.oids1)
        self.assertAlmostEqual(phylo[0], 4.5)
Exemplo n.º 4
0
    def test_single_count_vector(self):
        # a 1-D count vector yields a one-element Series
        observed = alpha_diversity('observed_otus', np.array([1, 0, 2]))
        assert_series_almost_equal(observed, pd.Series([2]))

        # same for a phylogenetic metric
        counts = np.array([1, 3, 0, 1, 0])
        phylo = alpha_diversity('faith_pd', counts,
                                tree=self.tree1, otu_ids=self.oids1)
        self.assertAlmostEqual(phylo[0], 4.5)
Exemplo n.º 5
0
 def test_optimized(self):
     # 'faith_pd' requested by name (optimized path) must match passing the
     # faith_pd callable itself (unoptimized path)
     phylo_args = dict(tree=self.tree1, otu_ids=self.oids1)
     by_name = alpha_diversity('faith_pd', self.table1, **phylo_args)
     by_callable = alpha_diversity(faith_pd, self.table1, **phylo_args)
     assert_series_almost_equal(by_name, by_callable)
Exemplo n.º 6
0
 def test_observed_otus(self):
     # expected values hand-calculated
     want = pd.Series([3, 3, 3, 3], index=self.sids1)
     # metric given first as a string, then as the function itself
     for metric in ('observed_otus', observed_otus):
         got = alpha_diversity(metric, self.table1, self.sids1)
         assert_series_almost_equal(got, want)
     # alt input table
     want = pd.Series([2, 1, 0], index=self.sids2)
     got = alpha_diversity('observed_otus', self.table2, self.sids2)
     assert_series_almost_equal(got, want)
Exemplo n.º 7
0
 def test_observed_otus(self):
     # expected values hand-calculated
     want = pd.Series([3, 3, 3, 3], index=self.sids1)
     # metric given first as a string, then as the function itself
     for metric in ('observed_otus', observed_otus):
         got = alpha_diversity(metric, self.table1, self.sids1)
         assert_series_almost_equal(got, want)
     # alt input table
     want = pd.Series([2, 1, 0], index=self.sids2)
     got = alpha_diversity('observed_otus', self.table2, self.sids2)
     assert_series_almost_equal(got, want)
Exemplo n.º 8
0
    def test_input_types(self):
        # a plain list and the equivalent ndarray must give the same result
        from_list = alpha_diversity('observed_otus', [1, 3, 0, 1, 0])
        from_array = alpha_diversity('observed_otus',
                                     np.array([1, 3, 0, 1, 0]))
        self.assertAlmostEqual(from_list[0], 3)
        assert_series_almost_equal(from_list, from_array)

        # same check for a phylogenetic metric
        phylo_args = dict(tree=self.tree1, otu_ids=self.oids1)
        from_list = alpha_diversity('faith_pd', [1, 3, 0, 1, 0], **phylo_args)
        from_array = alpha_diversity('faith_pd', np.array([1, 3, 0, 1, 0]),
                                     **phylo_args)
        self.assertAlmostEqual(from_list[0], 4.5)
        assert_series_almost_equal(from_list, from_array)
Exemplo n.º 9
0
    def test_input_types(self):
        # a plain list and the equivalent ndarray must give the same result
        from_list = alpha_diversity('observed_otus', [1, 3, 0, 1, 0])
        from_array = alpha_diversity('observed_otus',
                                     np.array([1, 3, 0, 1, 0]))
        self.assertAlmostEqual(from_list[0], 3)
        assert_series_almost_equal(from_list, from_array)

        # same check for a phylogenetic metric
        phylo_args = dict(tree=self.tree1, otu_ids=self.oids1)
        from_list = alpha_diversity('faith_pd', [1, 3, 0, 1, 0], **phylo_args)
        from_array = alpha_diversity('faith_pd', np.array([1, 3, 0, 1, 0]),
                                     **phylo_args)
        self.assertAlmostEqual(from_list[0], 4.5)
        assert_series_almost_equal(from_list, from_array)
Exemplo n.º 10
0
def compute_alphas(biom, tree=None,
                   metrics=('chao1',
                            'faith_pd',
                            'observed_otus')):
    """Compute several alpha-diversity metrics for one feature table.

    Parameters
    ----------
    biom : table exposing ``.T``, ``.columns`` and ``.index``
        ``biom.T`` is converted to an array of counts, ``biom.columns`` is
        passed as the sample ids and ``biom.index`` as the OTU ids.
        NOTE(review): presumably a pandas DataFrame -- confirm with callers.
    tree : optional
        Passed to ``alpha_diversity`` for the ``'faith_pd'`` metric only.
    metrics : iterable of str
        Metric names to compute. The default is a tuple rather than a list so
        the default value cannot be mutated between calls.

    Returns
    -------
    dict
        Maps each metric name to the result of ``alpha_diversity``.
    """
    # The count matrix does not depend on the metric: convert it once.
    counts = np.asarray(biom.T)
    alphas = {}
    for metric in metrics:
        extra = {}
        if metric == 'faith_pd':
            # only the phylogenetic metric is given the OTU ids and the tree
            extra = {'otu_ids': biom.index, 'tree': tree}
        alphas[metric] = alpha_diversity(metric, counts=counts,
                                         ids=biom.columns, **extra)
    return alphas
Exemplo n.º 11
0
def diversity_ana(metric,subsample,ids,**kwargs):
    """Compute one alpha-diversity metric, exiting with a hint if it fails.

    Parameters
    ----------
    metric : str
        Metric name handed to ``alpha_diversity``.
    subsample
        Count data handed to ``alpha_diversity``.
    ids
        Sample ids handed to ``alpha_diversity``.
    **kwargs
        ``otu_ids`` and ``tree`` are required when ``metric`` is
        ``'faith_pd'`` (a missing key raises ``KeyError``); ignored otherwise.

    Returns
    -------
    The value returned by ``alpha_diversity``.

    Raises
    ------
    SystemExit
        For any metric other than ``'faith_pd'``, ``'shannon'`` and
        ``'observed_otus'``, when ``alpha_diversity`` raises: the list of
        available metrics is printed first.
    """
    if metric == 'faith_pd':
        return alpha_diversity('faith_pd', subsample, ids=ids,
                               otu_ids=kwargs['otu_ids'], tree=kwargs['tree'])
    if metric in ('shannon', 'observed_otus'):
        # errors for these known metrics propagate to the caller unchanged
        return alpha_diversity(metric, subsample, ids=ids)
    try:
        return alpha_diversity(metric, subsample, ids=ids)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        # Single-argument print() behaves the same on Python 2 and 3.
        print('Metric you can use is listed below: \n' + '\n'.join(get_alpha_diversity_metrics()))
        # same effect as the site-provided exit(), without depending on it
        raise SystemExit()
Exemplo n.º 12
0
def alpha(table: biom.Table):
    """Compute every metric in ``ALPHA_DIVERSITY_METHODS`` for a BIOM table.

    :param table: table to analyse; rejected when ``table.is_empty()`` is true
    :return: the output of ``_format_alpha_results_to_json`` applied to the
        aggregated per-metric results and the sample metadata
    :raises ValueError: if the table is empty
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    table = get_biom_table(table)
    sample_ids = table.ids(axis='sample')
    # samples as rows, float counts
    counts = table.matrix_data.toarray().astype(float).T
    sample_metadata = dict(zip(table.ids(), table.metadata()))

    def _named_result(metric):
        # label each result with its metric name
        series = alpha_diversity(metric=metric, counts=counts, ids=sample_ids)
        series.name = metric
        return series

    alpha_diversities = [_named_result(m) for m in ALPHA_DIVERSITY_METHODS]

    aggregated = aggregate_results(alpha_diversities, sample_ids)
    return _format_alpha_results_to_json(aggregated, sample_metadata)
Exemplo n.º 13
0
 def __dapply__(self, experiment):
     """Compute ``self.distance_metric`` for every column of ``experiment.data_df``.

     The frame's columns are used as sample ids and its index as OTU ids.
     The metric is first computed without ``otu_ids``; if ``alpha_diversity``
     reports that they are required, the call is repeated with them.

     Returns a one-row DataFrame whose row label is ``self.distance_metric``.
     Any other ``ValueError`` is re-raised unchanged.
     """
     otu_ids = experiment.data_df.index
     sample_ids = experiment.data_df.columns
     # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` gives
     # the same array on old and new pandas alike.
     matrix = experiment.data_df.T.values
     try:
         alpha = alpha_diversity(self.distance_metric, counts=matrix, ids=sample_ids, **self.kwargs)
     except ValueError as e:
         # NOTE(review): matching on the exact message text is fragile if the
         # library rewords it.
         otu_ids_err_msg = "``otu_ids`` is required for phylogenetic diversity metrics."
         if str(e) != otu_ids_err_msg:
             raise
         alpha = alpha_diversity(self.distance_metric, counts=matrix,
                                 ids=sample_ids, otu_ids=otu_ids,
                                 **self.kwargs)

     return alpha.to_frame(name=self.distance_metric).transpose()
Exemplo n.º 14
0
    def compute_alpha_diversity(self):
        """Compute and cache alpha diversity values

        This data is computed for the full dataset, not for a specific
        subsampling. Therefore once it is computed, we can later subsample
        from these vectors directly.

        The result is stored in ``self._alpha_diversity_values``: a copy of
        the trajectory and gradient columns of the original metadata plus one
        column per metric in ``self._alpha_metrics``.

        See Also
        --------
        Sculptor.compute_beta_diversity
        """
        # work on a copy so the original metadata frame is left untouched
        features = self._original_mf[[self.trajectory, self.gradient]].copy()

        # ``np.int`` was only an alias of the builtin ``int``; it was
        # deprecated in NumPy 1.20 and removed in 1.24.
        X = self._original_bt.matrix_data.toarray().astype(int).T
        sample_ids = self._original_bt.ids('sample')

        for metric in self._alpha_metrics:
            if metric == 'faith_pd':
                # the phylogenetic metric also needs the tree and feature ids
                kws = {
                    'tree': self.tree,
                    'otu_ids': self._original_bt.ids('observation')
                }
            else:
                kws = {}

            features[metric] = alpha_diversity(metric, X, sample_ids, **kws)

        self._alpha_diversity_values = features
Exemplo n.º 15
0
def compute_alpha_diversity(table, metric, **kwargs):
    """Compute an alpha diversity metric for every sample of a table.

    Parameters
    ----------
    table: biom.table.Table object
        BIOM table
    metric: str
        alpha diversity metric
    kwargs: dict, optional
        Metric-specific parameters, forwarded to ``alpha_diversity``

    Returns
    -------
    results: pd.Series
        alpha diversity per sample, named after ``metric``
    """
    per_sample_ids = table.ids(axis='sample')
    # integer counts with samples as rows
    dense_counts = table.matrix_data.astype(int).T.toarray()
    # input validation is switched off for this call
    call_args = dict(metric=metric, counts=dense_counts, ids=per_sample_ids,
                     validate=False)
    results = alpha_diversity(**dict(call_args, **kwargs))
    results.name = metric
    return results
Exemplo n.º 16
0
def compute_alpha_diversity(table,
                            metric,
                            **kwargs):
    """Compute an alpha diversity metric for every sample of a table.

    Parameters
    ----------
    table: biom.table.Table object
        BIOM table
    metric: str
        alpha diversity metric
    kwargs: dict, optional
        Metric-specific parameters, forwarded to ``alpha_diversity``

    Returns
    -------
    results: pd.Series
        alpha diversity per sample, named after ``metric``
    """
    per_sample_ids = table.ids(axis='sample')
    # integer counts with samples as rows
    dense_counts = table.matrix_data.astype(int).T.toarray()
    # input validation is switched off for this call
    call_args = dict(metric=metric, counts=dense_counts, ids=per_sample_ids,
                     validate=False)
    results = alpha_diversity(**dict(call_args, **kwargs))
    results.name = metric
    return results
Exemplo n.º 17
0
def compute_alphas(biom,
                   tree=None,
                   metrics=('chao1', 'faith_pd', 'observed_otus')):
    """Compute several alpha-diversity metrics for one feature table.

    Parameters
    ----------
    biom : table exposing ``.T``, ``.columns`` and ``.index``
        ``biom.T`` is converted to an array of counts, ``biom.columns`` is
        passed as the sample ids and ``biom.index`` as the OTU ids.
        NOTE(review): presumably a pandas DataFrame -- confirm with callers.
    tree : optional
        Passed to ``alpha_diversity`` for the ``'faith_pd'`` metric only.
    metrics : iterable of str
        Metric names to compute. The default is a tuple rather than a list so
        the default value cannot be mutated between calls.

    Returns
    -------
    dict
        Maps each metric name to the result of ``alpha_diversity``.
    """
    # The count matrix does not depend on the metric: convert it once.
    counts = np.asarray(biom.T)
    alphas = {}
    for metric in metrics:
        extra = {}
        if metric == 'faith_pd':
            # only the phylogenetic metric is given the OTU ids and the tree
            extra = {'otu_ids': biom.index, 'tree': tree}
        alphas[metric] = alpha_diversity(metric,
                                         counts=counts,
                                         ids=biom.columns,
                                         **extra)
    return alphas
Exemplo n.º 18
0
 def compute_alpha(self, metric="shannon"):
     """Return ``metric`` computed on ``self.otu_df`` as a pandas Series.

     For ``'shannon'`` every 0 in the table is replaced by 1 before the
     metric is computed; other metrics use the table as is.
     """
     table = self.otu_df.replace(0, 1) if metric == 'shannon' else self.otu_df
     return pd.Series(alpha_diversity(metric, table, self.sample_ids))
Exemplo n.º 19
0
    def test_faith_pd(self):
        # calling faith_pd through alpha_diversity gives same results as
        # calling it directly, for the main and the alternate table/tree
        fixtures = (
            (self.table1, self.tree1, self.oids1),
            (self.table2, self.tree2, self.oids2),
        )
        for table, tree, oids in fixtures:
            expected = pd.Series(
                [faith_pd(row, tree=tree, otu_ids=oids) for row in table])
            actual = alpha_diversity('faith_pd', table, tree=tree,
                                     otu_ids=oids)
            assert_series_almost_equal(actual, expected)
Exemplo n.º 20
0
    def test_faith_pd(self):
        # calling faith_pd through alpha_diversity gives same results as
        # calling it directly, for the main and the alternate table/tree
        fixtures = (
            (self.table1, self.tree1, self.oids1),
            (self.table2, self.tree2, self.oids2),
        )
        for table, tree, oids in fixtures:
            expected = pd.Series(
                [faith_pd(row, tree=tree, otu_ids=oids) for row in table])
            actual = alpha_diversity('faith_pd', table, tree=tree,
                                     otu_ids=oids)
            assert_series_almost_equal(actual, expected)
Exemplo n.º 21
0
    def __dapply__(self, experiment):
        """Compute ``self.distance_metric`` for every column of ``experiment.data_df``.

        The frame's columns are used as sample ids and its index as OTU ids.
        The metric is first computed without ``otu_ids``; if
        ``alpha_diversity`` reports that they are required, the call is
        repeated with them.

        Returns a one-row DataFrame whose row label is
        ``self.distance_metric``. Any other ``ValueError`` is re-raised
        unchanged.
        """
        otu_ids = experiment.data_df.index
        sample_ids = experiment.data_df.columns
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # gives the same array on old and new pandas alike.
        matrix = experiment.data_df.T.values
        try:
            alpha = alpha_diversity(self.distance_metric,
                                    counts=matrix,
                                    ids=sample_ids,
                                    **self.kwargs)
        except ValueError as e:
            # NOTE(review): matching on the exact message text is fragile if
            # the library rewords it.
            otu_ids_err_msg = "``otu_ids`` is required for phylogenetic diversity metrics."
            if str(e) != otu_ids_err_msg:
                raise
            alpha = alpha_diversity(self.distance_metric,
                                    counts=matrix,
                                    ids=sample_ids,
                                    otu_ids=otu_ids,
                                    **self.kwargs)

        return alpha.to_frame(name=self.distance_metric).transpose()
Exemplo n.º 22
0
    def test_empty(self):
        # each case: (counts, expected values) for an empty vector, an array
        # holding one empty vector, and an array holding two empty vectors
        observed_cases = [
            (np.array([], dtype=np.int64), [0]),
            (np.array([[]], dtype=np.int64), [0]),
            (np.array([[], []], dtype=np.int64), [0, 0]),
        ]
        for counts, values in observed_cases:
            actual = alpha_diversity('observed_otus', counts)
            assert_series_almost_equal(actual, pd.Series(values))

        # same inputs through the phylogenetic metric, with no OTU ids
        faith_cases = [
            (np.array([], dtype=np.int64), [0.]),
            (np.array([[]], dtype=np.int64), [0.]),
            (np.array([[], []], dtype=np.int64), [0., 0.]),
        ]
        for counts, values in faith_cases:
            actual = alpha_diversity('faith_pd',
                                     counts,
                                     tree=self.tree1,
                                     otu_ids=[])
            assert_series_almost_equal(actual, pd.Series(values))
Exemplo n.º 23
0
    def test_empty(self):
        # each case: (counts, expected values) for an empty vector, an array
        # holding one empty vector, and an array holding two empty vectors
        observed_cases = [
            (np.array([], dtype=np.int64), [0]),
            (np.array([[]], dtype=np.int64), [0]),
            (np.array([[], []], dtype=np.int64), [0, 0]),
        ]
        for counts, values in observed_cases:
            actual = alpha_diversity('observed_otus', counts)
            assert_series_almost_equal(actual, pd.Series(values))

        # same inputs through the phylogenetic metric, with no OTU ids
        faith_cases = [
            (np.array([], dtype=np.int64), [0.]),
            (np.array([[]], dtype=np.int64), [0.]),
            (np.array([[], []], dtype=np.int64), [0., 0.]),
        ]
        for counts, values in faith_cases:
            actual = alpha_diversity('faith_pd', counts,
                                     tree=self.tree1, otu_ids=[])
            assert_series_almost_equal(actual, pd.Series(values))
Exemplo n.º 24
0
#df1.to_csv("test2.tsv", sep="\t", header=1)

# Load the count matrix from "tpm_np.tsv" as a NumPy array.
dfdgo = np.loadtxt("tpm_np.tsv")

# Plain aliases: dfd1 is the same object as dfd, datad the same array as dfdgo.
dfd1 = dfd
datad = dfdgo
print(datad)
#ids = df1.index.tolist()
# Sample ids are taken from the index of dfd1.
# NOTE(review): presumably the rows of "tpm_np.tsv" follow the same order as
# dfd1's index -- confirm, nothing here checks it.
idsd = list(dfd1.index.values)
print(idsd)

#####################
# Diversity metrics #
#####################

# Observed OTUs per sample.
adiv_obs_otusd = alpha_diversity('observed_otus', datad, idsd)

# Faith's PD per sample; the columns of dfd1 are passed as the OTU ids and
# input validation is switched on.
adiv_faith_pdd = alpha_diversity('faith_pd',
                                 datad,
                                 ids=idsd,
                                 otu_ids=dfd1.columns,
                                 tree=tree,
                                 validate=True)

#bc_dm = beta_diversity("braycurtis", data, ids, validate=False)

wu_dmd = beta_diversity("weighted_unifrac",
                        datad,
                        idsd,
                        tree=tree,
                        otu_ids=dfd1.columns,
def alpha_div(dataframe,list_s,fam_code,methode):
    """Compute one alpha-diversity metric and return it as a frame and a Series.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Counts; its values are passed to ``alpha_diversity`` unchanged.
    list_s
        Sample ids passed as ``ids``.
    fam_code
        Label of the single column of the returned frame.
    methode
        Metric passed to ``alpha_diversity``.

    Returns
    -------
    (df_results, results)
        ``results`` is the raw ``alpha_diversity`` output; ``df_results`` is
        the same data as a one-column frame with missing values set to 0.
    """
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` gives
    # the same array on old and new pandas alike.
    df_matrix = dataframe.values
    results = alpha_diversity(methode, df_matrix, ids=list_s, validate=True)
    df_results = pd.DataFrame(results, columns=[fam_code])
    df_results = df_results.fillna(0)
    return df_results, results
from skbio import TreeNode
from skbio.diversity import alpha_diversity
from biom import load_table
import numpy as np
import time
import sys

# Command-line script: argv[1] is the table to load, argv[2] the tree file.
print('Running-faith_pd')
args = sys.argv
table = load_table(args[1])
otu_ids = table.ids('observation')
# One count vector per id returned by table.ids().
counts = np.asarray([table.data(i) for i in table.ids()])
tree = TreeNode.read(args[2])

# The timed section covers both the computation and writing the result.
t0 = time.time()
actual = alpha_diversity('faith_pd', counts, tree=tree, otu_ids=otu_ids)
# NOTE(review): the output path is hard-coded to one user's home directory.
actual.to_csv(
    '/home/garmstro/faith_pd/large-data/redbiom-fetch/redbiom-fetch-faith-pd.txt'
)
t1 = time.time()
print('Python time-{}'.format(t1 - t0))
Exemplo n.º 27
0
def run_fast_faith_shear(counts, otu_ids, tree):
    """Return the 'fast_faith_pd' values (computed with ``shear=True``) as a list."""
    per_sample = alpha_diversity('fast_faith_pd',
                                 counts,
                                 tree=tree,
                                 otu_ids=otu_ids,
                                 shear=True)
    return list(per_sample.values)
Exemplo n.º 28
0
					ymtx = np.empty( ( num_repeats, len( xvals ) ), dtype=int )
					for i in range( num_repeats ):
						ymtx[i] = np.asarray( [ subsample( si, n ) for n in xvals ], dtype=int )
					yvals = ymtx.mean(0)
					def errfn(p, n, y):
						# sum of squared residuals between the curve
						# p[0] * n / (p[1] + n) and the observed values y
						predicted = p[0] * n / ( p[1] + n )
						residuals = predicted - y
						return ( residuals ** 2 ).sum()
						#return ( ( p[0] * ( 1. - np.exp( n / p[1] ) ) - y ) ** 2 ).sum()
					params_guess = ( n_otu,  int( round( n_otu / 2 ) ) )
					print >>sys.stderr, yvals
					print >>sys.stderr, xvals

					mparams = fmin_powell( errfn, params_guess, ftol=1e-5, args=(xvals, yvals),	disp = False )
					ef = 2
					sv = "%.2f %.2f %.2f" % ( mparams[0], mparams[1], math.sqrt( errfn( mparams, xvals, yvals) / len( xvals ) ) )
				else:
					v = skdiv.alpha_diversity( slist[ k ], si ).values[0]
			except ( ValueError, TypeError, ZeroDivisionError ) as err:
				estr = str( err )
				ef = 1
			#except TypeError as err:
			#	ef = 1
			#except ZeroDivisionError as err:
			#	ef = 1
			if ef == 0:
				sv = "%.2f" % v
				if v == float( int( v ) ):
					sv = "%d" % int( v )
				print "<td>" + sv
			elif ef == 1:
				print "<td>??"
				print >>sys.stderr, estr
Exemplo n.º 29
0
# Use the first row as the column labels, then drop the
# 'formula_isotopefree' row.
TransForm.columns = TransForm.iloc[0]
TransForm = TransForm.drop('formula_isotopefree', axis=0)

TransForm = TransForm.fillna(0)
TransForm = TransForm.reset_index()
# Split each sample label around its digits-then-letters part.
# Raw string: '\d' in a plain literal is an invalid escape sequence and
# triggers a warning on recent Python versions; the pattern is unchanged.
new = TransForm['index'].str.split(r'(\d+)([A-Za-z]+)', n=3, expand=True)
new['SiteName'] = new[0] + new[1]
new = new.rename(columns={2: 'Position', 3: 'Depth'})
ids = new.drop([0, 1], axis=1)
OTU_equivalent = TransForm.drop('index', axis=1)

Formulae = TransForm.reset_index()
Formulae = Formulae.drop('index', axis=1)

# One bar chart of ranked observed counts per id column.
for col in ids:
    adiv_obs_otus = alpha_diversity('observed_otus', OTU_equivalent, ids[col])
    alpha = adiv_obs_otus.reset_index()
    alpha = alpha.rename(columns={0: 'Count'})
    alpha['Rank'] = alpha['Count'].rank()
    alpha.plot(x='Rank', y=['Count'], kind='bar')
    plt.show()

#bc_dm = beta_diversity("braycurtis", OTU_equivalent, TransForm['index'])
#wu_pc = pcoa(bc_dm)
#fig = wu_pc.plot(new ,'Position')

my_dpi = 96
plt.figure(figsize=(480 / my_dpi, 480 / my_dpi), dpi=my_dpi)

# Make the 'Depth' column categorical so it can be used for coloring
ids['Depth'] = pd.Categorical(ids['Depth'])
Exemplo n.º 30
0
#df1.to_csv("test2.tsv", sep="\t", header=1)

# Load the count matrix from "tpm_np.tsv" as a NumPy array.
dfsgo = np.loadtxt("tpm_np.tsv")

# Plain aliases: dfs1 is the same object as dfs, datas the same array as dfsgo.
dfs1 = dfs
datas = dfsgo
print(datas)
#ids = df1.index.tolist()
# Sample ids are taken from the index of dfs1.
# NOTE(review): presumably the rows of "tpm_np.tsv" follow the same order as
# dfs1's index -- confirm, nothing here checks it.
idss = list(dfs1.index.values)
print(idss)

#####################
# Diversity metrics #
#####################

# Observed OTUs per sample.
adiv_obs_otuss = alpha_diversity('observed_otus', datas, idss)

# Faith's PD per sample; the columns of dfs1 are passed as the OTU ids and
# input validation is switched off.
adiv_faith_pds = alpha_diversity('faith_pd',
                                 datas,
                                 ids=idss,
                                 otu_ids=dfs1.columns,
                                 tree=tree,
                                 validate=False)

#bc_dm = beta_diversity("braycurtis", data, ids, validate=False)

wu_dms = beta_diversity("weighted_unifrac",
                        datas,
                        idss,
                        tree=tree,
                        otu_ids=dfs1.columns,
    var1 = np.array(normal).var()
    var2 = np.array(surgery).var()
    n1, n2 = len(normal), len(surgery)
    diff = float(mean_surgery - mean_normal)
    pooled_var = ((n2 * var2) + (n1 * var1)) / (n1 + n2)
    odds_score = float(0)
    effect_size = diff / sqrt(pooled_var)
    calc_dict['effect size'] = effect_size
    return calc_dict


# Both lists start empty and are handed to stat_diff below.
# NOTE(review): presumably stat_diff fills them per group -- confirm against
# its definition, which is not fully visible here.
normal = []
surgery = []
#p_value = 0.0
# Per-sample diversity for the metric named by `index`, as a plain dict
# keyed by sample id.
results_index = dict(
    alpha_diversity(str(index), sample_matrix, ids=list_sample, validate=True))
calcres = stat_diff(results_index, normal, surgery)
y = str(index) + '_index'
column_name = ['Sample', y]

# Two-column table (sample id, metric value), written as TSV.
# NOTE(review): a dict view is passed straight to the DataFrame constructor;
# older pandas versions may need list(results_index.items()) -- verify.
df_final1 = pd.DataFrame(results_index.items(), columns=column_name)
df_final1.to_csv(outtitle1, header=True, index=True, sep='\t')
list_stage = []
list_stage_alpha = df_final1.index.values
# Statistics returned by stat_diff, one row per key, written as TSV.
df_calc = pd.DataFrame.from_dict(calcres, orient='index')
df_calc.to_csv(out_calc, header=True, index=True, sep='\t')

# Collect the second '.'-separated field of every sample name.
for x in list_stage_alpha:
    stage_x = (df_final1.loc[x, 'Sample'])
    stage = stage_x.split('.')[1]
    list_stage.append(stage)
Exemplo n.º 32
0
 def test_no_ids(self):
     # no ids passed: compare against a Series with the default index
     # (expected values hand-calculated)
     result = alpha_diversity('observed_otus', self.table1)
     assert_series_almost_equal(result, pd.Series([3, 3, 3, 3]))
Exemplo n.º 33
0
#df1.to_csv("test2.tsv", sep="\t", header=1)

# Load the count matrix from "tpm_np.tsv" as a NumPy array.
dftgo = np.loadtxt("tpm_np.tsv")

# Plain aliases: dft1 is the same object as dft, datat the same array as dftgo.
dft1 = dft
datat = dftgo
print(datat)
#ids = df1.index.tolist()
# Sample ids are taken from the index of dft1.
# NOTE(review): presumably the rows of "tpm_np.tsv" follow the same order as
# dft1's index -- confirm, nothing here checks it.
idst = list(dft1.index.values)
print(idst)

#####################
# Diversity metrics #
#####################

# Observed OTUs per sample.
adiv_obs_otust = alpha_diversity('observed_otus', datat, idst)

# Faith's PD per sample; the columns of dft1 are passed as the OTU ids and
# input validation is switched off.
adiv_faith_pdt = alpha_diversity('faith_pd',
                                 datat,
                                 ids=idst,
                                 otu_ids=dft1.columns,
                                 tree=tree,
                                 validate=False)

#bc_dm = beta_diversity("braycurtis", data, ids, validate=False)

wu_dmt = beta_diversity("weighted_unifrac",
                        datat,
                        idst,
                        tree=tree,
                        otu_ids=dft1.columns,
Exemplo n.º 34
0
 def test_no_ids(self):
     # no ids passed: compare against a Series with the default index
     # (expected values hand-calculated)
     result = alpha_diversity('observed_otus', self.table1)
     assert_series_almost_equal(result, pd.Series([3, 3, 3, 3]))
Exemplo n.º 35
0
# Parse the tab-separated table: the first line is skipped, the second holds
# the sample names, every later line is one OTU. The first field is the OTU
# label and the last field of each line is dropped.
with open(table) as inf:
    inf.readline()
    names = inf.readline().split("\t")[1:-1]
    for line in inf:
        fields = line.split("\t")
        data.append([int(float(x)) for x in fields[1:-1]])
        otus.append(fields[0])

# OTU labels use spaces instead of underscores
otus = [x.replace("_", " ") for x in otus]
df = pd.DataFrame(data, index=otus, columns=names)

if metric != "faith_pd":
    div = alpha_diversity(metric, df.T, names)
else:
    # the phylogenetic metric additionally needs the tree and the OTU ids
    tree = TreeNode.read(tree_file)
    div = alpha_diversity(metric,
                          df.T,
                          ids=names,
                          otu_ids=otus,
                          tree=tree,
                          validate=False)
div_df = pd.DataFrame(div, columns=[metric])

# write the per-sample values, then draw them as a bar plot
div_df.to_csv("alphadiversity_" + mname + ".txt", sep="\t")

sns.set_style("ticks", {"ytick.major.size": "2.0"})
ax = sns.barplot(data=div_df.T, color=col)
sns.despine(right=True)
plt.ylabel(label)

plt.savefig(figname, dpi=dpi)
Exemplo n.º 36
0
def run_faith(counts, otu_ids, tree):
    """Return the 'faith_pd' value of every sample in ``counts`` as a list."""
    per_sample = alpha_diversity('faith_pd',
                                 counts,
                                 tree=tree,
                                 otu_ids=otu_ids)
    return list(per_sample.values)
Exemplo n.º 37
0
    asv_df, tax_df, taxlevel='ASVg'
)  #### change ASVs hash to ASVg label for readability in downstream analysis !!!!!!
# Index the taxonomy by the ASVg label and keep only the ASVs that are
# columns of the count table.
tax_df = tax_df.reset_index().set_index('ASVg')
tax_df = tax_df.loc[asv_df.columns]

# Export the count table (used by the other notebooks of this analysis),
# the taxonomy table, and the count table with ASV names changed for R.
asv_df.to_csv(tmpDir + 'CountTable.tsv', sep='\t', index_label='SampleID')
tax_df.to_csv(tmpDir + 'TaxonomyTable.tsv', sep='\t', index_label="ASVg")
prepare_counts_for_ecol_inference(asv_df).to_csv(tmpDir + 'comm.tsv',
                                                 sep='\t')

# Keep only the samples present in the count table.
meta_df = meta_df.reindex(asv_df.index)

# Add alpha-diversity indexes to the meta table, one column per metric.
int_counts = asv_df.astype(int).values
sample_index = asv_df.index
meta_df = meta_df.assign(AlphaShannon=div.alpha_diversity(
    "shannon", int_counts, sample_index, base=np.e))
meta_df = meta_df.assign(
    AlphaSimpson=div.alpha_diversity("simpson", int_counts, sample_index))
meta_df = meta_df.assign(
    AlphaChao=div.alpha_diversity("chao1", int_counts, sample_index))
#meta_df = meta_df.assign(Richness = asv_df.astype(bool).sum(axis=1))
meta_df = meta_df.assign(
    Richness=div.alpha_diversity('observed_otus', int_counts, sample_index))
# NOTE(review): a sample with Richness == 1 gives log(1) == 0 in the
# denominator here.
meta_df['Eveness'] = meta_df.AlphaShannon / np.log(meta_df.Richness)

# Update the meta table that was exported earlier.
meta_df.to_csv(tmpDir + 'MetaTable.tsv', sep='\t', index_label='SampleID')

print(color.BOLD + color.BLUE + "\nClustering analysis...." + color.END)