Пример #1
0
    def test_call_unequal_group_sizes(self):
        """Check permanova on unequal group sizes, with and without relabeling.

        The original and the relabeled grouping vectors are each run from
        the same RNG seed and must both match the expected result series.
        """
        expected = pd.Series(
            index=self.exp_index,
            data=['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645, 999])

        groupings = (self.grouping_unequal, self.grouping_unequal_relabeled)
        for grouping in groupings:
            # Reseed before every call so the permutations are reproducible.
            np.random.seed(0)
            observed = permanova(self.dm_unequal, grouping)
            self.assert_series_equal(observed, expected)
Пример #2
0
    def test_call_unequal_group_sizes(self):
        """Run permanova on unequal-sized groups for two grouping vectors.

        Both ``grouping_unequal`` and ``grouping_unequal_relabeled`` are
        expected to produce the same result when the RNG is seeded
        identically before each call.
        """
        expected_values = ['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645,
                           999]
        exp = pd.Series(data=expected_values, index=self.exp_index)

        for grouping_attr in ('grouping_unequal',
                              'grouping_unequal_relabeled'):
            # Same seed for every run so the permutation stream is identical.
            np.random.seed(0)
            obs = permanova(self.dm_unequal, getattr(self, grouping_attr))
            self.assert_series_equal(obs, exp)
Пример #3
0
 def test_call_no_ties(self):
     """Compare permanova on ``dm_no_ties`` against the named expected series."""
     expected = pd.Series(
         data=["PERMANOVA", "pseudo-F", 4, 2, 4.4, 0.332, 999],
         index=self.exp_index,
         name="PERMANOVA results",
     )

     # Fixed seed: the expected p-value depends on the permutation stream.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(observed, expected)
Пример #4
0
    def test_call_ties(self):
        """Check that permanova on ``dm_ties`` is repeatable and input-agnostic.

        Every call variant is executed twice from the same seed. The grouping
        is supplied once as a plain vector and once as a data frame column
        with equivalent groupings; all runs must match the same expectation.
        """
        expected = pd.Series(
            index=self.exp_index,
            data=['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999])

        # (positional arguments, keyword arguments) for each permanova call.
        call_variants = (
            ((self.dm_ties, self.grouping_equal), {}),
            ((self.dm_ties, self.df), {'column': 'Group'}),
        )
        for args, kwargs in call_variants:
            for _ in range(2):
                np.random.seed(0)
                observed = permanova(*args, **kwargs)
                self.assert_series_equal(observed, expected)
Пример #5
0
    def test_call_ties(self):
        """Rerunning permanova with identical inputs must give identical output.

        The grouping is passed both as a vector and as the ``'Group'`` column
        of a data frame; each form is run twice from the same seed.
        """
        def seeded_run(grouping, **kwargs):
            # Reset the global RNG so every run sees the same permutations.
            np.random.seed(0)
            return permanova(self.dm_ties, grouping, **kwargs)

        exp = pd.Series(
            data=['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999],
            index=self.exp_index)

        for _ in range(2):
            self.assert_series_equal(seeded_run(self.grouping_equal), exp)

        for _ in range(2):
            self.assert_series_equal(
                seeded_run(self.df, column='Group'), exp)
Пример #6
0
 def test_call_no_ties(self):
     """Seeded permanova on ``dm_no_ties`` must equal the named expectation."""
     expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999]
     expected = pd.Series(data=expected_values,
                          index=self.exp_index,
                          name='PERMANOVA results')

     # Seed first: the expected p-value is tied to this permutation stream.
     np.random.seed(0)
     self.assert_series_equal(
         permanova(self.dm_no_ties, self.grouping_equal), expected)
Пример #7
0
def permanovaResult(args, current_wd, retrospect_dir, output_file_tag,
                    notebook_name, suppress, silence, neglect,
                    permutations=999, seed=0):
    """Run PERMANOVA, print and log the result, and save the inputs used.

    Parameters
    ----------
    args, current_wd, suppress, silence
        Forwarded unchanged to ``PermanovaArgs``, which supplies the
        ``dist_matrix``, ``cluster`` and ``individual`` attributes read here.
    retrospect_dir : str
        Directory that receives the parameter CSV.
    output_file_tag : str
        Prefix of the parameter CSV file name.
    notebook_name, neglect
        Forwarded unchanged to ``UpdateNoteBook``.
    permutations : int, optional
        Number of permutations passed to ``permanova``. Defaults to 999,
        the value that used to be hard-coded.
    seed : int, optional
        Seed for the global NumPy RNG, set right before the test. Defaults
        to 0, the value that used to be hard-coded.

    Returns
    -------
    None
        The result is printed, handed to the notebook updater, and the
        individual/cluster assignment is written to
        ``<output_file_tag>_retrospect_permanova_parameter.csv``.
    """
    # Example invocation:
    # python3 -m emmer.bake -m 'Permanova' -i emmer/data/bake_data_dir_6/filtered_infoRich__PCA_coordinates.csv

    permanova_args = PermanovaArgs(args=args,
                                   current_wd=current_wd,
                                   suppress=suppress,
                                   silence=silence)

    # Conduct PERMANOVA. The global RNG is seeded so the permutation
    # p-value is reproducible for a given seed.
    numpy.random.seed(seed)
    result = permanova(permanova_args.dist_matrix,
                       permanova_args.cluster,
                       permutations=permutations)
    print(result)

    # Record the seed as text, matching the previous literal '0'.
    UpdateNoteBook(notebook_name=notebook_name,
                   neglect=neglect).updatePermanovaResult(
                       set_seed=str(seed),
                       set_cluster=permanova_args.cluster,
                       test_result=result)

    parameter_df = pandas.DataFrame({
        'individual': permanova_args.individual,
        'cluster': permanova_args.cluster
    })
    output_file_name = os.path.join(
        retrospect_dir,
        output_file_tag + '_retrospect_permanova_parameter.csv')
    parameter_df.to_csv(output_file_name)
Пример #8
0
 def permanova_permdisp(self):
     """Print PERMDISP and PERMANOVA results for ``self.dist_df``.

     The group of each sample is the part of its label before the first
     underscore, where the labels are whatever iterating ``self.dist_df``
     yields. PERMDISP runs with 999 permutations and PERMANOVA with 9999.
     """
     # Build the shared inputs once; both tests use the same matrix and
     # the same grouping.
     distance_matrix = DistanceMatrix(self.dist_df)
     grouping = [label.split('_')[0] for label in self.dist_df]

     print('running permdisp\n\n')
     print(permdisp(distance_matrix=distance_matrix,
                    grouping=grouping, permutations=999))
     print('running permanova\n\n')
     print(permanova(distance_matrix=distance_matrix,
                     grouping=grouping, permutations=9999))
Пример #9
0
 def testPer(self, dist, group):
     """Print this object's PERMANOVA output next to the module-level one.

     Elements 0 and 2 of ``self.permanova(dist, group)`` are printed first,
     followed by the result of the module-level ``permanova`` run on a
     ``DistanceMatrix`` whose IDs are ``0 .. len(group) - 1``.
     """
     own_result = self.permanova(dist, group)
     for position in (0, 2):
         print(own_result[position])

     sample_ids = range(len(group))
     reference_dm = DistanceMatrix(dist, sample_ids)
     reference_result = permanova(reference_dm,
                                  group,
                                  column=None,
                                  permutations=999)
     print(reference_result)
Пример #10
0
def _beta(permutations, data, xvalues, yvalues):
    """Run PERMANOVA between two sample sets and collect their distances.

    ``data`` is filtered down to the IDs found in the indexes of ``xvalues``
    and ``yvalues`` (presumably it is a distance matrix exposing ``filter``
    and ``to_series`` -- confirm against the caller).

    Returns a 4-tuple: the PERMANOVA p-value, the PERMANOVA test statistic,
    and the non-NaN distances within the x samples and within the y samples.
    """
    x_index = xvalues.index.values
    y_index = yvalues.index.values
    data_test = data.filter(list(x_index) + list(y_index))

    # Both series share the same name, so either one names the column.
    grouping = pd.concat([xvalues, yvalues]).to_frame()
    outcome = permanova(distance_matrix=data_test,
                        column=xvalues.name,
                        grouping=grouping,
                        permutations=permutations).to_dict()

    x_distances = data_test.filter(x_index).to_series().dropna()
    y_distances = data_test.filter(y_index).to_series().dropna()
    return (outcome['p-value'], outcome['test statistic'],
            list(x_distances.values), list(y_distances.values))
Пример #11
0
def _beta(permutations, data, xvalues, yvalues):
    """PERMANOVA of x samples versus y samples plus within-set distances.

    The IDs are taken from the indexes of ``xvalues`` and ``yvalues``;
    ``data`` is restricted to those IDs before testing.

    Returns ``(p-value, test statistic, x distances, y distances)`` where
    the two distance lists hold the non-NaN entries of the sub-matrix of
    each sample set.
    """
    def _within_set_distances(values):
        # Distances among the samples listed in the index of `values`.
        subset = data_test.filter(values.index.values)
        return list(subset.to_series().dropna().values)

    ids = [*xvalues.index.values, *yvalues.index.values]
    data_test = data.filter(ids)

    stats = permanova(
        distance_matrix=data_test,
        # x and y carry the same series name, so either works as the column
        column=xvalues.name,
        grouping=pd.concat([xvalues, yvalues]).to_frame(),
        permutations=permutations,
    ).to_dict()

    xvals = _within_set_distances(xvalues)
    yvals = _within_set_distances(yvalues)
    return (stats['p-value'], stats['test statistic'], xvals, yvals)
Пример #12
0
def get_permanova_ranked_list(x, y, feature_list, label_set):
    """Compute a per-feature PERMANOVA p-value table with FDR adjustment.

    ``x`` is transposed and converted to an array; column ``i`` of that
    array is treated as the values of ``feature_list[i]`` (assumes ``x`` is
    a features-by-samples data frame -- confirm against the caller). For
    each feature a city-block distance matrix between samples is built and
    tested against the grouping ``y``.

    ``label_set`` is accepted but not used.

    Returns a data frame indexed by ``feature_list`` with the columns
    ``"p-value"`` and ``"Adj p-value"`` (Benjamini-Hochberg).
    """
    sample_matrix = x.transpose().values

    p_values = []
    for feature_idx, _ in enumerate(feature_list):
        # pairwise_distances needs 2-D input: one row per sample.
        column = sample_matrix[:, feature_idx].reshape(-1, 1)
        raw_distances = pairwise_distances(column, column,
                                           metric="cityblock")
        result = permanova(DistanceMatrix(data=raw_distances), y)
        p_values.append(result.loc["p-value"])

    adjusted = multipletests(p_values, method="fdr_bh")[1]
    columns = {
        "p-value": np.array(p_values).reshape(-1),
        "Adj p-value": np.array(adjusted).reshape(-1),
    }
    return pd.DataFrame(index=feature_list, data=columns)
Пример #13
0
def pseudoF_permanova(points, labels):
    """Run a PERMANOVA test on a precomputed distance matrix and print it.

    Statistical significance is assessed via a permutation test: the
    assignment of objects to groups is randomly permuted a number of times,
    a pseudo-F statistic is computed for each permutation, and the p-value
    is the proportion of permuted pseudo-F statistics that are equal to or
    greater than the original (unpermuted) pseudo-F statistic.

    No distances are computed here: ``points`` is handed directly to
    ``skbio.DistanceMatrix`` and therefore must already be a valid distance
    matrix.

    Parameters
    ----------
    points : pandas.DataFrame or array-like
        Pairwise distances between all N objects.
    labels : array-like
        Group label of each of the N objects.

    Returns
    -------
    The result object returned by ``permanova`` (also printed).
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; np.asarray yields the
    # same underlying values and also accepts plain arrays.
    distances = skbio.DistanceMatrix(np.asarray(points))

    pseudo_f = permanova(distances, labels)
    print(pseudo_f)
    return pseudo_f
Пример #14
0
# Scatter plot of the `tsne` table colored by its "taxa" column, with one text
# label per group, saved to figures/individual_media.png.
sns.set(font_scale=1.5, style="ticks")
g = sns.FacetGrid(tsne, hue="taxa", height=10, aspect=16 / 10)
gm = g.map(plt.scatter, "x", "y", alpha=0.25)
# Per-group medians serve as the anchor position of each group's label.
# NOTE(review): grouping is by the `taxa` variable, not the "taxa" column
# name -- presumably a series named "taxa" so that `df.taxa` exists below.
means = tsne.groupby(taxa).agg("median").reset_index()
# One text artist per row of `means`, placed at the median x/y.
texts = means.apply(lambda df: plt.text(df.x, df.y, df.taxa, alpha=0.65),
                    axis=1)
# `texts` is rebound to whatever adjust_text returns (presumably it moves
# the labels to reduce overlap and draws arrows -- confirm in adjustText).
texts = adjust_text(
    texts,
    force_text=(0.02, 0.5),
    arrowprops=dict(arrowstyle="-|>", alpha=0.5, color="k"),
)
plt.savefig("figures/individual_media.png", dpi=200)
plt.close()

# Some statistics about metabolite usage
# indicator matrix 0 = metabolite not consumed, 1 = metabolite consumed
# (entries below -1e-6 become 1, entries above -1e-6 become 0)
binary = mat.where(mat < -1e-6, 0).where(mat > -1e-6, 1)

# Jaccard distances = 1 - percent overlap
J = pdist(binary, "jaccard")
print("Jaccard distances:", pd.Series(J).describe(), sep="\n")

# euclidean distances
E = pdist(mat, "euclidean")

# Test whether genus explains a good amount of that variation
p = permanova(DistanceMatrix(E), taxa)

# Recover R2 = SS_among / SS_total from the pseudo-F statistic.
# With k groups and N samples, F = (SS_among / (k - 1)) / (SS_within / (N - k)),
# hence SS_among / SS_within = F * (k - 1) / (N - k) and
# R2 = 1 - 1 / (1 + F * (k - 1) / (N - k)).
# The result entries are read by label: positional access on a
# label-indexed Series is deprecated in recent pandas.
f_stat = p["test statistic"]
n_groups = p["number of groups"]
n_samples = p["sample size"]
r2 = 1 - 1 / (1 + f_stat * (n_groups - 1) / (n_samples - n_groups))
p["R2"] = r2
print("PERMANOVA on euclidean distances:", p, sep="\n")
Пример #15
0
 def test_call_no_permutations(self):
     """With ``permutations=0`` the expected p-value entry is NaN."""
     expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0]
     expected = pd.Series(data=expected_values, index=self.exp_index)

     observed = permanova(self.dm_no_ties,
                          self.grouping_equal,
                          permutations=0)
     self.assert_series_equal(observed, expected)
Пример #16
0
 def test_call_no_ties(self):
     """Seeded permanova on the tie-free matrix must match the expectation."""
     expected = pd.Series(
         data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999],
         index=self.exp_index,
     )

     # The expected p-value is only valid for this seed.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)

     self.assert_series_equal(observed, expected)
Пример #17
0
            clr_res = clr_inv(np.dot(np.dot(U, s), V.T))
            # use just kl_div here because already closed
            kl_clr = entropy(closure(basetmp_sub).T, clr_res.T).mean()
            results[(rank_, power_, depth_, 'rclr', 'KL-Div')] = [kl_clr]

            # test KL without rclr
            X_spn = np.array(subtmp_sub.copy()).astype(float)
            X_spn[X_spn == 0] = np.nan
            U_, s_, V_ = OptSpace(iteration=1000).fit_transform(X_spn)
            res_raw = np.dot(np.dot(U_, s_), V_.T)
            res_raw[res_raw <= 0] = 1
            kl_raw = entropy(closure(basetmp_sub).T, closure(res_raw).T).mean()
            results[(rank_, power_, depth_, 'Raw Counts', 'KL-Div')] = [kl_raw]

            # f-stat
            resfclr = permanova(DistanceMatrix(distance.cdist(U, U)),
                                meta['group'])['test statistic']
            rawfres = permanova(DistanceMatrix(distance.cdist(U_, U_)),
                                meta['group'])['test statistic']
            results[(rank_, power_, depth_, 'rclr', 'F-Statistic')] = [resfclr]
            results[(rank_, power_, depth_, 'Raw Counts',
                     'F-Statistic')] = [rawfres]

            # KNN
            for U_tmp, method in zip([U, U_], ['rclr', 'Raw Counts']):
                pcoa_tmp = pcoa(DistanceMatrix(distance.cdist(U_tmp,
                                                              U_tmp))).samples
                pcoa_tmp.index = subtmp_sub.index
                # split
                X_train, X_test, y_train, y_test = train_test_split(
                    pcoa_tmp,
                    meta['group'].ravel(),
Пример #18
0
# PERMANOVA test statistic for every dataset, (fold, sample count) subset and
# method. `perm_res` keeps nested dicts; `perm_res_tmp` keeps one data frame
# per subset, which is then concatenated per dataset into `both_perm_res`.
perm_res = {}
perm_res_tmp = {}
for dataset_, subs in distances.items():
    perm_res[dataset_] = {}
    perm_res_tmp[dataset_] = {}
    for (fold_, Nsamp_), methods_ in subs.items():
        meta_ = meta[dataset_][(fold_, Nsamp_)]['metadata']
        # Skip subsets that hold fewer samples than requested.
        if len(meta_.index) < Nsamp_:
            continue
        # The grouping vector is the same for every method of this subset,
        # so look it up once from the metadata already fetched above.
        grouping_ = meta_[case_study[dataset_]['factor']].values
        subset_res = perm_res[dataset_][(fold_, Nsamp_)] = {}
        for method, dist_tmp in methods_.items():
            dist_tmp = DistanceMatrix(dist_tmp)
            perm_tmp = permanova(dist_tmp, grouping_)
            subset_res[method] = {
                'test statistic': perm_tmp['test statistic']
            }
        # Build the frame once per subset instead of once per method; an
        # empty subset now yields an empty frame rather than a plain dict.
        perm_res_tmp[dataset_][(fold_, Nsamp_)] = pd.DataFrame(subset_res)

    both_perm_res[dataset_] = pd.concat(perm_res_tmp[dataset_])

# run classification
import warnings
warnings.simplefilter('ignore')  #for PCoA warnings
from skbio.stats.ordination import pcoa
from sklearn import metrics
from sklearn.cluster import KMeans
Пример #19
0
 def test_call_no_permutations(self):
     """Run permanova with zero permutations and check the NaN p-value row."""
     obs = permanova(self.dm_no_ties, self.grouping_equal, permutations=0)

     # No seeding here: the test passes permutations=0.
     exp = pd.Series(
         index=self.exp_index,
         data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0],
     )
     self.assert_series_equal(obs, exp)
Пример #20
0
        sample_id = each_sample_split[0]
        sample_group = each_sample_split[1]
        sample_id_list.append(sample_id)
        sample_group_list.append(sample_group)

# Load the tab-separated data table.
df = pd.read_csv(infile_data, sep='\t')

# One list of values per sample, in the order of sample_id_list.
lol_data_in = [df[col_id].values.tolist() for col_id in sample_id_list]

# Pairwise distances between all samples under the chosen metric.
dist_arrary = pairwise_distances(
    lol_data_in, lol_data_in, metric=distance_metric)

# Attach the sample IDs to the distance matrix.
dist_matrix = DistanceMatrix(dist_arrary, sample_id_list)

# ANOSIM test, followed by a blank separator line.
anosim_test = anosim(dist_matrix, sample_group_list, permutations=999)
print(anosim_test)
print()

# PERMANOVA test on the same matrix and grouping.
permanova_test = permanova(
    dist_matrix, sample_group_list, permutations=999)
print(permanova_test)
Пример #21
0
        text=
        "ATTENTION: At least 1 of your eigenvalues is negative, potentially leading to problems! You may want to choose another metric for distance calculation or apply data transformation on the distance matrix (e.g. square root) to get rid of this problem."
    )

# Eigenvalue table: raw eigenvalue, proportion explained, cumulative share.
eig_dm = pd.DataFrame(pc.eigvals, columns=["Eigenvalue"])
eig_dm["Explained"] = pc.proportion_explained
eig_dm["Summed_explanation"] = pc.proportion_explained.cumsum()

# Minkowski runs additionally carry their exponent p in every file name.
run_tag = mname + "_p" + str(p) if metric == "minkowski" else mname
eig_dm.to_csv("eigenvalues_" + run_tag + ".txt", sep="\t")

# Statistics: ANOSIM first, then PERMANOVA, both on the `var` column.
anos = anosim(div, map_DF, column=var, permutations=999)
perm = permanova(div, map_DF, column=var, permutations=999)

stat_file = "statistics_" + run_tag + "_" + var + ".txt"

with open(stat_file, "w") as st:
    st.writelines((
        "ANOSIM\tPermutations: 999\n\n",
        "R\t" + str(anos["test statistic"]) + "\n",
        "p-value\t" + str(anos["p-value"]) + "\n\n",
        "PERMANOVA\tPermutations: 999\n\n",
        "F\t" + str(perm["test statistic"]) + "\n",
        "p-value\t" + str(perm["p-value"]) + "\n\n",
    ))

end = time.time()
Пример #22
0
 def test_call_no_permutations(self):
     """Zero permutations: the named result must carry a NaN p-value."""
     expected = pd.Series(
         data=["PERMANOVA", "pseudo-F", 4, 2, 4.4, np.nan, 0],
         index=self.exp_index,
         name="PERMANOVA results",
     )
     observed = permanova(
         self.dm_no_ties, self.grouping_equal, permutations=0
     )
     self.assert_series_equal(observed, expected)
Пример #23
0
# Collect one list of floats per table column. Row 0 and column 0 are
# skipped (presumably a header row and a label column -- confirm upstream).
for a in range(1, len(rows[0])):
    this_sample = [float(rows[b][a]) for b in range(1, len(rows))]
    samples.append(this_sample)
"""
only_samples = ['LR', 'SR']
new_samples, new_names = [], []
for a in range(len(sample_names)):
    for b in range(len(only_samples)):
        if sample_names[a] == only_samples[b]:
            new_samples.append(samples[a])
            new_names.append(sample_names[a])
samples = new_samples
sample_names = new_names
print(len(samples), len(sample_names))
"""

# Distance matrix over all collected samples, using `braycurtis` as the
# metric.
sam_dm = dm.from_iterable(samples, metric=braycurtis)
# The three tests below share the matrix and use `sample_names` as the
# grouping vector (hence column=None). Each runs 999 permutations and prints
# its result immediately; keep the order, since reordering would change the
# output order.
pdisp = permdisp(sam_dm,
                 sample_names,
                 column=None,
                 test='median',
                 permutations=999)
print(pdisp)
asim = anosim(sam_dm, sample_names, column=None, permutations=999)
print(asim)
perm = permanova(sam_dm, sample_names, column=None, permutations=999)
print(perm)