Python permanova примеры, skbio.stats.distance.permanova Python примеры использования

Пример #1

0

Показать файл

Файл: test_permanova.py Проект: squarednob/scikit-bio

    def test_call_unequal_group_sizes(self):
        """Check permanova output for the unequal-group-size fixtures.

        Both ``grouping_unequal`` and ``grouping_unequal_relabeled`` must
        yield the same expected result series on ``dm_unequal``.
        """
        expected = pd.Series(
            ['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645, 999],
            index=self.exp_index)

        # The global NumPy RNG is reseeded before every call.
        for grouping in (self.grouping_unequal,
                         self.grouping_unequal_relabeled):
            np.random.seed(0)
            observed = permanova(self.dm_unequal, grouping)
            self.assert_series_equal(observed, expected)

Пример #2

0

Показать файл

Файл: test_permanova.py Проект: AndreaEdwards/scikit-bio

    def test_call_unequal_group_sizes(self):
        """Run permanova on ``dm_unequal`` with two grouping fixtures.

        The original and the relabeled grouping are each compared against
        the same expected series.
        """
        expected_values = ['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645,
                           999]
        expected = pd.Series(index=self.exp_index, data=expected_values)

        def run_seeded(grouping):
            # Reseed the global NumPy RNG right before each call.
            np.random.seed(0)
            return permanova(self.dm_unequal, grouping)

        self.assert_series_equal(run_seeded(self.grouping_unequal), expected)
        self.assert_series_equal(
            run_seeded(self.grouping_unequal_relabeled), expected)

Пример #3

0

Показать файл

Файл: test_permanova.py Проект: RNAer/scikit-bio

 def test_call_no_ties(self):
     """Compare permanova on ``dm_no_ties`` with the expected named series."""
     expected_values = ["PERMANOVA", "pseudo-F", 4, 2, 4.4, 0.332, 999]
     expected = pd.Series(expected_values,
                          index=self.exp_index,
                          name="PERMANOVA results")
     # Seed the global NumPy RNG immediately before the call.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(observed, expected)

Пример #4

0

Показать файл

Файл: test_permanova.py Проект: squarednob/scikit-bio

    def test_call_ties(self):
        """Check permanova on ``dm_ties`` for repeatability across call forms.

        Each call form is run twice with the same seed, and both the
        grouping vector and the data frame (``column='Group'``) are compared
        against the same expected series.
        """
        expected = pd.Series(
            ['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999],
            index=self.exp_index)

        # (positional args, keyword args) for each way of passing groupings.
        call_variants = (
            ((self.dm_ties, self.grouping_equal), {}),
            ((self.dm_ties, self.df), {'column': 'Group'}),
        )
        for args, kwargs in call_variants:
            for _ in range(2):
                np.random.seed(0)
                observed = permanova(*args, **kwargs)
                self.assert_series_equal(observed, expected)

Пример #5

0

Показать файл

Файл: test_permanova.py Проект: AndreaEdwards/scikit-bio

    def test_call_ties(self):
        """Rerun permanova on ``dm_ties`` and expect identical results.

        The grouping-vector form and the data-frame form are each run twice
        with the same seed and compared against one expected series.
        """
        expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999]
        expected = pd.Series(index=self.exp_index, data=expected_values)

        # Deferred calls: first a grouping vector, then a data frame column.
        runs = (
            lambda: permanova(self.dm_ties, self.grouping_equal),
            lambda: permanova(self.dm_ties, self.df, column='Group'),
        )
        for run in runs:
            for _ in range(2):
                np.random.seed(0)
                self.assert_series_equal(run(), expected)

Пример #6

0

Показать файл

 def test_call_no_ties(self):
     """Check the seeded permanova result for the ``dm_no_ties`` fixture."""
     expected = pd.Series(
         data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999],
         index=self.exp_index,
         name='PERMANOVA results',
     )
     # Seed the global NumPy RNG immediately before the call.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(observed, expected)

Пример #7

0

Показать файл

Файл: permanova.py Проект: HWChang/emmer

def permanovaResult(args, current_wd, retrospect_dir, output_file_tag,
                    notebook_name, suppress, silence, neglect,
                    permutations=999, seed=0):
    """Run PERMANOVA, print the result, and record the run.

    Example invocation:
    python3 -m emmer.bake -m 'Permanova' -i emmer/data/bake_data_dir_6/filtered_infoRich__PCA_coordinates.csv

    Parameters
    ----------
    args, current_wd, suppress, silence
        Forwarded to ``PermanovaArgs``, which supplies ``dist_matrix``,
        ``cluster`` and ``individual``.
    retrospect_dir : str
        Directory in which the parameter CSV is written.
    output_file_tag : str
        Prefix of the CSV name
        (``<tag>_retrospect_permanova_parameter.csv``).
    notebook_name, neglect
        Forwarded to ``UpdateNoteBook``.
    permutations : int, optional
        Number of permutations passed to ``permanova``. Defaults to 999,
        the value that was previously hard-coded.
    seed : int, optional
        Seed for the global NumPy RNG, set just before the test. Defaults
        to 0, the value that was previously hard-coded.

    Returns
    -------
    None
    """
    permanova_args = PermanovaArgs(args=args,
                                   current_wd=current_wd,
                                   suppress=suppress,
                                   silence=silence)

    # Conduct PERMANOVA with a fixed seed.
    numpy.random.seed(seed)
    result = permanova(permanova_args.dist_matrix,
                       permanova_args.cluster,
                       permutations=permutations)
    print(result)

    # The notebook receives the seed as a string ('0' by default, as before).
    UpdateNoteBook(notebook_name=notebook_name,
                   neglect=neglect).updatePermanovaResult(
                       set_seed=str(seed),
                       set_cluster=permanova_args.cluster,
                       test_result=result)

    # Save the individual/cluster assignment used for this run.
    parameter_df = pandas.DataFrame({
        'individual': permanova_args.individual,
        'cluster': permanova_args.cluster
    })
    output_file_name = os.path.join(
        retrospect_dir,
        (output_file_tag + '_retrospect_permanova_parameter.csv'))
    parameter_df.to_csv(output_file_name)

Пример #8

0

Показать файл

 def permanova_permdisp(self):
     """Print PERMDISP and PERMANOVA results computed from ``self.dist_df``.

     The group of each sample is the part of its label before the first
     underscore, where the labels are those produced by iterating
     ``self.dist_df``. PERMDISP is run with 999 permutations and PERMANOVA
     with 9999.
     """
     print('running permdisp\n\n')
     # Build the shared inputs once instead of once per test.
     distance_matrix = DistanceMatrix(self.dist_df)
     grouping = [label.split('_')[0] for label in list(self.dist_df)]
     print(permdisp(distance_matrix=distance_matrix,
                    grouping=grouping, permutations=999))
     print('running permanova\n\n')
     print(permanova(distance_matrix=distance_matrix,
                     grouping=grouping, permutations=9999))

Пример #9

0

Показать файл

Файл: Diversity.py Проект: lgguzman/biodiversity

 def testPer(self, dist, group):
     """Print this object's PERMANOVA output next to the ``permanova`` result.

     Items 0 and 2 of ``self.permanova(dist, group)`` are printed first,
     followed by the result of ``permanova`` run with 999 permutations on a
     ``DistanceMatrix`` whose ids are ``range(len(group))``.
     """
     own_result = self.permanova(dist, group)
     print(own_result[0])
     print(own_result[2])

     sample_ids = range(len(group))
     matrix = DistanceMatrix(dist, sample_ids)
     reference = permanova(matrix, group, column=None, permutations=999)
     print(reference)

Пример #10

0

Показать файл

Файл: effect_size.py Проект: serenejiang/evident

def _beta(permutations, data, xvalues, yvalues):
    """Run PERMANOVA on the samples of two groups and collect their distances.

    ``data`` is filtered to the ids of ``xvalues`` and ``yvalues``; the two
    series are concatenated into the grouping frame.

    Returns
    -------
    tuple
        ``(p-value, test statistic, xvals, yvals)``, where ``xvals`` and
        ``yvals`` are the non-NaN values of the filtered data restricted to
        the ids of ``xvalues`` and ``yvalues`` respectively.
    """
    combined_ids = list(xvalues.index.values) + list(yvalues.index.values)
    data_test = data.filter(combined_ids)

    def _distances_within(ids):
        # Flatten the sub-matrix for ``ids`` and drop missing entries.
        return list(data_test.filter(ids).to_series().dropna().values)

    stats = permanova(
        distance_matrix=data_test,
        # either x or y can provide the column name because they are the same
        column=xvalues.name,
        grouping=pd.concat([xvalues, yvalues]).to_frame(),
        permutations=permutations).to_dict()
    xvals = _distances_within(xvalues.index.values)
    yvals = _distances_within(yvalues.index.values)
    return (stats['p-value'], stats['test statistic'], xvals, yvals)

Пример #11

0

Показать файл

Файл: effect_size.py Проект: antgonza/evident

def _beta(permutations, data, xvalues, yvalues):
    """Compute PERMANOVA statistics and per-group distance values.

    The input ``data`` is restricted to the ids found in ``xvalues`` and
    ``yvalues``, and PERMANOVA is run with both series concatenated as the
    grouping frame.

    Returns
    -------
    tuple
        PERMANOVA p-value, PERMANOVA test statistic, the non-NaN values of
        the data filtered to the ``xvalues`` ids, and the same for the
        ``yvalues`` ids.
    """
    x_index = xvalues.index.values
    y_index = yvalues.index.values
    data_test = data.filter(list(x_index) + list(y_index))

    # x and y share the same name, so either one can supply the column.
    grouping_frame = pd.concat([xvalues, yvalues]).to_frame()
    result = permanova(distance_matrix=data_test,
                       column=xvalues.name,
                       grouping=grouping_frame,
                       permutations=permutations)
    permanova_result = result.to_dict()

    xvals = list(data_test.filter(x_index).to_series().dropna().values)
    yvals = list(data_test.filter(y_index).to_series().dropna().values)
    p_value = permanova_result['p-value']
    statistic = permanova_result['test statistic']
    return (p_value, statistic, xvals, yvals)

Пример #12

0

Показать файл

Файл: permanova_test.py Проект: derekreiman/Meta-Signer

def get_permanova_ranked_list(x, y, feature_list, label_set):
    """Compute a PERMANOVA p-value per feature plus FDR-adjusted p-values.

    For every position in ``feature_list`` the matching column of the
    transposed ``x`` is turned into a cityblock distance matrix and tested
    against ``y``. ``label_set`` is accepted but not used.

    Returns
    -------
    pd.DataFrame
        Indexed by ``feature_list`` with columns ``"p-value"`` and
        ``"Adj p-value"`` (Benjamini-Hochberg, ``method="fdr_bh"``).
    """
    samples = x.transpose().values

    def _feature_p_value(position):
        # One-dimensional cityblock distances between all samples.
        column = samples[:, position]
        pairwise = pairwise_distances(column.reshape(-1, 1),
                                      column.reshape(-1, 1),
                                      metric="cityblock")
        outcome = permanova(DistanceMatrix(data=pairwise), y)
        return outcome.loc["p-value"]

    p_values = [_feature_p_value(i) for i in range(len(feature_list))]

    adjusted = multipletests(p_values, method="fdr_bh")[1]
    columns = {
        "p-value": np.array(p_values).reshape(-1),
        "Adj p-value": np.array(adjusted).reshape(-1),
    }
    return pd.DataFrame(index=feature_list, data=columns)

Пример #13

0

Показать файл

def pseudoF_permanova(points, labels):
    """Run a PERMANOVA test, print its result, and return it.

    Statistical significance is assessed via a permutation test: the
    assignment of objects to groups is randomly permuted a number of times,
    a pseudo-F statistic is computed for each permutation, and the p-value
    is the proportion of permuted pseudo-F statistics that are equal to or
    greater than the original (unpermuted) pseudo-F statistic.

    Parameters
    ----------
    points : pandas.DataFrame or np.ndarray
        Converted to an array and passed to ``skbio.DistanceMatrix``.
        NOTE(review): no distances are computed in this function, so this
        presumably has to be a square matrix of pairwise distances already
        - confirm against the callers.
    labels : array-like
        Passed to ``permanova`` as the grouping of the objects.

    Returns
    -------
    The result object returned by ``permanova``.
    """
    # np.asarray replaces DataFrame.as_matrix(), which was removed in
    # pandas 1.0; it also accepts plain arrays.
    distances = skbio.DistanceMatrix(np.asarray(points))

    pseudo_f = permanova(distances, labels)
    print(pseudo_f)
    return pseudo_f

Пример #14

0

Показать файл

# Scatter plot of the t-SNE coordinates, colored by taxa, with a text label
# placed at each group's median position.
sns.set(font_scale=1.5, style="ticks")
g = sns.FacetGrid(tsne, hue="taxa", height=10, aspect=16 / 10)
gm = g.map(plt.scatter, "x", "y", alpha=0.25)
means = tsne.groupby(taxa).agg("median").reset_index()
texts = means.apply(lambda df: plt.text(df.x, df.y, df.taxa, alpha=0.65),
                    axis=1)
texts = adjust_text(
    texts,
    force_text=(0.02, 0.5),
    arrowprops=dict(arrowstyle="-|>", alpha=0.5, color="k"),
)
plt.savefig("figures/individual_media.png", dpi=200)
plt.close()

# Some statistics about metabolite usage
# indicator matrix 0 = metabolite not consumed, 1 = metabolite consumed
binary = mat.where(mat < -1e-6, 0).where(mat > -1e-6, 1)

# Jaccard distances = 1 - percent overlap
J = pdist(binary, "jaccard")
print("Jaccard distances:", pd.Series(J).describe(), sep="\n")

# euclidean distances
E = pdist(mat, "euclidean")

# Test whether genus explains a good amount of that variation
p = permanova(DistanceMatrix(E), taxa)
# R2 = SS_among / SS_total = 1 - 1 / (1 + F * df_among / df_within), with
# df_among = groups - 1 and df_within = samples - groups (the degrees of
# freedom of the pseudo-F statistic). Fields are read by label because
# positional indexing of a label-indexed Series is deprecated in pandas.
f_stat = p["test statistic"]
df_among = p["number of groups"] - 1
df_within = p["sample size"] - p["number of groups"]
r2 = 1 - 1 / (1 + f_stat * df_among / df_within)
p["R2"] = r2
print("PERMANOVA on euclidean distances:", p, sep="\n")

Пример #15

0

Показать файл

Файл: test_permanova.py Проект: AndreaEdwards/scikit-bio

 def test_call_no_permutations(self):
     """With ``permutations=0`` the expected p-value entry is NaN."""
     expected = pd.Series(
         ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0],
         index=self.exp_index)
     observed = permanova(self.dm_no_ties,
                          self.grouping_equal,
                          permutations=0)
     self.assert_series_equal(observed, expected)

Пример #16

0

Показать файл

Файл: test_permanova.py Проект: AndreaEdwards/scikit-bio

 def test_call_no_ties(self):
     """Check the seeded permanova result for the ``dm_no_ties`` fixture."""
     expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999]
     expected = pd.Series(index=self.exp_index, data=expected_values)
     # Seed the global NumPy RNG immediately before the call.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(observed, expected)

Пример #17

0

Показать файл

            clr_res = clr_inv(np.dot(np.dot(U, s), V.T))
            # use just kl_div here because already closed
            kl_clr = entropy(closure(basetmp_sub).T, clr_res.T).mean()
            results[(rank_, power_, depth_, 'rclr', 'KL-Div')] = [kl_clr]

            # test KL without rclr
            X_spn = np.array(subtmp_sub.copy()).astype(float)
            X_spn[X_spn == 0] = np.nan
            U_, s_, V_ = OptSpace(iteration=1000).fit_transform(X_spn)
            res_raw = np.dot(np.dot(U_, s_), V_.T)
            res_raw[res_raw <= 0] = 1
            kl_raw = entropy(closure(basetmp_sub).T, closure(res_raw).T).mean()
            results[(rank_, power_, depth_, 'Raw Counts', 'KL-Div')] = [kl_raw]

            # f-stat
            resfclr = permanova(DistanceMatrix(distance.cdist(U, U)),
                                meta['group'])['test statistic']
            rawfres = permanova(DistanceMatrix(distance.cdist(U_, U_)),
                                meta['group'])['test statistic']
            results[(rank_, power_, depth_, 'rclr', 'F-Statistic')] = [resfclr]
            results[(rank_, power_, depth_, 'Raw Counts',
                     'F-Statistic')] = [rawfres]

            # KNN
            for U_tmp, method in zip([U, U_], ['rclr', 'Raw Counts']):
                pcoa_tmp = pcoa(DistanceMatrix(distance.cdist(U_tmp,
                                                              U_tmp))).samples
                pcoa_tmp.index = subtmp_sub.index
                # split
                X_train, X_test, y_train, y_test = train_test_split(
                    pcoa_tmp,
                    meta['group'].ravel(),

Пример #18

0

Показать файл

# Collect the PERMANOVA test statistic for every dataset, (fold, sample
# count) subset and method, and store one concatenated table per dataset.
perm_res = {}
perm_res_tmp = {}
for dataset_, subs in distances.items():
    perm_res[dataset_] = {}
    perm_res_tmp[dataset_] = {}
    for subset_key, methods_ in subs.items():
        fold_, Nsamp_ = subset_key
        meta_ = meta[dataset_][subset_key]['metadata']
        # Skip subsets whose metadata has fewer rows than requested samples.
        if len(meta_.index) < Nsamp_:
            continue
        subset_stats = perm_res[dataset_][subset_key] = {}
        perm_res_tmp[dataset_][subset_key] = {}
        for method, dist_tmp in methods_.items():
            subset_stats[method] = {}
            dist_tmp = DistanceMatrix(dist_tmp)
            factor_values = meta_[case_study[dataset_]['factor']].values
            perm_tmp = permanova(dist_tmp, factor_values)
            subset_stats[method]['test statistic'] = \
                perm_tmp['test statistic']
            # Rebuilt after every method so it always mirrors subset_stats.
            perm_res_tmp[dataset_][subset_key] = pd.DataFrame(subset_stats)

    both_perm_res[dataset_] = pd.concat(perm_res_tmp[dataset_])

# run classification
import warnings
warnings.simplefilter('ignore')  #for PCoA warnings
from skbio.stats.ordination import pcoa
from sklearn import metrics
from sklearn.cluster import KMeans

Пример #19

0

Показать файл

Файл: test_permanova.py Проект: squarednob/scikit-bio

 def test_call_no_permutations(self):
     """Run permanova with zero permutations and expect a NaN p-value."""
     expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0]
     expected = pd.Series(index=self.exp_index, data=expected_values)
     self.assert_series_equal(
         permanova(self.dm_no_ties, self.grouping_equal, permutations=0),
         expected)

Пример #20

0

Показать файл

Файл: Test_6_permanova.py Проект: songweizhi/FlowCellBiofilm

        sample_id = each_sample_split[0]
        sample_group = each_sample_split[1]
        sample_id_list.append(sample_id)
        sample_group_list.append(sample_group)

# Load the tab-separated input table.
df = pd.read_csv(infile_data, sep='\t')

# One list of values per sample, in the order of sample_id_list.
lol_data_in = [(df[col_id].values).tolist() for col_id in sample_id_list]

# Pairwise distances between all samples using the configured metric.
dist_arrary = pairwise_distances(
    lol_data_in, lol_data_in, metric=distance_metric)

# Attach the sample ids to the distance matrix.
dist_matrix = DistanceMatrix(dist_arrary, sample_id_list)

# ANOSIM test with 999 permutations.
anosim_test = anosim(dist_matrix, sample_group_list, permutations=999)
print(anosim_test)
print()

# PERMANOVA test with 999 permutations.
permanova_test = permanova(
    dist_matrix, sample_group_list, permutations=999)
print(permanova_test)

Пример #21

0

Показать файл

        text=
        "ATTENTION: At least 1 of your eigenvalues is negative, potentially leading to problems! You may want to choose another metric for distance calculation or apply data transformation on the distance matrix (e.g. square root) to get rid of this problem."
    )

# Table of eigenvalues with per-axis and cumulative proportion explained.
eig_dm = pd.DataFrame(pc.eigvals, columns=["Eigenvalue"])
eig_dm["Explained"] = pc.proportion_explained
eig_dm["Summed_explanation"] = pc.proportion_explained.cumsum()

# File names carry the Minkowski exponent only for the minkowski metric.
metric_label = mname + "_p" + str(p) if metric == "minkowski" else mname
eig_dm.to_csv("eigenvalues_" + metric_label + ".txt", sep="\t")

# Statistics: ANOSIM and PERMANOVA on the same matrix and metadata column.
anos = anosim(div, map_DF, column=var, permutations=999)
perm = permanova(div, map_DF, column=var, permutations=999)

stat_file = "statistics_" + metric_label + "_" + var + ".txt"

with open(stat_file, "w") as st:
    st.write("ANOSIM\tPermutations: 999\n\n")
    st.write(f"R\t{anos['test statistic']!s}\n")
    st.write(f"p-value\t{anos['p-value']!s}\n\n")
    st.write("PERMANOVA\tPermutations: 999\n\n")
    st.write(f"F\t{perm['test statistic']!s}\n")
    st.write(f"p-value\t{perm['p-value']!s}\n\n")

end = time.time()

Пример #22

0

Показать файл

Файл: test_permanova.py Проект: RNAer/scikit-bio

 def test_call_no_permutations(self):
     """With ``permutations=0`` the expected p-value entry is NaN."""
     expected = pd.Series(
         data=["PERMANOVA", "pseudo-F", 4, 2, 4.4, np.nan, 0],
         index=self.exp_index,
         name="PERMANOVA results",
     )
     observed = permanova(self.dm_no_ties,
                          self.grouping_equal,
                          permutations=0)
     self.assert_series_equal(observed, expected)

Пример #23

0

Показать файл

# Each column of ``rows`` after the first becomes one sample; the first row
# and the first column are skipped.
for col in range(1, len(rows[0])):
    samples.append(
        [float(rows[row][col]) for row in range(1, len(rows))])
"""
only_samples = ['LR', 'SR']
new_samples, new_names = [], []
for a in range(len(sample_names)):
    for b in range(len(only_samples)):
        if sample_names[a] == only_samples[b]:
            new_samples.append(samples[a])
            new_names.append(sample_names[a])
samples = new_samples
sample_names = new_names
print(len(samples), len(sample_names))
"""

# Bray-Curtis distance matrix over the samples.
sam_dm = dm.from_iterable(samples, metric=braycurtis)

# Run the three tests with sample_names as the grouping, 999 permutations.
pdisp = permdisp(
    sam_dm, sample_names, column=None, test='median', permutations=999)
print(pdisp)

asim = anosim(sam_dm, sample_names, column=None, permutations=999)
print(asim)

perm = permanova(sam_dm, sample_names, column=None, permutations=999)
print(perm)

Python permanova примеры использования