Пример #1
0
 def _create_ordination_results(self):
     """Write a fixed PCoA ordination to a temp file and return its path.

     The ordination is built from hard-coded values for five samples on
     four axes (``PC1``-``PC4``). The file is created with ``mkstemp``
     inside ``self.out_dir`` using a ``.txt`` suffix, and the ordination
     is written to it with ``OrdinationResults.write``.
     """
     axes = ['PC1', 'PC2', 'PC3', 'PC4']
     ids = [
         '1.SKM4.640180',
         '1.SKB8.640193',
         '1.SKD8.640184',
         '1.SKM9.640192',
         '1.SKB7.640196',
     ]
     # One row per sample id, one column per axis.
     coords = np.asarray([
         [-2.584, 1.739, 3.828, -1.944],
         [-2.710, -1.859, -8.648, 1.180],
         [2.350, 9.625, -3.457, -3.208],
         [2.614, -1.114, 1.476, 2.908],
         [2.850, -1.925, 6.232, 1.381],
     ])

     eigenvalues = pd.Series(
         [0.51236726, 0.30071909, 0.26791207, 0.20898868], index=axes)
     sample_frame = pd.DataFrame(coords, index=ids, columns=axes)
     explained = pd.Series(
         [0.2675738328, 0.157044696, 0.1399118638, 0.1091402725],
         index=axes)

     ordination = OrdinationResults(
         short_method_name='PCoA',
         long_method_name='Principal Coordinate Analysis',
         eigvals=eigenvalues,
         samples=sample_frame,
         proportion_explained=explained)

     # mkstemp returns an already-open descriptor; close it because the
     # ordination is written by path, not through the descriptor.
     handle, path = mkstemp(suffix='.txt', dir=self.out_dir)
     close(handle)
     ordination.write(path)
     return path
Пример #2
0
def rpca(in_biom: str, output_dir: str,
         min_sample_depth: int, rank: int) -> None:
    """Run RPCA with an rclr preprocessing step and write the results.

    Parameters
    ----------
    in_biom : str
        Path passed to ``load_table``.
    output_dir : str
        Directory that receives the output files. This function does not
        create it.
    min_sample_depth : int
        A sample is kept only when the sum of its values is strictly
        greater than this threshold.
    rank : int
        Rank passed to ``OptSpace`` for the ordination; also the number of
        ``PC<n>`` axis labels generated.

    Notes
    -----
    Three files are written into ``output_dir``:

    * ``rclr_OTUtable.txt`` - tab-separated table rebuilt from a
      default-parameter ``OptSpace`` fit of the rclr-transformed data.
    * ``RPCA_Ordination.txt`` - the ``OrdinationResults`` object.
    * ``RPCA_distance.txt`` - the ``DistanceMatrix`` built from
      ``opt.distance``.
    """
    # import table
    table = load_table(in_biom)

    # filter samples to min depth (strictly greater than the threshold)
    def sample_filter(val, id_, md):
        return sum(val) > min_sample_depth

    table = table.filter(sample_filter, axis='sample')
    table = table.to_dataframe().T.drop_duplicates()

    # rclr for saving the transformed OTU table (RSC edited)
    tablefit = rclr().fit_transform(table.copy())
    U, s, V = OptSpace().fit_transform(tablefit)
    tablefit = np.dot(np.dot(U, s), V.T)
    tablefit = pd.DataFrame(tablefit.T, index=table.columns,
                            columns=table.index)
    # to_csv opens, writes and closes the path on its own; wrapping it in a
    # separate open(..., 'w') only created a second, unused file handle.
    tablefit.to_csv(os.path.join(output_dir, 'rclr_OTUtable.txt'),
                    sep='\t', index_label='OTU_ID')

    # rclr preprocessing and OptSpace (RPCA)
    opt = OptSpace(rank=rank).fit(rclr().fit_transform(table.copy()))
    # maps default integer column labels 0..rank-1 to 'PC1'..'PC<rank>'
    rename_cols = {i - 1: 'PC' + str(i) for i in range(1, rank + 1)}

    # Feature Loadings
    feature_loading = pd.DataFrame(opt.feature_weights, index=table.columns)
    feature_loading = feature_loading.rename(columns=rename_cols)
    feature_loading.sort_values('PC1', inplace=True, ascending=True)

    # Sample Loadings
    sample_loading = pd.DataFrame(opt.sample_weights, index=table.index)
    sample_loading = sample_loading.rename(columns=rename_cols)

    proportion_explained = pd.Series(opt.explained_variance_ratio,
                                     index=list(rename_cols.values()))
    eigvals = pd.Series(opt.eigenvalues,
                        index=list(rename_cols.values()))
    # save ordination results
    ord_res = OrdinationResults(
        'PCoA',
        'Principal Coordinate Analysis',
        eigvals.copy(),
        sample_loading.copy(),
        features=feature_loading.copy(),
        proportion_explained=proportion_explained.copy())
    # write files to output folder
    ord_res.write(os.path.join(output_dir, 'RPCA_Ordination.txt'))
    # save distance matrix
    dist_res = skbio.stats.distance.DistanceMatrix(
        opt.distance, ids=sample_loading.index)
    dist_res.write(os.path.join(output_dir, 'RPCA_distance.txt'))
Пример #3
0
def _1(data: skbio.OrdinationResults) -> OrdinationFormat:
    """Write ``data`` into a new ``OrdinationFormat`` and return it.

    The target passed to ``data.write`` is ``str()`` of the freshly created
    format object, and the writer is selected with ``format='ordination'``.
    """
    ordination_file = OrdinationFormat()
    target = str(ordination_file)
    data.write(target, format='ordination')
    return ordination_file
Пример #4
0
def _1(data: skbio.OrdinationResults) -> OrdinationFormat:
    """Serialize an ordination into a newly created ``OrdinationFormat``.

    ``data.write`` receives ``str()`` of the new format object as its
    destination together with ``format='ordination'``; the format object
    itself is returned.
    """
    result = OrdinationFormat()
    destination = str(result)
    data.write(destination, format='ordination')
    return result