Пример #1
0
def tabulate(output_dir: str,
             input: qiime2.Metadata,
             page_size: int = 100) -> None:
    """Render metadata through the ``tabulate`` HTML template.

    Writes the rendered ``index.html``, a ``metadata.tsv`` copy of the
    metadata, and the DataTables JS/CSS assets into ``output_dir``.

    Parameters
    ----------
    output_dir : str
        Directory that receives the rendered page and its assets.
    input : qiime2.Metadata
        Metadata to tabulate.
    page_size : int, optional
        Forwarded to the template as ``page_size``; must be at least 1.

    Raises
    ------
    ValueError
        If ``page_size`` is less than 1.
    """
    if page_size < 1:
        raise ValueError('Cannot render less than one record per page.')

    df = input.to_dataframe()
    # Pair every column name with its metadata column type so the serialized
    # table carries a two-level header.
    df.columns = pd.MultiIndex.from_tuples(
        [(name, column.type) for name, column in input.columns.items()],
        names=['column header', 'type'])
    df.reset_index(inplace=True)

    # Emit non-ASCII characters as \uXXXX escapes. `True` is already the
    # pandas default; it is spelled out to guard against future API changes.
    table = df.to_json(orient='split', force_ascii=True)
    # A raw apostrophe in a metadata value can break the payload once it is
    # embedded in the page (presumably inside a quoted JS string -- confirm
    # against the template). The \u0027 escape decodes to the same character.
    table = table.replace("'", r'\u0027')

    index = os.path.join(TEMPLATES, 'tabulate', 'index.html')
    q2templates.render(index,
                       output_dir,
                       context={
                           'table': table,
                           'page_size': page_size
                       })

    input.save(os.path.join(output_dir, 'metadata.tsv'))

    # Ship the DataTables assets next to the rendered page.
    js = os.path.join(TEMPLATES, 'tabulate', 'datatables.min.js')
    os.mkdir(os.path.join(output_dir, 'js'))
    shutil.copy(js, os.path.join(output_dir, 'js', 'datatables.min.js'))

    css = os.path.join(TEMPLATES, 'tabulate', 'datatables.min.css')
    os.mkdir(os.path.join(output_dir, 'css'))
    shutil.copy(css, os.path.join(output_dir, 'css', 'datatables.min.css'))
Пример #2
0
def differentialtest(table: biom.Table, metadata: qiime2.Metadata,
                     variable: str,
                     taxonomy: TSVTaxonomyFormat) -> pd.DataFrame:
    """Run ``differentialtest.R`` on a feature table and return its output.

    The table and metadata are written as TSV files into a temporary
    directory, the R script is invoked through ``run_commands``, and the
    ``data.tsv`` file it is asked to produce is read back as a DataFrame
    whose index is named ``'Feature ID'``.

    Raises
    ------
    ValueError
        If ``table`` is empty.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    with tempfile.TemporaryDirectory() as workdir:
        # All script inputs and outputs live in the scratch directory.
        table_fp = os.path.join(workdir, 'table.tsv')
        metadata_fp = os.path.join(workdir, 'metadata.tsv')
        results_fp = os.path.join(workdir, 'data.tsv')

        with open(table_fp, 'w') as table_fh:
            table_fh.write(table.to_tsv())
        metadata.save(metadata_fp)

        run_commands([[
            'differentialtest.R',
            table_fp,
            metadata_fp,
            str(taxonomy),
            str(variable),
            str(results_fp),
        ]])

        # Read the results before the scratch directory is removed.
        results = pd.read_csv(results_fp, sep='\t')
        results.index.name = 'Feature ID'
    return results
class TestResourceManagerUpdateMetadata(TempfileTestCase):
    """Exercise ``ResourceManager.update`` with a ``'metadata'`` entry."""

    def setUp(self):
        super().setUp()
        self.resources = ResourceManager()

    def test_resource_manager_update_metadata_correct(self):
        # Round trip: a frame saved as metadata should come back unchanged
        # when its file path is registered under 'metadata'.
        self.metadata_fp = self.create_tempfile(suffix='.txt').name
        sample_ids = pd.Series(['a', 'b', 'c', 'd'], name='#SampleID')
        columns = {
            'age_cat': ['30s', '40s', '50s', '30s'],
            'num': [7.15, 9.04, 8.25, 7.24],
        }
        self.test_metadata = pd.DataFrame(columns, index=sample_ids)
        self.q2_metadata = Metadata(self.test_metadata)
        self.q2_metadata.save(self.metadata_fp)

        self.resources.update({'metadata': self.metadata_fp})

        self.assertCountEqual(['metadata'], self.resources.keys())
        assert_frame_equal(self.resources['metadata'], self.test_metadata)

    def test_resource_manager_update_metadata_does_not_exist(self):
        # The temp file is closed before use; presumably that removes it so
        # the path no longer exists -- confirm against TempfileTestCase.
        handle = self.create_tempfile(suffix='.txt')
        self.metadata_fh2 = handle
        handle.close()
        self.metadata_fp_dne = handle.name

        with self.assertRaises(MetadataFileError):
            self.resources.update({'metadata': self.metadata_fp_dne})

    def test_resource_manager_update_metadata_non_string_dict(self):
        # A dict value must be rejected, and the error text is expected to
        # contain a brace-delimited section.
        not_a_path = {'put': 'some', 'other': 'type'}
        with self.assertRaisesRegex(MetadataFileError, r'\{(.*)\}'):
            self.resources.update({'metadata': not_a_path})
Пример #4
0
def adonis(output_dir: str,
           distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata,
           formula: str,
           permutations: int = 999,
           n_jobs: int = 1) -> None:
    """Run ``run_adonis.R`` and render its results table as HTML.

    The metadata is checked against the distance matrix IDs, reordered to
    match them, and every column named in the right-hand side of ``formula``
    is required to exist and to be free of missing values. The script output
    is written to ``adonis.tsv`` in ``output_dir`` and rendered through the
    ``adonis_assets`` template.

    Raises
    ------
    ValueError
        If a metadata column used in ``formula`` has missing values.
    """
    sample_ids = distance_matrix.ids
    _validate_metadata_is_superset(set(metadata.ids), set(sample_ids))

    # Keep only the samples in the distance matrix, in the matrix's order.
    ordered_md = metadata.to_dataframe().reindex(sample_ids)
    ordered_md.index.name = 'sample-id'
    metadata = qiime2.Metadata(ordered_md)

    # Every factor in the formula must map to a complete metadata column.
    model = ModelDesc.from_formula(formula)
    factor_names = (factor.name()
                    for term in model.rhs_termlist
                    for factor in term.factors)
    for factor_name in factor_names:
        column = metadata.get_column(factor_name)
        if column.has_missing_values():
            raise ValueError(
                'adonis requires metadata columns with no '
                'NaN values (missing values in column `%s`.)' %
                (column.name, ))

    # The script writes straight into output_dir; its inputs are scratch.
    results_fp = os.path.join(output_dir, 'adonis.tsv')
    with tempfile.TemporaryDirectory() as scratch:
        dm_fp = os.path.join(scratch, 'dm.tsv')
        distance_matrix.write(dm_fp)
        md_fp = os.path.join(scratch, 'md.tsv')
        metadata.save(md_fp)
        _run_command(['run_adonis.R', dm_fp, md_fp, formula,
                      str(permutations), str(n_jobs), results_fp])

    results_html = q2templates.df_to_html(pd.read_csv(results_fp, sep='\t'))
    index = os.path.join(TEMPLATES, 'adonis_assets', 'index.html')
    q2templates.render(index, output_dir, context={'results': results_html})
Пример #5
0
def tabulate(output_dir: str,
             input: qiime2.Metadata,
             page_size: int = 100) -> None:
    """Render metadata through the ``tabulate`` HTML template.

    Produces ``index.html``, ``metadata.tsv`` and the DataTables JS/CSS
    assets inside ``output_dir``. Raises ``ValueError`` when ``page_size``
    is below 1.
    """
    if page_size < 1:
        raise ValueError('Cannot render less than one record per page.')

    df = input.to_dataframe()
    header = [(name, column.type) for name, column in input.columns.items()]
    df.columns = pd.MultiIndex.from_tuples(
        header, names=['column header', 'type'])
    df.reset_index(inplace=True)

    # `force_ascii=True` (the pandas default, stated explicitly in case the
    # default ever changes) emits non-ASCII characters as \uXXXX escapes.
    serialized = df.to_json(orient='split', force_ascii=True)
    # Swap apostrophes for their unicode escape; decoders read it back as
    # the same character.
    serialized = serialized.replace("'", r'\u0027')

    template_context = {'table': serialized, 'page_size': page_size}
    index = os.path.join(TEMPLATES, 'tabulate', 'index.html')
    q2templates.render(index, output_dir, context=template_context)

    input.save(os.path.join(output_dir, 'metadata.tsv'))

    # Copy each DataTables asset into its own freshly created subdirectory.
    for subdir, asset in (('js', 'datatables.min.js'),
                          ('css', 'datatables.min.css')):
        source = os.path.join(TEMPLATES, 'tabulate', asset)
        target_dir = os.path.join(output_dir, subdir)
        os.mkdir(target_dir)
        shutil.copy(source, os.path.join(target_dir, asset))
Пример #6
0
def adonis(output_dir: str,
           distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata,
           formula: str,
           permutations: int = 999,
           n_jobs: int = 1) -> None:
    """Run ``run_adonis.R`` and render its results table as HTML.

    Parameters
    ----------
    output_dir : str
        Directory that receives ``adonis.tsv`` and the rendered page.
    distance_matrix : skbio.DistanceMatrix
        Distance matrix written to disk and passed to the R script.
    metadata : qiime2.Metadata
        Sample metadata; reordered to match the distance matrix IDs.
    formula : str
        Model formula. Each factor on its right-hand side must name a
        metadata column.
    permutations : int, optional
        Passed to the R script as a string argument.
    n_jobs : int, optional
        Passed to the R script as a string argument.

    Raises
    ------
    ValueError
        If a metadata column used in ``formula`` has missing values.
    """
    # Validate sample metadata is superset et cetera
    metadata_ids = set(metadata.ids)
    dm_ids = distance_matrix.ids
    _validate_metadata_is_superset(metadata_ids, set(dm_ids))
    # filter ids. ids must be in same order as dm
    filtered_md = metadata.to_dataframe().reindex(dm_ids)
    filtered_md.index.name = 'sample-id'
    metadata = qiime2.Metadata(filtered_md)

    # Validate formula: every factor must be a metadata column, and that
    # column must be complete -- fail here with a clear message rather than
    # handing missing values to the R script.
    terms = ModelDesc.from_formula(formula)
    for t in terms.rhs_termlist:
        for i in t.factors:
            column = metadata.get_column(i.name())
            if column.has_missing_values():
                raise ValueError(
                    'adonis requires metadata columns with no '
                    'NaN values (missing values in column `%s`.)' %
                    (column.name, ))

    # Run adonis; inputs live in a scratch directory, while the results file
    # is written directly into output_dir.
    results_fp = os.path.join(output_dir, 'adonis.tsv')
    with tempfile.TemporaryDirectory() as temp_dir_name:
        dm_fp = os.path.join(temp_dir_name, 'dm.tsv')
        distance_matrix.write(dm_fp)
        md_fp = os.path.join(temp_dir_name, 'md.tsv')
        metadata.save(md_fp)
        cmd = ['run_adonis.R', dm_fp, md_fp, formula, str(permutations),
               str(n_jobs), results_fp]
        _run_command(cmd)

    # Visualize results
    results = pd.read_csv(results_fp, sep='\t')
    results = q2templates.df_to_html(results)
    index = os.path.join(TEMPLATES, 'adonis_assets', 'index.html')
    q2templates.render(index, output_dir, context={'results': results})
Пример #7
0
def _2(obj: qiime2.Metadata) -> DADA2StatsFormat:
    """Save ``obj`` to a fresh ``DADA2StatsFormat`` and return the format.

    The save target is whatever ``str()`` of the new format object yields.
    """
    stats_format = DADA2StatsFormat()
    destination = str(stats_format)
    obj.save(destination)
    return stats_format
Пример #8
0
def _2(obj: qiime2.Metadata) -> SongbirdStatsFormat:
    """Save ``obj`` to a fresh ``SongbirdStatsFormat`` and return the format.

    The save target is whatever ``str()`` of the new format object yields.
    """
    stats_format = SongbirdStatsFormat()
    destination = str(stats_format)
    obj.save(destination)
    return stats_format
Пример #9
0
def _14(obj: Metadata) -> ReconSummaryFormat:
    """Save ``obj`` to a fresh ``ReconSummaryFormat`` and return the format.

    The save target is whatever ``str()`` of the new format object yields.
    """
    summary_format = ReconSummaryFormat()
    destination = str(summary_format)
    obj.save(destination)
    return summary_format
                                         index='subject-id')
    exmp1_subject_data = exmp1_subject_data.join(get_sheet_as_df(
        spreadsheet_id, 'exmp1-weekly-steps', index='subject-id'),
                                                 on='subject-id')
    exmp1_subject_data = exmp1_subject_data.join(get_sheet_as_df(
        spreadsheet_id, 'exmp1-weekly-nmvpa', index='subject-id'),
                                                 on='subject-id')

    # extend with exmp2 subject data, which has no overlapping subject-ids and
    # some of the same columns
    exmp2_subject_data = get_sheet_as_df(spreadsheet_id,
                                         'exmp2-subject-data',
                                         index='subject-id')
    subject_data = pd.concat([exmp1_subject_data, exmp2_subject_data],
                             sort=False)

    sample_metadata = get_sheet_as_df(spreadsheet_id,
                                      'combined-minimal',
                                      index='sample-id')
    subject_data_indexed_by_sample_id = subject_data.loc[
        sample_metadata['subject-id']].set_index(sample_metadata.index)
    sample_metadata = sample_metadata.join(subject_data_indexed_by_sample_id,
                                           on='sample-id',
                                           lsuffix='',
                                           rsuffix='_drop_me')
    sample_metadata = sample_metadata.replace(r'^\s*$', np.nan, regex=True)

    sample_metadata = Metadata(sample_metadata)
    sample_metadata.save('sample-metadata.tsv')
    tabulate(sample_metadata).visualization.save('sample-metadata.qzv')
Пример #11
0
def _4(obj: qiime2.Metadata) -> MMvecStatsFormat:
    """Save ``obj`` to a fresh ``MMvecStatsFormat`` and return the format.

    The save target is whatever ``str()`` of the new format object yields.
    """
    stats_format = MMvecStatsFormat()
    destination = str(stats_format)
    obj.save(destination)
    return stats_format
Пример #12
0
def _2(obj: qiime2.Metadata) -> LogRatiosFormat:
    """Save ``obj`` to a fresh ``LogRatiosFormat`` and return the format.

    The save target is whatever ``str()`` of the new format object yields.
    """
    ratios_format = LogRatiosFormat()
    destination = str(ratios_format)
    obj.save(destination)
    return ratios_format