def tabulate(output_dir: str, input: qiime2.Metadata,
             page_size: int = 100) -> None:
    """Render metadata as a paginated HTML table inside ``output_dir``.

    Parameters
    ----------
    output_dir : str
        Directory that receives the rendered ``index.html``, a copy of the
        metadata as ``metadata.tsv`` and the ``js``/``css`` asset folders.
    input : qiime2.Metadata
        Metadata to tabulate.
    page_size : int, optional
        Number of records per page; must be at least 1.

    Raises
    ------
    ValueError
        If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        raise ValueError('Cannot render less than one record per page.')

    df = input.to_dataframe()
    # Two-level header: every column name is paired with its metadata type.
    df.columns = pd.MultiIndex.from_tuples(
        [(name, props.type) for name, props in input.columns.items()],
        names=['column header', 'type'])
    df.reset_index(inplace=True)

    # `force_ascii=True` is the pandas default; it is spelled out so that
    # non-ASCII characters keep being emitted as escapes even if that
    # default changes.
    table = df.to_json(orient='split', force_ascii=True)
    # `\u0027` is the JSON escape for an apostrophe, so the decoded data is
    # unchanged while the serialized text no longer contains a literal
    # single quote.
    # NOTE(review): presumably the template embeds this text inside a
    # single-quoted string -- confirm against tabulate/index.html.
    table = table.replace("'", r'\u0027')

    index = os.path.join(TEMPLATES, 'tabulate', 'index.html')
    q2templates.render(index, output_dir, context={
        'table': table,
        'page_size': page_size,
    })

    input.save(os.path.join(output_dir, 'metadata.tsv'))

    # Copy the bundled DataTables assets next to the rendered page.
    for subdir, asset in (('js', 'datatables.min.js'),
                          ('css', 'datatables.min.css')):
        os.mkdir(os.path.join(output_dir, subdir))
        shutil.copy(os.path.join(TEMPLATES, 'tabulate', asset),
                    os.path.join(output_dir, subdir, asset))
def differentialtest(table: biom.Table, metadata: qiime2.Metadata,
                     variable: str,
                     taxonomy: TSVTaxonomyFormat) -> pd.DataFrame:
    """Run ``differentialtest.R`` on the table and return its output.

    The feature table and the metadata are written to a temporary
    directory, the R script is invoked through ``run_commands`` and the
    tab-separated file it produces is loaded into a DataFrame.

    Parameters
    ----------
    table : biom.Table
        Feature table; must not be empty.
    metadata : qiime2.Metadata
        Metadata saved alongside the table for the script.
    variable : str
        Passed to the script as its fourth argument.
    taxonomy : TSVTaxonomyFormat
        Passed to the script as ``str(taxonomy)``.

    Returns
    -------
    pd.DataFrame
        Contents of the script's output file, with the index named
        ``'Feature ID'``.

    Raises
    ------
    ValueError
        If ``table`` is empty.
    """
    if table.is_empty():
        raise ValueError("The provided table object is empty")

    with tempfile.TemporaryDirectory() as workdir:
        # All files exchanged with the script live in the scratch directory.
        table_fp = os.path.join(workdir, 'table.tsv')
        metadata_fp = os.path.join(workdir, 'metadata.tsv')
        results_fp = os.path.join(workdir, 'data.tsv')

        with open(table_fp, 'w') as table_fh:
            table_fh.write(table.to_tsv())
        metadata.save(metadata_fp)

        script_call = ['differentialtest.R', table_fp, metadata_fp,
                       str(taxonomy), str(variable), results_fp]
        run_commands([script_call])

        # Read the results before the temporary directory is removed.
        results = pd.read_csv(results_fp, sep='\t')

    results.index.name = 'Feature ID'
    return results
class TestResourceManagerUpdateMetadata(TempfileTestCase):
    """Tests for ``ResourceManager.update`` when given a 'metadata' entry."""

    def setUp(self):
        super().setUp()
        self.resources = ResourceManager()

    def test_resource_manager_update_metadata_correct(self):
        """A saved metadata file is exposed as the frame it was built from."""
        sample_ids = pd.Series(['a', 'b', 'c', 'd'], name='#SampleID')
        column_values = {
            'age_cat': ['30s', '40s', '50s', '30s'],
            'num': [7.15, 9.04, 8.25, 7.24],
        }

        self.metadata_fp = self.create_tempfile(suffix='.txt').name
        self.test_metadata = pd.DataFrame(column_values, index=sample_ids)
        self.q2_metadata = Metadata(self.test_metadata)
        self.q2_metadata.save(self.metadata_fp)

        self.resources.update({'metadata': self.metadata_fp})

        self.assertCountEqual(['metadata'], self.resources.keys())
        assert_frame_equal(self.resources['metadata'], self.test_metadata)

    def test_resource_manager_update_metadata_does_not_exist(self):
        """Updating with the path of a closed tempfile raises."""
        # NOTE(review): closing the handle presumably removes the file, which
        # is what makes the path "not exist" -- confirm in TempfileTestCase.
        self.metadata_fh2 = self.create_tempfile(suffix='.txt')
        self.metadata_fh2.close()
        self.metadata_fp_dne = self.metadata_fh2.name

        update_payload = {'metadata': self.metadata_fp_dne}
        with self.assertRaises(MetadataFileError):
            self.resources.update(update_payload)

    def test_resource_manager_update_metadata_non_string_dict(self):
        """A dict in place of a file path raises, and the message shows it."""
        not_a_path = {'put': 'some', 'other': 'type'}
        # The pattern requires a brace-delimited span in the error message.
        with self.assertRaisesRegex(MetadataFileError, r'\{(.*)\}'):
            self.resources.update({'metadata': not_a_path})
def adonis(output_dir: str,
           distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata,
           formula: str,
           permutations: int = 999,
           n_jobs: int = 1) -> None:
    """Run ``run_adonis.R`` and render its results table as HTML.

    Parameters
    ----------
    output_dir : str
        Directory that receives ``adonis.tsv`` and the rendered page.
    distance_matrix : skbio.DistanceMatrix
        Distance matrix whose IDs select and order the metadata rows.
    metadata : qiime2.Metadata
        Sample metadata; checked against the distance matrix IDs with
        ``_validate_metadata_is_superset``.
    formula : str
        Model formula; every factor on its right-hand side is looked up
        as a metadata column.
    permutations : int, optional
        Forwarded to the R script.
    n_jobs : int, optional
        Forwarded to the R script.

    Raises
    ------
    ValueError
        If a metadata column used in the formula has missing values.
    """
    sample_ids = distance_matrix.ids
    _validate_metadata_is_superset(set(metadata.ids), set(sample_ids))

    # Keep only the samples in the distance matrix, in the matrix's order.
    ordered_md = metadata.to_dataframe().reindex(sample_ids)
    ordered_md.index.name = 'sample-id'
    metadata = qiime2.Metadata(ordered_md)

    # Every right-hand-side factor must be a column without missing values.
    model = ModelDesc.from_formula(formula)
    for term in model.rhs_termlist:
        for factor in term.factors:
            column = metadata.get_column(factor.name())
            if not column.has_missing_values():
                continue
            raise ValueError(
                'adonis requires metadata columns with no '
                'NaN values (missing values in column `%s`.)' %
                (column.name, ))

    # The script writes straight into output_dir; its inputs are temporary.
    results_fp = os.path.join(output_dir, 'adonis.tsv')
    with tempfile.TemporaryDirectory() as workdir:
        dm_fp = os.path.join(workdir, 'dm.tsv')
        md_fp = os.path.join(workdir, 'md.tsv')
        distance_matrix.write(dm_fp)
        metadata.save(md_fp)
        _run_command(['run_adonis.R', dm_fp, md_fp, formula,
                      str(permutations), str(n_jobs), results_fp])

    # Turn the results table into HTML and hand it to the template.
    results_html = q2templates.df_to_html(pd.read_csv(results_fp, sep='\t'))
    index = os.path.join(TEMPLATES, 'adonis_assets', 'index.html')
    q2templates.render(index, output_dir, context={'results': results_html})
def tabulate(output_dir: str, input: qiime2.Metadata,
             page_size: int = 100) -> None:
    """Render metadata as a paginated HTML table inside ``output_dir``.

    Parameters
    ----------
    output_dir : str
        Directory that receives the rendered ``index.html``, a copy of the
        metadata as ``metadata.tsv`` and the ``js``/``css`` asset folders.
    input : qiime2.Metadata
        Metadata to tabulate.
    page_size : int, optional
        Number of records per page; must be at least 1.

    Raises
    ------
    ValueError
        If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        raise ValueError('Cannot render less than one record per page.')

    frame = input.to_dataframe()
    # Two-level header: every column name is paired with its metadata type.
    header_pairs = [(name, props.type)
                    for name, props in input.columns.items()]
    frame.columns = pd.MultiIndex.from_tuples(
        header_pairs, names=['column header', 'type'])
    frame.reset_index(inplace=True)

    # `force_ascii=True` is already the pandas default; it is stated
    # explicitly so non-ASCII characters keep being written as escapes even
    # if that default changes.
    serialized = frame.to_json(orient='split', force_ascii=True)
    # Swap literal apostrophes for the equivalent `\u0027` escape; the
    # decoded data is unchanged.
    # NOTE(review): presumably needed because the template embeds this text
    # inside a single-quoted string -- confirm against tabulate/index.html.
    serialized = serialized.replace("'", r'\u0027')

    template = os.path.join(TEMPLATES, 'tabulate', 'index.html')
    page_context = {'table': serialized, 'page_size': page_size}
    q2templates.render(template, output_dir, context=page_context)

    input.save(os.path.join(output_dir, 'metadata.tsv'))

    # Copy the bundled DataTables assets next to the rendered page.
    for subdir, asset in (('js', 'datatables.min.js'),
                          ('css', 'datatables.min.css')):
        bundled = os.path.join(TEMPLATES, 'tabulate', asset)
        os.mkdir(os.path.join(output_dir, subdir))
        shutil.copy(bundled, os.path.join(output_dir, subdir, asset))
def adonis(output_dir: str,
           distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata,
           formula: str,
           permutations: int = 999,
           n_jobs: int = 1) -> None:
    """Run ``run_adonis.R`` and render its results table as HTML.

    Parameters
    ----------
    output_dir : str
        Directory that receives ``adonis.tsv`` and the rendered page.
    distance_matrix : skbio.DistanceMatrix
        Distance matrix whose IDs select and order the metadata rows.
    metadata : qiime2.Metadata
        Sample metadata; checked against the distance matrix IDs with
        ``_validate_metadata_is_superset``.
    formula : str
        Model formula; every factor on its right-hand side is looked up
        as a metadata column.
    permutations : int, optional
        Forwarded to the R script.
    n_jobs : int, optional
        Forwarded to the R script. The default is the integer ``1``, so
        the annotation is ``int``.

    Raises
    ------
    ValueError
        If a metadata column used in the formula has missing values.
    """
    # Validate sample metadata is superset et cetera
    metadata_ids = set(metadata.ids)
    dm_ids = distance_matrix.ids
    _validate_metadata_is_superset(metadata_ids, set(dm_ids))

    # filter ids. ids must be in same order as dm
    filtered_md = metadata.to_dataframe().reindex(dm_ids)
    filtered_md.index.name = 'sample-id'
    metadata = qiime2.Metadata(filtered_md)

    # Validate formula: every right-hand-side factor has to be a metadata
    # column, and that column must be complete. Rejecting missing values
    # here gives a clear error before the R script is started.
    terms = ModelDesc.from_formula(formula)
    for t in terms.rhs_termlist:
        for i in t.factors:
            column = metadata.get_column(i.name())
            if column.has_missing_values():
                raise ValueError(
                    'adonis requires metadata columns with no '
                    'NaN values (missing values in column `%s`.)' %
                    (column.name, ))

    # Run adonis; the script writes its results directly into output_dir,
    # while its inputs only live in a temporary directory.
    results_fp = os.path.join(output_dir, 'adonis.tsv')
    with tempfile.TemporaryDirectory() as temp_dir_name:
        dm_fp = os.path.join(temp_dir_name, 'dm.tsv')
        distance_matrix.write(dm_fp)
        md_fp = os.path.join(temp_dir_name, 'md.tsv')
        metadata.save(md_fp)
        cmd = ['run_adonis.R', dm_fp, md_fp, formula, str(permutations),
               str(n_jobs), results_fp]
        _run_command(cmd)

    # Visualize results
    results = pd.read_csv(results_fp, sep='\t')
    results = q2templates.df_to_html(results)
    index = os.path.join(TEMPLATES, 'adonis_assets', 'index.html')
    q2templates.render(index, output_dir, context={'results': results})
def _2(obj: qiime2.Metadata) -> DADA2StatsFormat:
    """Save the metadata into a new ``DADA2StatsFormat`` and return it."""
    stats_file = DADA2StatsFormat()
    # str() of the format object is used as the path to write to.
    destination = str(stats_file)
    obj.save(destination)
    return stats_file
def _2(obj: qiime2.Metadata) -> SongbirdStatsFormat:
    """Save the metadata into a new ``SongbirdStatsFormat`` and return it."""
    stats_file = SongbirdStatsFormat()
    # str() of the format object is used as the path to write to.
    destination = str(stats_file)
    obj.save(destination)
    return stats_file
def _14(obj: Metadata) -> ReconSummaryFormat:
    """Save the metadata into a new ``ReconSummaryFormat`` and return it."""
    summary_file = ReconSummaryFormat()
    # str() of the format object is used as the path to write to.
    destination = str(summary_file)
    obj.save(destination)
    return summary_file
index='subject-id') exmp1_subject_data = exmp1_subject_data.join(get_sheet_as_df( spreadsheet_id, 'exmp1-weekly-steps', index='subject-id'), on='subject-id') exmp1_subject_data = exmp1_subject_data.join(get_sheet_as_df( spreadsheet_id, 'exmp1-weekly-nmvpa', index='subject-id'), on='subject-id') # extend with exmp2 subject data, which has no overlapping subject-ids and # some of the same columns exmp2_subject_data = get_sheet_as_df(spreadsheet_id, 'exmp2-subject-data', index='subject-id') subject_data = pd.concat([exmp1_subject_data, exmp2_subject_data], sort=False) sample_metadata = get_sheet_as_df(spreadsheet_id, 'combined-minimal', index='sample-id') subject_data_indexed_by_sample_id = subject_data.loc[ sample_metadata['subject-id']].set_index(sample_metadata.index) sample_metadata = sample_metadata.join(subject_data_indexed_by_sample_id, on='sample-id', lsuffix='', rsuffix='_drop_me') sample_metadata = sample_metadata.replace(r'^\s*$', np.nan, regex=True) sample_metadata = Metadata(sample_metadata) sample_metadata.save('sample-metadata.tsv') tabulate(sample_metadata).visualization.save('sample-metadata.qzv')
def _4(obj: qiime2.Metadata) -> MMvecStatsFormat:
    """Save the metadata into a new ``MMvecStatsFormat`` and return it."""
    stats_file = MMvecStatsFormat()
    # str() of the format object is used as the path to write to.
    destination = str(stats_file)
    obj.save(destination)
    return stats_file
def _2(obj: qiime2.Metadata) -> LogRatiosFormat:
    """Save the metadata into a new ``LogRatiosFormat`` and return it."""
    ratios_file = LogRatiosFormat()
    # str() of the format object is used as the path to write to.
    destination = str(ratios_file)
    obj.save(destination)
    return ratios_file