def test_combine_map_label_cols(self):
    """combine_map_label_cols: merge the Day and Type mapping columns.

    The expected table has a fourth "Day&&Type" column whose value in each
    sample row is the row's Day and Type values joined together
    ("Day1" + "Soil" -> "Day1Soil").  The input table is ``self.mapping``,
    which is prepared outside this method.
    """
    self.combinecolorby = ['Day', 'Type']

    # Header row first, then one identical Day1/Soil row per sample.
    expected = [["Sample-ID", "Day", "Type", "Day&&Type"]]
    for sample_id in ("Sample1", "Sample2", "Sample3"):
        expected.append([sample_id, "Day1", "Soil", "Day1Soil"])

    observed = combine_map_label_cols(self.combinecolorby, self.mapping)
    self.assertEqual(observed, expected)
def test_combine_map_label_cols(self):
    """combine_map_label_cols: combining 'Day' and 'Type' adds 'Day&&Type'.

    Checks the result of combining the two columns of ``self.mapping``
    (set up outside this method) against a table whose last column is
    headed "Day&&Type" and holds "Day1Soil" for every sample.
    """
    self.combinecolorby = ['Day', 'Type']

    header_row = ["Sample-ID", "Day", "Type", "Day&&Type"]
    sample_rows = [[name, "Day1", "Soil", "Day1Soil"]
                   for name in ["Sample1", "Sample2", "Sample3"]]
    expected_table = [header_row] + sample_rows

    combined_table = combine_map_label_cols(self.combinecolorby,
                                            self.mapping)
    self.assertEqual(combined_table, expected_table)
def main():
    """Collapse the samples of an OTU table by a mapping-file category.

    All inputs come from the parsed command line options:

    * ``mapping_fp`` -- path of the sample mapping file.
    * ``mapping_category`` -- metadata column used to bin samples.  When it
      contains ``'&&'`` (e.g. ``'Day&&Type'``) the named columns are first
      combined into a single column via ``combine_map_label_cols``.
    * ``otu_table_fp`` -- path of the input BIOM table.
    * ``output_fp`` -- path the collapsed BIOM table is written to.
    * ``normalize`` -- when true, the collapsed table is normalized per
      sample before being written.

    The sample metadata is attached to the table (it is assumed not to be
    present in the table already), samples are collapsed into bins keyed by
    their ``mapping_category`` value, and the result is written out.
    """
    _, opts, _ = parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    mapping_category = opts.mapping_category
    otu_table_fp = opts.otu_table_fp
    output_fp = opts.output_fp
    normalize = opts.normalize

    def bin_function(id_, sample_metadata):
        """Return the bin a sample should be placed into."""
        return sample_metadata[mapping_category]

    # Parse the sample metadata.  The file is opened in a ``with`` block so
    # the handle is closed as soon as parsing is done instead of leaking.
    with open(mapping_fp, 'U') as mapping_file:
        mapping, headers, _ = parse_mapping_file(mapping_file)

    # Ability to combine metadata columns and summarize based on the new
    # combined category.
    if '&&' in mapping_category:
        # Build a table of header row + data rows.  The very same list
        # objects are reused (no copies), exactly as before.
        # NOTE(review): `headers` is reused below by mapping_file_to_dict;
        # if combine_map_label_cols extends rows in place, that shared
        # object is how the combined column title reaches it -- confirm
        # before ever copying here.
        new_mapping = [headers]
        new_mapping.extend(mapping)

        # Create an array using multiple columns from mapping file.
        combinecolorby = mapping_category.split('&&')
        mapping = combine_map_label_cols(combinecolorby, new_mapping)

    sample_metadata = mapping_file_to_dict(mapping, headers)

    with biom_open(otu_table_fp, 'U') as biom_file:
        table = parse_biom_table(biom_file)
    table.add_metadata(sample_metadata)

    # Create a new OTU table where samples are binned based on their return
    # value from bin_function.
    result = table.collapse(bin_function, norm=False, min_group_size=1,
                            axis='sample')

    # Normalize the result if requested by the user.
    if normalize:
        result.norm(axis='sample', inplace=True)

    # Write a new BIOM file.
    write_biom_table(result, output_fp)