def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) otu_table_fp = opts.otu_table_fp otu_include_fp = opts.otu_include_fp output_fp = opts.output_fp verbose = opts.verbose category_mapping_fp = opts.category_mapping_fp individual_column = opts.individual_column reference_sample_column = opts.reference_sample_column conv_output_fp = opts.converted_otu_table_output_fp relative_abundance = opts.relative_abundance filter = opts.filter test = opts.test category = opts.category threshold = opts.threshold collate_results = opts.collate_results # check mapping file category_mapping = open(category_mapping_fp, 'U') category_mapping = parse_mapping_file(category_mapping) if not category: if test != 'paired_T': option_parser.error( 'a category in the category mapping file must be' ' specified with the -c option for this test') # set up threshold value for filtering, if any if threshold and threshold != 'None': threshold = float(threshold) # if specifying a list of OTUs to look at specifically if otu_include_fp and otu_include_fp != 'None': otu_include = open(otu_include_fp) else: otu_include = None # if only passing in a single OTU table if isdir(otu_table_fp) is False: # raise error if collate option is being passed to single table if collate_results is True: option_parser.error( 'Cannot collate the results of only one table.' 
' Please rerun the command without passing the -w option') else: #open and parse the biom table fp otu_table = parse_biom_table(open(otu_table_fp, 'U')) # run the statistical test output = test_wrapper( test, otu_table, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) # write output output_file = open(output_fp, 'w') output_file.write('\n'.join(output)) output_file.close() # if the user has passed in a directory if isdir(otu_table_fp) is True: # negate_collate to return an results file on a per table basis if collate_results is False: # build list of otu tables otu_table_paths = glob('%s/*biom' % otu_table_fp) # if output dir doesn't exist, then make it if exists(output_fp): pass else: makedirs(output_fp) for otu_table_fp in otu_table_paths: #open and parse the biom table fp otu_table = parse_biom_table(open(otu_table_fp, 'U')) #synchronize the mapping file with the otu table category_mapping, removed_samples = \ sync_mapping_to_otu_table(otu_table, category_mapping) if removed_samples: print "Warning, the following samples were in the category mapping file " +\ "but not the OTU table and will be ignored: " for i in removed_samples: print i + '\n' # create naming convention for output file # will look like: otu_table_ANOVA_Treatment.txt output_basename = basename(otu_table_fp) output_basename = output_basename.replace(".biom", "") output_fp_sweep = "%s_%s_%s.txt" % \ (output_basename,test,category) # if the convert_otu_table_fp is passed, save the converted table if test == 'longitudinal_correlation' or test == 'paired_T': converted_otu_table = longitudinal_otu_table_conversion_wrapper( table, category_mapping, individual_column, reference_sample_column) if conv_output_fp: of = open(conv_output_fp, 'w') of.write(format_biom_table(converted_otu_table)) of.close() if test == 'longitudinal_correlation': #set the otu_include list to all of the OTUs, this effectively #deactivates the filter for correlation, 
because the filtered OTU_list is #rewritten with the otu_include list in the test_wrapper if not otu_include: otu_include = set(otu_table.ObservationIds) output = test_wrapper('correlation', converted_otu_table, category_mapping, category, threshold, filter, otu_include, 999999999.0, True) elif test == 'paired_T': output = test_wrapper('paired_T', converted_otu_table, category_mapping, category, threshold, filter, otu_include, 999999999.0, True, individual_column, reference_sample_column) # run test single input table from the directory else: output = test_wrapper( test, otu_table, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) # write output file with new naming convention output_file = open(join(output_fp, output_fp_sweep), 'w') output_file.write('\n'.join(output)) output_file.close() # Use when the input dir contains rarefied OTU tables, and you want # to collate the p-values & results into one results file if collate_results is True: if test != 'longitudinal_correlation' and test != 'paired_T': # get biom tables otu_table_paths = glob('%s/*biom' % otu_table_fp) #get aggregated tables parsed_otu_tables = [] for otu_table_fp in otu_table_paths: otu_table = open(otu_table_fp, 'U') otu_table = parse_biom_table(otu_table) parsed_otu_tables.append(otu_table) #synchronize the mapping file with the otu table #checks with just the first OTU table and assumes that all otu tables #have the same collection of samples category_mapping, removed_samples = \ sync_mapping_to_otu_table(parsed_otu_tables[0],category_mapping) if removed_samples: print "Warning, the following samples were in the category mapping file " +\ "but not the OTU table and will be ignored: " for i in removed_samples: print i + '\n' # get output from statistical test output = test_wrapper_multiple( test, parsed_otu_tables, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) #write out 
aggregated results output_file = open(output_fp, 'w') output_file.write('\n'.join(output)) output_file.close() else: option_parser.error( "You cannot collate the results obtained from " "using the longitudinal_correlation and paired_T options.")
def main():
    """Run an OTU category significance test from the command line.

    Options are read with ``parse_command_line_parameters(**script_info)``.

    * When ``otu_table_fp`` is a single file, the table is parsed,
      the mapping is synchronized to it, and the requested test is run
      through ``test_wrapper`` (longitudinal_correlation and paired_T first
      convert the table with ``longitudinal_otu_table_conversion_wrapper``
      and optionally save the converted table).
    * When ``otu_table_fp`` is a directory, every ``*biom`` file is parsed
      and the aggregated result comes from ``test_wrapper_multiple``.

    The joined result lines are written to ``output_fp``.

    Raises:
        ValueError: if no category is given for a test other than paired_T,
            if longitudinal_correlation / paired_T is requested on a
            directory, or if the directory contains no ``*biom`` files.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    category_mapping_fp = opts.category_mapping_fp
    category_mapping = open(category_mapping_fp, 'U')
    category_mapping = parse_mapping_file(category_mapping)
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance
    filter_value = opts.filter
    test = opts.test
    category = opts.category

    # only paired_T may be run without a category
    if not category and test != 'paired_T':
        raise ValueError('a category in the category mapping file must be'
                         ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # NOTE(review): the handle is passed on as-is and never closed here;
    # presumably the test wrappers read it lazily -- confirm before closing.
    otu_include_fp = opts.otu_include_fp
    if otu_include_fp and otu_include_fp != 'None':
        otu_include = open(otu_include_fp)
    else:
        otu_include = None

    otu_table_fp = opts.otu_table_fp
    if not isdir(otu_table_fp):
        # if single file, process normally
        otu_table = open(otu_table_fp, 'U')
        try:
            otu_table = parse_biom_table(otu_table)
        except AttributeError:
            # NOTE(review): the fallback receives the file object, not a
            # string -- confirm this is what parse_biom_table_str expects.
            otu_table = parse_biom_table_str(otu_table)

        # synchronize the mapping file with the otu table
        category_mapping, removed_samples = sync_mapping_to_otu_table(
            otu_table, category_mapping)
        if removed_samples:
            # single-argument print(...) behaves the same as the print
            # statement under Python 2
            print("Warning, the following samples were in the category mapping file " +
                  "but not the OTU table and will be ignored: ")
            for i in removed_samples:
                print(i + '\n')

        if test == 'longitudinal_correlation' or test == 'paired_T':
            converted_otu_table = longitudinal_otu_table_conversion_wrapper(
                otu_table, category_mapping, individual_column,
                reference_sample_column)
            if conv_output_fp:
                with open(conv_output_fp, 'w') as of:
                    of.write(format_biom_table(converted_otu_table))
            if test == 'longitudinal_correlation':
                # set the otu_include list to all of the OTUs, this
                # effectively deactivates the filter for correlation,
                # because the filtered OTU_list is rewritten with the
                # otu_include list in the test_wrapper
                if not otu_include:
                    otu_include = set(otu_table.ObservationIds)
                output = test_wrapper(
                    'correlation', converted_otu_table, category_mapping,
                    category, threshold, filter_value, otu_include,
                    999999999.0, True)
            elif test == 'paired_T':
                output = test_wrapper(
                    'paired_T', converted_otu_table, category_mapping,
                    category, threshold, filter_value, otu_include,
                    999999999.0, True, individual_column,
                    reference_sample_column)
        else:
            output = test_wrapper(
                test, otu_table, category_mapping, category, threshold,
                filter_value, otu_include,
                otu_table_relative_abundance=relative_abundance)
    else:
        if test == 'longitudinal_correlation' or test == 'paired_T':
            raise ValueError(
                "the longitudinal_correlation and paired_T options cannot be run on a directory")

        otu_table_paths = glob('%s/*biom' % otu_table_fp)
        if not otu_table_paths:
            # previously surfaced as IndexError on parsed_otu_tables[0]
            raise ValueError(
                "no OTU tables matching '*biom' were found in %s"
                % otu_table_fp)

        # if directory, get aggregated results
        parsed_otu_tables = []
        for path in otu_table_paths:
            ot = open(path, 'U')
            ot = parse_biom_table(ot)
            parsed_otu_tables.append(ot)

        # synchronize the mapping file with the otu table
        # checks with just the first OTU table and assumes that all otu
        # tables have the same collection of samples
        category_mapping, removed_samples = sync_mapping_to_otu_table(
            parsed_otu_tables[0], category_mapping)
        if removed_samples:
            print("Warning, the following samples were in the category mapping file " +
                  "but not the OTU table and will be ignored: ")
            for i in removed_samples:
                print(i + '\n')

        output = test_wrapper_multiple(
            test, parsed_otu_tables, category_mapping, category, threshold,
            filter_value, otu_include,
            otu_table_relative_abundance=relative_abundance)

    with open(output_fp, 'w') as of:
        of.write('\n'.join(output))
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) otu_table_fp = opts.otu_table_fp otu_include_fp = opts.otu_include_fp output_fp = opts.output_fp verbose = opts.verbose category_mapping_fp = opts.category_mapping_fp individual_column = opts.individual_column reference_sample_column = opts.reference_sample_column conv_output_fp = opts.converted_otu_table_output_fp relative_abundance = opts.relative_abundance filter = opts.filter test = opts.test category = opts.category threshold = opts.threshold collate_results = opts.collate_results # check mapping file category_mapping = open(category_mapping_fp,'U') category_mapping = parse_mapping_file(category_mapping) if not category: if test != 'paired_T': option_parser.error('a category in the category mapping file must be' ' specified with the -c option for this test') # set up threshold value for filtering, if any if threshold and threshold != 'None': threshold = float(threshold) # if specifying a list of OTUs to look at specifically if otu_include_fp and otu_include_fp != 'None': otu_include = open(otu_include_fp) else: otu_include = None # if only passing in a single OTU table if isdir(otu_table_fp) is False: # raise error if collate option is being passed to single table if collate_results is True: option_parser.error('Cannot collate the results of only one table.' 
' Please rerun the command without passing the -w option') else: #open and parse the biom table fp otu_table = parse_biom_table(open(otu_table_fp, 'U')) # run the statistical test output = test_wrapper(test, otu_table, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) # write output output_file = open(output_fp, 'w') output_file.write('\n'.join(output)) output_file.close() # if the user has passed in a directory if isdir(otu_table_fp) is True: # negate_collate to return an results file on a per table basis if collate_results is False: # build list of otu tables otu_table_paths = glob('%s/*biom' % otu_table_fp) # if output dir doesn't exist, then make it if exists(output_fp): pass else: makedirs(output_fp) for otu_table_fp in otu_table_paths: #open and parse the biom table fp otu_table = parse_biom_table(open(otu_table_fp, 'U')) #synchronize the mapping file with the otu table category_mapping, removed_samples = \ sync_mapping_to_otu_table(otu_table, category_mapping) if removed_samples: print "Warning, the following samples were in the category mapping file " +\ "but not the OTU table and will be ignored: " for i in removed_samples: print i + '\n' # create naming convention for output file # will look like: otu_table_ANOVA_Treatment.txt output_basename = basename(otu_table_fp) output_basename = output_basename.replace(".biom","") output_fp_sweep = "%s_%s_%s.txt" % \ (output_basename,test,category) # if the convert_otu_table_fp is passed, save the converted table if test == 'longitudinal_correlation' or test == 'paired_T': converted_otu_table = longitudinal_otu_table_conversion_wrapper(table, category_mapping, individual_column, reference_sample_column) if conv_output_fp: of = open(conv_output_fp, 'w') of.write(format_biom_table(converted_otu_table)) of.close() if test == 'longitudinal_correlation': #set the otu_include list to all of the OTUs, this effectively #deactivates the filter for correlation, 
because the filtered OTU_list is #rewritten with the otu_include list in the test_wrapper if not otu_include: otu_include = set(otu_table.ObservationIds) output = test_wrapper('correlation', converted_otu_table, category_mapping, category, threshold, filter, otu_include, 999999999.0, True) elif test == 'paired_T': output = test_wrapper('paired_T', converted_otu_table, category_mapping, category, threshold, filter, otu_include, 999999999.0, True, individual_column, reference_sample_column) # run test single input table from the directory else: output = test_wrapper(test, otu_table, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) # write output file with new naming convention output_file = open(join(output_fp,output_fp_sweep), 'w') output_file.write('\n'.join(output)) output_file.close() # Use when the input dir contains rarefied OTU tables, and you want # to collate the p-values & results into one results file if collate_results is True: if test != 'longitudinal_correlation' and test != 'paired_T': # get biom tables otu_table_paths = glob('%s/*biom' % otu_table_fp) #get aggregated tables parsed_otu_tables= [] for otu_table_fp in otu_table_paths: otu_table = open(otu_table_fp, 'U') otu_table = parse_biom_table(otu_table) parsed_otu_tables.append(otu_table) #synchronize the mapping file with the otu table #checks with just the first OTU table and assumes that all otu tables #have the same collection of samples category_mapping, removed_samples = \ sync_mapping_to_otu_table(parsed_otu_tables[0],category_mapping) if removed_samples: print "Warning, the following samples were in the category mapping file " +\ "but not the OTU table and will be ignored: " for i in removed_samples: print i + '\n' # get output from statistical test output = test_wrapper_multiple(test, parsed_otu_tables, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) #write out 
aggregated results output_file = open(output_fp, 'w') output_file.write('\n'.join(output)) output_file.close() else: option_parser.error("You cannot collate the results obtained from " "using the longitudinal_correlation and paired_T options.")
def main():
    """Command-line entry point for the OTU category significance tests.

    Reads the options, then either runs one test on a single OTU table file
    (``test_wrapper``) or an aggregated test over every entry of a directory
    (``test_wrapper_multiple``), and writes the newline-joined result to
    ``output_fp``. The mapping file and OTU table are handed to the wrappers
    as open file handles.

    Raises:
        ValueError: if no category is given for a test other than paired_T,
            or if longitudinal_correlation / paired_T is requested on a
            directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    longitudinal_tests = ('longitudinal_correlation', 'paired_T')

    verbose = opts.verbose
    output_fp = opts.output_fp
    category_mapping_fp = opts.category_mapping_fp
    category_mapping = open(category_mapping_fp, 'U')
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.conv_output_fp
    filter_value = opts.filter
    test = opts.test
    category = opts.category

    # every test except paired_T needs a mapping-file category
    if not category and test != 'paired_T':
        raise ValueError(
            'a category in the category mapping file must be'
            ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    otu_include_fp = opts.otu_include_fp
    otu_include = None
    if otu_include_fp and otu_include_fp != 'None':
        otu_include = open(otu_include_fp)

    otu_table_fp = opts.otu_table_fp
    if isdir(opts.otu_table_fp):
        # directory input: aggregated results only
        if test in longitudinal_tests:
            raise ValueError("the longitudinal_correlation and paired_T options cannot be run on a directory")
        otu_table_paths = []
        for entry in listdir(otu_table_fp):
            otu_table_paths.append(join(otu_table_fp, entry))
        output = test_wrapper_multiple(
            test, otu_table_paths, category_mapping, category, threshold,
            filter_value, otu_include)
    else:
        # single file input
        otu_table = open(otu_table_fp, 'U')
        if test in longitudinal_tests:
            converted_otu_table = longitudinal_otu_table_conversion_wrapper(
                otu_table, category_mapping, individual_column,
                reference_sample_column)
            if conv_output_fp:
                converted_handle = open(conv_output_fp, 'w')
                converted_handle.write(converted_otu_table)
                converted_handle.close()
            converted_otu_table = converted_otu_table.split('\n')
            # the mapping file is opened a second time before the test runs
            category_mapping = open(category_mapping_fp, 'U')
            if test == 'longitudinal_correlation':
                output = test_wrapper(
                    'correlation', converted_otu_table, category_mapping,
                    category, threshold, filter_value, otu_include,
                    999999999.0, True)
            else:
                output = test_wrapper(
                    'paired_T', converted_otu_table, category_mapping,
                    category, threshold, filter_value, otu_include,
                    999999999.0, True, individual_column,
                    reference_sample_column)
        else:
            output = test_wrapper(
                test, otu_table, category_mapping, category, threshold,
                filter_value, otu_include)

    result_handle = open(output_fp, 'w')
    result_handle.write('\n'.join(output))
    result_handle.close()
# Test fragment: compares the output of test_wrapper_multiple over two OTU
# table paths with the output of test_wrapper run on each table separately.
# fp1, fp2 and category_mapping are defined outside this fragment.
otu_table_paths = [fp1,fp2]
threshold = None
_filter = 0
otu_include = None
otu_table1 = open(fp1,'U')
otu_table2 = open(fp2,'U')
category = 'cat1'
# get expected ANOVA output from each file separately
results1 = test_wrapper('ANOVA', otu_table1, category_mapping, category, threshold, \
    _filter, otu_include=None)
results2 = test_wrapper('ANOVA', otu_table2, category_mapping, category, threshold, \
    _filter, otu_include=None)
results = test_wrapper_multiple('ANOVA', otu_table_paths, category_mapping, category, threshold, _filter, otu_include)
# multiple results should be combination of individual results
# Each result line is split on tabs; the first line is skipped and the
# remaining lines are indexed by their first field.
results1 = [result.split('\t') for result in results1]
results1_dict = {}
for line in results1[1:]:
    results1_dict[line[0]] = line[1:]
results2 = [result.split('\t') for result in results2]
results2_dict = {}
for line in results2[1:]:
    results2_dict[line[0]] = line[1:]
results = [result.split('\t') for result in results]
results_dict = {}
for line in results[1:]:
    results_dict[line[0]] = line[1:]
def main():
    """Run an OTU category significance test from the command line.

    Options are read with ``parse_command_line_parameters(**script_info)``.

    * When ``otu_table_fp`` is a single file, the table is parsed,
      the mapping is synchronized to it, and the requested test is run
      through ``test_wrapper`` (longitudinal_correlation and paired_T first
      convert the table with ``longitudinal_otu_table_conversion_wrapper``
      and optionally save the converted table).
    * When ``otu_table_fp`` is a directory, every ``*biom`` file is parsed
      and the aggregated result comes from ``test_wrapper_multiple``.

    The joined result lines are written to ``output_fp``.

    Raises:
        ValueError: if no category is given for a test other than paired_T,
            if longitudinal_correlation / paired_T is requested on a
            directory, or if the directory contains no ``*biom`` files.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    category_mapping_fp = opts.category_mapping_fp
    category_mapping = open(category_mapping_fp, 'U')
    category_mapping = parse_mapping_file(category_mapping)
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance
    filter_value = opts.filter
    test = opts.test
    category = opts.category

    # only paired_T may be run without a category
    if not category and test != 'paired_T':
        raise ValueError('a category in the category mapping file must be'
                         ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # NOTE(review): the handle is passed on as-is and never closed here;
    # presumably the test wrappers read it lazily -- confirm before closing.
    otu_include_fp = opts.otu_include_fp
    if otu_include_fp and otu_include_fp != 'None':
        otu_include = open(otu_include_fp)
    else:
        otu_include = None

    otu_table_fp = opts.otu_table_fp
    if not isdir(otu_table_fp):
        # if single file, process normally
        otu_table = open(otu_table_fp, 'U')
        try:
            otu_table = parse_biom_table(otu_table)
        except AttributeError:
            # NOTE(review): the fallback receives the file object, not a
            # string -- confirm this is what parse_biom_table_str expects.
            otu_table = parse_biom_table_str(otu_table)

        # synchronize the mapping file with the otu table
        category_mapping, removed_samples = sync_mapping_to_otu_table(
            otu_table, category_mapping)
        if removed_samples:
            # single-argument print(...) behaves the same as the print
            # statement under Python 2
            print("Warning, the following samples were in the category mapping file " +
                  "but not the OTU table and will be ignored: ")
            for i in removed_samples:
                print(i + '\n')

        if test == 'longitudinal_correlation' or test == 'paired_T':
            converted_otu_table = longitudinal_otu_table_conversion_wrapper(
                otu_table, category_mapping, individual_column,
                reference_sample_column)
            if conv_output_fp:
                with open(conv_output_fp, 'w') as of:
                    of.write(format_biom_table(converted_otu_table))
            if test == 'longitudinal_correlation':
                # set the otu_include list to all of the OTUs, this
                # effectively deactivates the filter for correlation,
                # because the filtered OTU_list is rewritten with the
                # otu_include list in the test_wrapper
                if not otu_include:
                    otu_include = set(otu_table.ObservationIds)
                output = test_wrapper(
                    'correlation', converted_otu_table, category_mapping,
                    category, threshold, filter_value, otu_include,
                    999999999.0, True)
            elif test == 'paired_T':
                output = test_wrapper(
                    'paired_T', converted_otu_table, category_mapping,
                    category, threshold, filter_value, otu_include,
                    999999999.0, True, individual_column,
                    reference_sample_column)
        else:
            output = test_wrapper(
                test, otu_table, category_mapping, category, threshold,
                filter_value, otu_include,
                otu_table_relative_abundance=relative_abundance)
    else:
        if test == 'longitudinal_correlation' or test == 'paired_T':
            raise ValueError(
                "the longitudinal_correlation and paired_T options cannot be run on a directory")

        otu_table_paths = glob('%s/*biom' % otu_table_fp)
        if not otu_table_paths:
            # previously surfaced as IndexError on parsed_otu_tables[0]
            raise ValueError(
                "no OTU tables matching '*biom' were found in %s"
                % otu_table_fp)

        # if directory, get aggregated results
        parsed_otu_tables = []
        for path in otu_table_paths:
            ot = open(path, 'U')
            ot = parse_biom_table(ot)
            parsed_otu_tables.append(ot)

        # synchronize the mapping file with the otu table
        # checks with just the first OTU table and assumes that all otu
        # tables have the same collection of samples
        category_mapping, removed_samples = sync_mapping_to_otu_table(
            parsed_otu_tables[0], category_mapping)
        if removed_samples:
            print("Warning, the following samples were in the category mapping file " +
                  "but not the OTU table and will be ignored: ")
            for i in removed_samples:
                print(i + '\n')

        output = test_wrapper_multiple(
            test, parsed_otu_tables, category_mapping, category, threshold,
            filter_value, otu_include,
            otu_table_relative_abundance=relative_abundance)

    with open(output_fp, 'w') as of:
        of.write('\n'.join(output))