def main():
    """Scale a metagenome count table by per-sample sequencing depth.

    Parses the command line described by ``script_info``, builds a
    ``sample_id -> depth`` mapping from ``opts.input_seq_depth_file``, loads
    the BIOM count table from ``opts.input_count_table`` (through ``gzip``
    when the file name ends in ``.gz``), hands both to ``scale_metagenomes``
    and writes the formatted result to ``opts.output_metagenome_table``.

    Progress messages are printed only when ``opts.verbose`` is set.
    Every file opened here is closed before the function returns.
    """
    _option_parser, opts, _args = \
        parse_command_line_parameters(**script_info)

    def report(*parts):
        # Single-argument print(...) parses the same on Python 2 and 3 and
        # yields the same text as the old ``print a, b`` statement
        # (parts separated by one space).
        if opts.verbose:
            print(" ".join(str(part) for part in parts))

    report("Loading sequencing depth table: ", opts.input_seq_depth_file)
    # Consume the parser inside the ``with`` block: it may read lazily, so
    # the handle has to stay open until every pair has been collected.
    with open(opts.input_seq_depth_file, 'U') as depth_file:
        scaling_factors = dict(parse_seq_count_file(depth_file))

    report("Loading count table: ", opts.input_count_table)
    ext = path.splitext(opts.input_count_table)[1]
    # Gzipped tables are read as binary; plain ones with universal newlines.
    if ext == '.gz':
        opener, mode = gzip.open, 'rb'
    else:
        opener, mode = open, 'U'
    with opener(opts.input_count_table, mode) as table_file:
        genome_table = parse_biom_table(table_file)

    report("Scaling the metagenome...")
    scaled_metagenomes = scale_metagenomes(genome_table, scaling_factors)

    report("Writing results to output file: ", opts.output_metagenome_table)
    make_output_dir_for_file(opts.output_metagenome_table)
    # Format before opening the destination so that a formatting failure
    # does not leave behind a truncated output file.
    formatted_table = format_biom_table(scaled_metagenomes)
    with open(opts.output_metagenome_table, 'w') as out_file:
        out_file.write(formatted_table)
def main():
    """Scale a metagenome count table by per-sample sequencing depth.

    Parses the command line described by ``script_info``, builds a
    ``sample_id -> depth`` mapping from ``opts.input_seq_depth_file``, loads
    the BIOM count table from ``opts.input_count_table`` (through ``gzip``
    when the file name ends in ``.gz``), hands both to ``scale_metagenomes``
    and writes the formatted result to ``opts.output_metagenome_table``.

    Progress messages are printed only when ``opts.verbose`` is set.
    Every file opened here is closed before the function returns.
    """
    _option_parser, opts, _args = \
        parse_command_line_parameters(**script_info)

    def report(*parts):
        # Single-argument print(...) parses the same on Python 2 and 3 and
        # yields the same text as the old ``print a, b`` statement
        # (parts separated by one space).
        if opts.verbose:
            print(" ".join(str(part) for part in parts))

    report("Loading sequencing depth table: ", opts.input_seq_depth_file)
    # Consume the parser inside the ``with`` block: it may read lazily, so
    # the handle has to stay open until every pair has been collected.
    with open(opts.input_seq_depth_file, 'U') as depth_file:
        scaling_factors = dict(parse_seq_count_file(depth_file))

    report("Loading count table: ", opts.input_count_table)
    ext = path.splitext(opts.input_count_table)[1]
    # Gzipped tables are read as binary; plain ones with universal newlines.
    if ext == '.gz':
        opener, mode = gzip.open, 'rb'
    else:
        opener, mode = open, 'U'
    with opener(opts.input_count_table, mode) as table_file:
        genome_table = parse_biom_table(table_file)

    report("Scaling the metagenome...")
    scaled_metagenomes = scale_metagenomes(genome_table, scaling_factors)

    report("Writing results to output file: ", opts.output_metagenome_table)
    make_output_dir_for_file(opts.output_metagenome_table)
    # Format before opening the destination so that a formatting failure
    # does not leave behind a truncated output file.
    formatted_table = format_biom_table(scaled_metagenomes)
    with open(opts.output_metagenome_table, 'w') as out_file:
        out_file.write(formatted_table)
def main():
    """Scale a metagenome count table by per-sample sequencing depth.

    Parses the command line described by ``script_info``, builds a
    ``sample_id -> depth`` mapping from ``opts.input_seq_depth_file``, loads
    the count table with ``load_table``, hands both to ``scale_metagenomes``
    and writes the result to ``opts.output_metagenome_table`` through
    ``write_biom_table``.

    Progress messages are printed only when ``opts.verbose`` is set.
    """
    _option_parser, opts, _args = parse_command_line_parameters(**script_info)

    def report(*parts):
        # Single-argument print(...) parses the same on Python 2 and 3 and
        # yields the same text as the old ``print a, b`` statement
        # (parts separated by one space).
        if opts.verbose:
            print(" ".join(str(part) for part in parts))

    report("Loading sequencing depth table: ", opts.input_seq_depth_file)
    # Consume the parser inside the ``with`` block: it may read lazily, so
    # the handle has to stay open until every pair has been collected.
    # The context manager closes the file, which the bare open() never did.
    with open(opts.input_seq_depth_file, "U") as depth_file:
        scaling_factors = dict(parse_seq_count_file(depth_file))

    report("Loading count table: ", opts.input_count_table)
    genome_table = load_table(opts.input_count_table)

    report("Scaling the metagenome...")
    scaled_metagenomes = scale_metagenomes(genome_table, scaling_factors)

    report("Writing results to output file: ", opts.output_metagenome_table)
    make_output_dir_for_file(opts.output_metagenome_table)
    write_biom_table(scaled_metagenomes, opts.output_metagenome_table)
def main():
    """Scale a metagenome count table by per-sample sequencing depth.

    Parses the command line described by ``script_info``, builds a
    ``sample_id -> depth`` mapping from ``opts.input_seq_depth_file``, loads
    the count table with ``load_table``, hands both to ``scale_metagenomes``
    and writes the result to ``opts.output_metagenome_table`` through
    ``write_biom_table``.

    Progress messages are printed only when ``opts.verbose`` is set.
    """
    _option_parser, opts, _args = \
        parse_command_line_parameters(**script_info)

    def report(*parts):
        # Single-argument print(...) parses the same on Python 2 and 3 and
        # yields the same text as the old ``print a, b`` statement
        # (parts separated by one space).
        if opts.verbose:
            print(" ".join(str(part) for part in parts))

    report("Loading sequencing depth table: ", opts.input_seq_depth_file)
    # Consume the parser inside the ``with`` block: it may read lazily, so
    # the handle has to stay open until every pair has been collected.
    # The context manager closes the file, which the bare open() never did.
    with open(opts.input_seq_depth_file, 'U') as depth_file:
        scaling_factors = dict(parse_seq_count_file(depth_file))

    report("Loading count table: ", opts.input_count_table)
    genome_table = load_table(opts.input_count_table)

    report("Scaling the metagenome...")
    scaled_metagenomes = scale_metagenomes(genome_table, scaling_factors)

    report("Writing results to output file: ", opts.output_metagenome_table)
    make_output_dir_for_file(opts.output_metagenome_table)
    write_biom_table(scaled_metagenomes, opts.output_metagenome_table)
else: #If we don't need confidence intervals, we can do a faster pure numpy prediction if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table, genome_table, whole_round=round_flag) if opts.normalize_by_otu: #normalize (e.g. divide) the abundances by the sum of the OTUs per sample if opts.verbose: print "Normalizing functional abundances by sum of OTUs per sample" inverse_otu_sums = [1 / x for x in otu_table.sum(axis='sample')] scaling_factors = dict(zip(otu_table.ids(), inverse_otu_sums)) predicted_metagenomes = scale_metagenomes(predicted_metagenomes, scaling_factors) if opts.normalize_by_function: #normalize (e.g. divide) the abundances by the sum of the functions per sample #Sum of functional abundances per sample will equal 1 (e.g. relative abundance). if opts.verbose: print "Normalizing functional abundances by sum of functions per sample" predicted_metagenomes = predicted_metagenomes.norm(axis='sample', inplace=False) write_metagenome_to_file(predicted_metagenomes,opts.output_metagenome_table,\ opts.format_tab_delimited,"metagenome prediction",verbose=opts.verbose) if opts.with_confidence: output_path, output_filename = split(opts.output_metagenome_table)
predicted_metagenomes_lower_CI_95,predicted_metagenomes_upper_CI_95=\ predict_metagenome_variances(otu_table,genome_table,variance_table) else: #If we don't need confidence intervals, we can do a faster pure numpy prediction if opts.verbose: print "Predicting the metagenome..." predicted_metagenomes = predict_metagenomes(otu_table,genome_table) if opts.normalize_by_otu: #normalize (e.g. divide) the abundances by the sum of the OTUs per sample if opts.verbose: print "Normalizing functional abundances by sum of OTUs per sample" inverse_otu_sums = [1/x for x in otu_table.sum(axis='sample')] scaling_factors = dict(zip(otu_table.SampleIds,inverse_otu_sums)) predicted_metagenomes = scale_metagenomes(predicted_metagenomes,scaling_factors) if opts.normalize_by_function: #normalize (e.g. divide) the abundances by the sum of the functions per sample #Sum of functional abundances per sample will equal 1 (e.g. relative abundance). if opts.verbose: print "Normalizing functional abundances by sum of functions per sample" predicted_metagenomes = predicted_metagenomes.normObservationBySample() write_metagenome_to_file(predicted_metagenomes,opts.output_metagenome_table,\ opts.format_tab_delimited,"metagenome prediction",verbose=opts.verbose) if opts.with_confidence: output_path,output_filename = split(opts.output_metagenome_table) base_output_filename,ext = splitext(output_filename)