import diversity_utils
import stats_utils

# Script setup: read the target species from the command line, then load the
# metaphlan2 gene list, the HMP subject/sample metadata, the per-sample
# coverage medians and the SNP tables for that species.
#
# NOTE(review): `sys`, `numpy` and `parse_midas_data` are used below but are
# not imported in this section -- presumably they are imported earlier in the
# file; confirm.

# The species to analyse is the first command-line argument.
species_name = sys.argv[1]

min_change = 0.8

# Load list of metaphlan2 genes
metaphlan2_genes = parse_midas_data.load_metaphlan2_genes(species_name)

# Load subject and sample metadata
sys.stderr.write("Loading HMP metadata...\n")
subject_sample_map = parse_midas_data.parse_subject_sample_map()
sys.stderr.write("Done!\n")

# Load genomic coverage distributions (one histogram per sample)
sample_coverage_histograms, samples = (
    parse_midas_data.parse_coverage_distribution(
        species_name,
        combination_type="sample",
    )
)

# Median coverage of each sample, in the same order as `samples`
median_coverages = numpy.array(
    [
        stats_utils.calculate_median_from_histogram(sample_coverage_histogram)
        for sample_coverage_histogram in sample_coverage_histograms
    ]
)

# Map each sample name to its median coverage
sample_coverage_map = {
    sample: median_coverages[idx] for idx, sample in enumerate(samples)
}

# Load SNP information for species_name
# (note: this rebinds `samples` to the sample list returned by parse_snps)
sys.stderr.write("Loading %s...\n" % species_name)
samples, allele_counts_map, passed_sites_map = parse_midas_data.parse_snps(
    species_name,
    combination_type="sample",
    debug=False,
)
sys.stderr.write("Done!\n")
import diversity_utils
import stats_utils
from numpy.random import choice

# Script setup: read the target species from the command line, then load the
# HMP subject/sample metadata, the per-sample coverage medians and the
# within-sample diversity (pi) tables for that species.
#
# NOTE(review): `sys`, `numpy` and `parse_midas_data` are used below but are
# not imported in this section -- presumably they are imported earlier in the
# file; confirm.

# The species to analyse is the first command-line argument.
species_name = sys.argv[1]

# Passed through to parse_within_sample_pi below.
debug = True

min_coverage = 20

# Load subject and sample metadata
sys.stderr.write("Loading HMP metadata...\n")
subject_sample_map = parse_midas_data.parse_subject_sample_map()
sys.stderr.write("Done!\n")

# Load genomic coverage distributions (one histogram per sample)
sample_coverage_histograms, samples = (
    parse_midas_data.parse_coverage_distribution(species_name)
)

# Nonzero-median coverage of each sample, in the same order as `samples`
median_coverages = numpy.array(
    [
        stats_utils.calculate_nonzero_median_from_histogram(
            sample_coverage_histogram
        )
        for sample_coverage_histogram in sample_coverage_histograms
    ]
)

# Map each sample name to its median coverage
sample_coverage_map = {
    sample: median_coverages[idx] for idx, sample in enumerate(samples)
}

# Load pi information for species_name
# (note: this rebinds `samples` to the list returned by parse_within_sample_pi)
sys.stderr.write("Loading within-sample diversity for %s...\n" % species_name)
samples, total_pis, total_pi_opportunities = (
    parse_midas_data.parse_within_sample_pi(species_name, debug)
)
sys.stderr.write("Done!\n")