Exemplo n.º 1
0
# Register the PCA axis (constructed before this excerpt) with the figure and
# use the species name as the figure-wide title.
fig.add_subplot(pca_axis)
fig.suptitle(species_name)

# Build a second axis in slot 1 of outer_grid and attach it to the same
# figure. Judging by its name it holds the low-divergence PCA panel; the
# plotting calls that fill it are outside this excerpt.
low_divergence_pca_axis = plt.Subplot(fig, outer_grid[1])
fig.add_subplot(low_divergence_pca_axis)

# Load subject and sample metadata.
# Progress messages are written to stderr, not stdout.
# NOTE(review): the structure of subject_sample_map is determined by
# parse_midas_data.parse_subject_sample_map and is not visible here.
sys.stderr.write("Loading HMP metadata...\n")
subject_sample_map = parse_midas_data.parse_subject_sample_map()
sys.stderr.write("Done!\n")

# Load genomic coverage distributions.
# The parser returns one coverage histogram per sample plus the sample names;
# the code below treats the two sequences as index-aligned.
sample_coverage_histograms, samples = parse_midas_data.parse_coverage_distribution(
    species_name)

# Collapse each histogram to a single coverage value per sample.
# NOTE(review): the helper's name suggests a median over sites with nonzero
# coverage -- confirm in stats_utils.
median_coverages = numpy.array([
    stats_utils.calculate_nonzero_median_from_histogram(
        sample_coverage_histogram)
    for sample_coverage_histogram in sample_coverage_histograms
])

# Map sample name -> coverage value. enumerate() avoids the Python 2-only
# xrange; median_coverages is still indexed (rather than zipped) so that a
# length mismatch raises IndexError instead of being silently truncated.
sample_coverage_map = {
    sample: median_coverages[i]
    for i, sample in enumerate(samples)
}

# Load pi information for species_name.
# NOTE: this rebinds `samples`; from here on it is the sample list returned
# by parse_within_sample_pi, not the one returned by the coverage parser.
sys.stderr.write("Loading within-sample diversity for %s...\n" % species_name)
samples, total_pis, total_pi_opportunities = parse_midas_data.parse_within_sample_pi(
    species_name, debug)
sys.stderr.write("Done!\n")
# Ratio of each pi total to its opportunity count.
# NOTE(review): presumably both operands are numpy arrays, making this an
# elementwise division -- confirm. This script targets Python 2 (it uses
# xrange), where integer operands would floor-divide unless
# `from __future__ import division` is in effect. A zero opportunity count is
# not guarded against here.
pis = total_pis / total_pi_opportunities

median_coverages = numpy.array(
Exemplo n.º 2
0
# Reduce the marker-gene coverage matrix along axis 0: the largest value in
# each column, and the number of entries in each column that are below 1.
# Despite its name, num_zero counts every value < 1, not only exact zeros.
# NOTE(review): rows presumably correspond to marker genes and columns to
# samples -- confirm where marker_gene_coverages is built.
max_marker_coverages = marker_gene_coverages.max(axis=0)
num_zero = (marker_gene_coverages<1).sum(axis=0)

# Indices along the first axis where median_marker_coverages is at least 100.
high_coverage_idxs = numpy.nonzero(median_marker_coverages>=100)[0]

# Debug dump to stdout (Python 2 print statements).
print marker_genes
# For the first two high-coverage indices, print the pooled coverage next to
# the matching column of the per-gene coverage matrix.
# NOTE(review): indexing [0] and [1] raises IndexError when fewer than two
# entries passed the >= 100 threshold.
print pooled_marker_coverages[high_coverage_idxs[0]], ":", marker_gene_coverages[:,high_coverage_idxs[0]]
print pooled_marker_coverages[high_coverage_idxs[1]], ":", marker_gene_coverages[:,high_coverage_idxs[1]]

# Row at index 1 of the coverage matrix (the second entry along axis 0).
print marker_gene_coverages[1,:]

# Per-column count of entries below 1.
print num_zero

# Load genomic coverage distributions.
# The parser returns one coverage histogram per sample plus the sample names;
# the code below treats the two sequences as index-aligned.
sample_coverage_histograms, samples = parse_midas_data.parse_coverage_distribution(
    species_name)

# Collapse each histogram to a single coverage value per sample.
# NOTE(review): the helper's name suggests a median over sites with nonzero
# coverage -- confirm in stats_utils.
median_coverages = numpy.array([
    stats_utils.calculate_nonzero_median_from_histogram(
        sample_coverage_histogram)
    for sample_coverage_histogram in sample_coverage_histograms
])

# Map sample name -> coverage value. enumerate() avoids the Python 2-only
# xrange; median_coverages is still indexed (rather than zipped) so that a
# length mismatch raises IndexError instead of being silently truncated.
sample_coverage_map = {
    sample: median_coverages[i]
    for i, sample in enumerate(samples)
}


pylab.figure(1)
for i in xrange(0,len(sample_coverage_histograms)):
    sample = samples[i]
    
    if sample not in desired_samples:
        continue
    
    sample_coverage_histogram = sample_coverage_histograms[i]
    x0 = median_coverages[i]
    
    if x0<20:
        continue