# One median per coverage histogram, as computed by
# stats_utils.calculate_median_from_histogram.
median_coverages = numpy.array([
    stats_utils.calculate_median_from_histogram(histogram)
    for histogram in sample_coverage_histograms
])
# Lookup table from sample to its median coverage. This pairs entries by
# position, so `samples` must be index-aligned with `median_coverages`.
sample_coverage_map = {
    sample: median_coverages[idx]
    for idx, sample in enumerate(samples)
}

###############################################################
# Compute Pi within patients to figure out which are haploid  #
###############################################################

# Load the per-sample pi totals for species_name, logging progress to stderr.
sys.stderr.write("Loading within-sample diversity for %s...\n" % species_name)
# `debug` is passed by keyword, matching the other calls to
# parse_within_sample_pi in this file. Those calls also pass
# allowed_variant_types / allowed_genes, so a positional second argument
# is not guaranteed to bind to `debug`.
samples, total_pis, total_pi_opportunities = (
    parse_midas_data.parse_within_sample_pi(species_name, debug=debug))
sys.stderr.write("Done!\n")

# Raw per-sample ratio, plus a variant with 1 added to both the numerator and
# the denominator (presumably a pseudocount guarding against zero
# opportunities -- TODO confirm).
pis = total_pis / total_pi_opportunities
clipped_pis = (total_pis + 1) / (total_pi_opportunities + 1)

# `samples` was rebound by the call above, so re-derive the coverages in the
# new sample order from the existing lookup table.
median_coverages = numpy.array(
    [sample_coverage_map[sample] for sample in samples])

###############################################################
# Indexes for SNP samples that have low piS and high coverage #
###############################################################

# Keep only the samples whose median coverage reaches min_coverage.
# NOTE(review): the original comment says these are the "haploids" to plot,
# but no pi-based filter is applied at this point -- only the depth threshold.
# Assumes `samples` and `clipped_pis` support boolean-mask indexing
# (e.g. numpy arrays) -- TODO confirm.
high_cov_mask = median_coverages >= min_coverage
high_cov_samples = samples[high_cov_mask]
high_cov_pis = clipped_pis[high_cov_mask]

# NOTE(review): two orphaned lines that used to follow here
# ("for i in xrange(0, len(samples))" and a closing "}") were the tail of a
# dict comprehension whose opening was missing; they made the file
# unparseable and have been removed. sample_coverage_map as built earlier in
# the file is left unchanged -- confirm nothing else was lost in the splice.

# Prune the time metadata so that, for subjects with more than one sample at
# a time point, the highest-coverage sample is the one retained.
subject_sample_time_map = parse_midas_data.prune_subject_sample_time_map(
    subject_sample_time_map_all_samples,
    sample_coverage_map,
)

###############################################################
# Compute Pi within patients to figure out which are haploid  #
###############################################################

# Load the per-sample pi totals for species_name, restricted to the '4D'
# variant type and to the genes in core_genes; progress goes to stderr.
sys.stderr.write("Loading within-sample diversity for %s...\n" % species_name)
samples, total_pis, total_pi_opportunities = (
    parse_midas_data.parse_within_sample_pi(
        species_name,
        allowed_variant_types={'4D'},
        allowed_genes=core_genes,
        debug=debug))
sys.stderr.write("Done!\n")
pis = total_pis / total_pi_opportunities

######################
# compute median cov #
######################

# `samples` was rebound above; look the coverages up again in the new order.
median_coverages = numpy.array(
    [sample_coverage_map[sample] for sample in samples])

##########################################################
# Load SNP info
##########################################################
# Genomic coverage distributions for species_name; this call also (re)binds
# `samples`.
sample_coverage_histograms, samples = (
    parse_midas_data.parse_coverage_distribution(species_name))
# One value per histogram, as computed by
# stats_utils.calculate_nonzero_median_from_histogram.
median_coverages = numpy.array([
    stats_utils.calculate_nonzero_median_from_histogram(histogram)
    for histogram in sample_coverage_histograms
])
# Lookup table from sample to its median coverage (paired by position).
sample_coverage_map = {
    sample: median_coverages[idx]
    for idx, sample in enumerate(samples)
}

# Load the per-sample pi totals for species_name, restricted to the genes in
# core_genes; progress is reported on stderr.
sys.stderr.write("Loading within-sample diversity for %s...\n" % species_name)
samples, total_pis, total_pi_opportunities = (
    parse_midas_data.parse_within_sample_pi(
        species_name, allowed_genes=core_genes, debug=debug))
sys.stderr.write("Done!\n")
pis = total_pis / total_pi_opportunities

# `samples` was rebound above; look the coverages up again in the new order.
median_coverages = numpy.array(
    [sample_coverage_map[sample] for sample in samples])

# Keep the samples treated as "haploids": median coverage of at least
# min_coverage AND pi of at most 1e-03 (both masks are elementwise).
snp_samples = samples[(median_coverages >= min_coverage) & (pis <= 1e-03)]

# Load the pangenome (gene-level) data for species_name, restricted to the
# samples selected above; progress is reported on stderr.
sys.stderr.write("Loading pangenome data for %s...\n" % species_name)
(gene_samples, gene_names, gene_presence_matrix, gene_depth_matrix,
 marker_coverages, gene_reads_matrix) = parse_midas_data.parse_pangenome_data(
     species_name, allowed_samples=snp_samples)
sys.stderr.write("Done!\n")