# ---------------------------------------------------------------------------
# Inconsistency panel: tick placement, axis ranges and labels.
# ---------------------------------------------------------------------------
# Draw ticks on the bottom and left edges only.
inconsistency_axis.get_xaxis().tick_bottom()
inconsistency_axis.get_yaxis().tick_left()

# Fixed axis ranges (these setter calls are independent of the label calls).
inconsistency_axis.set_xlim([2e-05, 2e-02])
inconsistency_axis.set_ylim([0, 1.05])

inconsistency_axis.set_xlabel('Maximum divergence age of SNV, $d_B^*$')
inconsistency_axis.set_ylabel(
    'Phylogenetic inconsistency between\nSNVs & core-genome divergence')

# Per-species accumulators. Nothing in this visible stretch of the loop
# appends to them; presumably they are filled further down -- TODO confirm.
passed_species, sample_sizes = [], []

for species_name in good_species_list:

    # Stage 1: restrict to the samples returned by
    # diversity_utils.calculate_haploid_samples (the original note describes
    # these as samples above a certain depth threshold that are "haploids").
    sys.stderr.write("Loading haploid samples...\n")
    snp_samples = diversity_utils.calculate_haploid_samples(
        species_name, debug=debug)

    # Skip species with fewer than min_sample_size samples at this stage.
    if len(snp_samples) < min_sample_size:
        sys.stderr.write("Not enough haploid samples!\n")
        continue

    # Stage 2: keep only one sample per person.  The helper's return value is
    # used directly as an index into snp_samples.
    sys.stderr.write("Calculating unique samples...\n")
    unique_sample_selector = sample_utils.calculate_unique_samples(
        subject_sample_map, sample_list=snp_samples)
    snp_samples = snp_samples[unique_sample_selector]

    # Re-apply the same size cutoff after de-duplication.
    if len(snp_samples) < min_sample_size:
        sys.stderr.write("Not enough unique samples!\n")
        continue

    # Load inconsistency data
species_name) median_coverages = numpy.array([ stats_utils.calculate_nonzero_median_from_histogram( sample_coverage_histogram) for sample_coverage_histogram in sample_coverage_histograms ]) sample_coverage_map = { samples[i]: median_coverages[i] for i in xrange(0, len(samples)) } samples = numpy.array(samples) highcoverage_samples = set( diversity_utils.calculate_highcoverage_samples(species_name)) qp_samples = set( diversity_utils.calculate_haploid_samples(species_name)) non_qp_samples = list(highcoverage_samples - qp_samples) num_samples = 6 replace = False if len(non_qp_samples) < num_samples: replace = True target_samples = choice(non_qp_samples, num_samples, replace) sample_sfs_grid = gridspec.GridSpecFromSubplotSpec( 1, num_samples, width_ratios=[1] * 6, subplot_spec=species_sfs_grid[species_idx], wspace=0.1)