# Time-point comparison of per-sample diversity: select well-covered samples,
# work out which sample indices pair up in time / by subject, then start a
# log-log scatter of first vs. subsequent time point.
#
# The statements below were previously collapsed onto a single physical line,
# which made everything after the first "#" part of a comment. They are
# restored here one statement per line.

# Keep the entries of clipped_pis whose median coverage is at least
# min_coverage (boolean-mask indexing; assumes array inputs of equal length).
high_cov_pis = clipped_pis[(median_coverages >= min_coverage)]

# get the time info for the snp_samples
time_pair_idxs, visno, day = parse_midas_data.calculate_time_pairs(
    subject_sample_time_map, high_cov_samples)

#### time pair idxs where patients can have exactly 1 time point (so that points plotted are iid)
time_pair_idxs_unique, visno_snps_genes_unique, day_snps_genes_unique = parse_midas_data.calculate_unique_time_pairs(
    subject_sample_time_map, high_cov_samples)

### different patient idx:
# to compare results to time_pair idxs, we want different patient pair idxs.
# This helps us to contextualize if we are seeing events within patients that
# resemble replacements or modifications.

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects
snp_same_sample_idxs, snp_same_subject_idxs, snp_diff_subject_idxs = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, high_cov_samples)

##########
# Plot:  #
##########
pylab.figure()
pylab.xlabel('First time point')
pylab.ylabel('Subsequent time point')
pylab.xlim([1e-5, 1e-1])
pylab.ylim([1e-5, 1e-1])
pylab.title(species_name)

# x = diversity at the first index of each time pair, y = diversity at the
# second index, drawn as green circles ('go').
# NOTE(review): this call is cut off after the 'go' argument in the visible
# source. It is closed here without adding arguments; any further keyword
# arguments that the original call carried are not visible -- confirm.
pylab.loglog(high_cov_pis[time_pair_idxs[0]], high_cov_pis[time_pair_idxs[1]], 'go',
             )
# Accumulate per-chunk SNP difference / opportunity counts for the second
# matrix, convert both running totals to divergence rates, and set up the
# figure that will hold the SNP divergence panel.
#
# The statements below were previously collapsed onto a single physical line
# (an `if` suite followed by further statements with no separators, which is
# a SyntaxError). They are restored here one statement per line.
#
# NOTE(review): the accumulation presumably sits inside a per-chunk loop whose
# header is not visible here -- confirm the intended indentation.

# An empty first axis means nothing has been accumulated yet, so allocate the
# running totals with the shape of the current chunk.
if second_snp_difference_matrix.shape[0]==0:
    # Multiplying by 1.0 presumably forces a floating-point dtype so the
    # later division is not integer division -- confirm.
    second_snp_difference_matrix = numpy.zeros_like(chunk_snp_difference_matrix)*1.0
    second_snp_opportunity_matrix = numpy.zeros_like(second_snp_difference_matrix)*1.0

second_snp_difference_matrix += chunk_snp_difference_matrix
second_snp_opportunity_matrix += chunk_snp_opportunity_matrix

sys.stderr.write("Done!\n")

# Now calculate rates
# NOTE(review): entries with zero opportunities divide by zero here (NaN/inf
# with a runtime warning for float arrays) -- confirm downstream code expects
# that.
first_divergence_matrix = first_snp_difference_matrix / first_snp_opportunity_matrix
second_divergence_matrix = second_snp_difference_matrix / second_snp_opportunity_matrix

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects
same_sample_idxs, same_subject_idxs, diff_subject_idxs = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, joint_snp_samples)

# Set up figure
fig = plt.figure(figsize=(3, 2.5))

# Set up grids to hold figure panels
outer_grid = gridspec.GridSpec(1, 1)

###################
#
# SNP Panel
#
###################

# Create the single panel's axes from the 1x1 grid and register it on the figure.
divergence_axis = plt.Subplot(fig, outer_grid[0])
fig.add_subplot(divergence_axis)
# Align gene-based and SNP-based sample indexing: pick the samples to compare,
# compute gene prevalences, and translate subject/sample pair indices from the
# "desired" sample ordering into the SNP and gene sample orderings.
#
# The statements below were previously collapsed onto a single physical line
# that began with "#", which made the entire fragment a comment. They are
# restored here one statement per line.

# Now need to make the gene samples and snp samples match up
# NOTE(review): this selection uses a strict ">" while prevalence_idxs below
# uses ">=" on the same threshold; the two differ only for samples whose
# coverage equals min_coverage exactly -- confirm which is intended.
desired_samples = gene_samples[marker_coverages > min_coverage]

# Mask of samples that are both returned by calculate_unique_samples and
# sufficiently covered; the product of the two masks acts as an element-wise
# AND (assumes both are boolean arrays -- confirm).
prevalence_idxs = (parse_midas_data.calculate_unique_samples(
    subject_sample_map, gene_samples)) * (marker_coverages >= min_coverage)

prevalences = gene_diversity_utils.calculate_fractional_gene_prevalences(
    gene_depth_matrix[:, prevalence_idxs], marker_coverages[prevalence_idxs])

# Sort a copy so that `prevalences` keeps its original (per-gene) order.
pangenome_prevalences = numpy.array(prevalences, copy=True)
pangenome_prevalences.sort()

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects
desired_same_sample_idxs, desired_same_subject_idxs, desired_diff_subject_idxs = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, desired_samples)

# Index maps from the desired-sample ordering into the SNP and gene sample
# orderings respectively.
snp_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    desired_samples, snp_samples)
gene_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    desired_samples, gene_samples)

# Re-express the pair indices in each of the two orderings.
same_sample_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_same_sample_idxs)
same_sample_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, desired_same_sample_idxs)

same_subject_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_same_subject_idxs)
# Compute the fixation matrix for variant_type='1D' sites, restrict to
# well-covered samples, and map subject/sample pair indices from the
# high-coverage ordering back into the full `samples` ordering.
#
# The statements below were previously collapsed onto a single physical line
# (two statements fused with no separator, which is a SyntaxError, and the
# rest swallowed by "#"). They are restored here one statement per line.

# NOTE(review): the "_non" suffix presumably denotes nonsynonymous sites
# selected by variant_type='1D' -- confirm against diversity_utils.
fixation_matrix_non, persite_fixation_matrix_non = diversity_utils.calculate_fixation_matrix(
    allele_counts_map, passed_sites_map, variant_type='1D', min_change=min_change)
sys.stderr.write("Done!\n")

# Only plot samples above a certain depth threshold
high_coverage_samples = samples[median_coverages >= min_coverage]
# Product of the two masks acts as an element-wise AND: well covered and with
# pi no greater than 1e-03 (assumes boolean array masks -- confirm).
high_coverage_low_pi_samples = samples[(median_coverages >= min_coverage) * (pis <= 1e-03)]

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects
high_coverage_same_sample_idxs, high_coverage_same_subject_idxs, high_coverage_diff_subject_idxs = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, high_coverage_samples)

# Map from the high-coverage ordering into the full `samples` ordering.
sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    high_coverage_samples, samples)

# NOTE(review): the lone "#" markers between the three calls below are read as
# separator comments, so all three assignments are live -- confirm that the
# second and third were not meant to be commented out.
same_sample_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    sample_idx_map, high_coverage_same_sample_idxs)
#
same_subject_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    sample_idx_map, high_coverage_same_subject_idxs)
#
diff_subject_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    sample_idx_map, high_coverage_diff_subject_idxs)

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects