# Keep only the entries of clipped_pis whose median coverage reaches
# the minimum coverage threshold.
high_cov_pis = clipped_pis[median_coverages >= min_coverage]

# Time information (index pairs, visit numbers, days) for the
# high-coverage samples.
(time_pair_idxs,
 visno,
 day) = parse_midas_data.calculate_time_pairs(
    subject_sample_time_map, high_cov_samples)

# "Unique" variant of the time pairs: per the original note, each patient
# is limited to exactly 1 time point here so that the plotted points are iid.
(time_pair_idxs_unique,
 visno_snps_genes_unique,
 day_snps_genes_unique) = parse_midas_data.calculate_unique_time_pairs(
    subject_sample_time_map, high_cov_samples)

# Between-patient comparison:
# index pairs for different patients give a baseline against which the
# within-patient time pairs can be read, i.e. whether within-patient
# events look more like replacements or like modifications.
#
# Split the sample index pairs into three groups: same sample,
# same subject, and different subjects.
(snp_same_sample_idxs,
 snp_same_subject_idxs,
 snp_diff_subject_idxs) = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, high_cov_samples)

########
# Plot #
########

# Start a fresh figure, titled with the species being analysed.
pylab.figure()
pylab.title(species_name)

# x axis: value at the first time point; y axis: value at a later one.
pylab.xlabel('First time point')
pylab.ylabel('Subsequent time point')

# Both axes share the same fixed range.
pylab.xlim([1e-5, 1e-1])
pylab.ylim([1e-5, 1e-1])

pylab.loglog(high_cov_pis[time_pair_idxs[0]],
             high_cov_pis[time_pair_idxs[1]],
             'go',
    if second_snp_difference_matrix.shape[0]==0:
        second_snp_difference_matrix = numpy.zeros_like(chunk_snp_difference_matrix)*1.0
        second_snp_opportunity_matrix = numpy.zeros_like(second_snp_difference_matrix)*1.0
    
    second_snp_difference_matrix += chunk_snp_difference_matrix
    second_snp_opportunity_matrix += chunk_snp_opportunity_matrix

sys.stderr.write("Done!\n")

# Turn the accumulated counts into rates: differences divided by
# opportunities, separately for the "first" and "second" matrices.
# NOTE(review): assumes these are numpy float arrays, in which case the
# division is element-wise and entries with zero opportunities come out
# as nan/inf -- confirm downstream code expects that.
first_divergence_matrix = (
    first_snp_difference_matrix / first_snp_opportunity_matrix)
second_divergence_matrix = (
    second_snp_difference_matrix / second_snp_opportunity_matrix)

# Split the index pairs of joint_snp_samples into three groups:
# same sample, same subject, and different subjects.
(same_sample_idxs,
 same_subject_idxs,
 diff_subject_idxs) = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, joint_snp_samples)

# Create the figure (figsize 3 x 2.5).
fig = plt.figure(figsize=(3, 2.5))

# A 1x1 grid holds the figure's panels.
outer_grid = gridspec.GridSpec(nrows=1, ncols=1)

# ---------------------------------------------------------------
# SNP panel
# ---------------------------------------------------------------

# Build the axis in the grid's only cell and attach it to the figure.
divergence_axis = plt.Subplot(fig, outer_grid[0])
fig.add_subplot(divergence_axis)
# Now need to make the gene samples and snp samples match up.
# The coverage threshold is inclusive (>=), matching the prevalence mask
# built just below, so that a sample sitting exactly at min_coverage is
# treated the same way by both selections.
desired_samples = gene_samples[marker_coverages >= min_coverage]

# Mask of samples used for the prevalence calculation: the result of
# calculate_unique_samples combined (element-wise product) with the
# coverage threshold.
# NOTE(review): presumably calculate_unique_samples returns a boolean mask
# over gene_samples -- confirm against parse_midas_data.
prevalence_idxs = (parse_midas_data.calculate_unique_samples(
    subject_sample_map, gene_samples)) * (marker_coverages >= min_coverage)

# Gene prevalences computed from the selected columns of the depth matrix
# and the matching marker coverages.
prevalences = gene_diversity_utils.calculate_fractional_gene_prevalences(
    gene_depth_matrix[:, prevalence_idxs], marker_coverages[prevalence_idxs])

# Sorted copy of the prevalences; `prevalences` itself keeps its order.
pangenome_prevalences = numpy.array(prevalences, copy=True)
pangenome_prevalences.sort()

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects (indices are relative to desired_samples).
desired_same_sample_idxs, desired_same_subject_idxs, desired_diff_subject_idxs = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, desired_samples)

# Index maps relating desired_samples to the SNP sample list and to the
# gene sample list, so the pair indices above can be re-expressed in each.
snp_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    desired_samples, snp_samples)
gene_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    desired_samples, gene_samples)

# Same-sample pairs, expressed in SNP-sample and gene-sample indices.
same_sample_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_same_sample_idxs)

same_sample_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, desired_same_sample_idxs)

# Same-subject pairs, expressed in SNP-sample indices.
same_subject_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_same_subject_idxs)
# Example #4
# 0
# Fixation matrix (plus its per-site counterpart) for variant type '1D',
# using the configured min_change.
fixation_matrix_non, persite_fixation_matrix_non = (
    diversity_utils.calculate_fixation_matrix(
        allele_counts_map,
        passed_sites_map,
        variant_type='1D',
        min_change=min_change)
)
sys.stderr.write("Done!\n")

# Restrict plotting to samples whose median coverage meets the threshold.
high_coverage_samples = samples[median_coverages >= min_coverage]

# The same coverage filter, additionally requiring pi of at most 1e-03.
high_coverage_low_pi_samples = samples[
    (median_coverages >= min_coverage) * (pis <= 1e-03)]

# Split the index pairs of the high-coverage samples into three groups:
# same sample, same subject, and different subjects.
(high_coverage_same_sample_idxs,
 high_coverage_same_subject_idxs,
 high_coverage_diff_subject_idxs) = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, high_coverage_samples)

# Index map relating high_coverage_samples to the full `samples` list.
sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    high_coverage_samples, samples)

# Apply that map to each of the three pair groups, in the same order
# (same sample, same subject, different subjects).
same_sample_idxs, same_subject_idxs, diff_subject_idxs = (
    parse_midas_data.apply_sample_index_map_to_indices(
        sample_idx_map, pair_idxs)
    for pair_idxs in (
        high_coverage_same_sample_idxs,
        high_coverage_same_subject_idxs,
        high_coverage_diff_subject_idxs,
    )
)

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects