same_sample_idxs, same_subject_idxs, diff_subject_idxs = (
    parse_midas_data.calculate_subject_pairs(subject_sample_map, snp_samples))

# Per-pair bounds for pairs of samples from different subjects.  Only the
# SNP lists are populated in this section; the gene lists and the two index
# maps are merely initialised here.
diff_subject_snp_plowers = []
diff_subject_snp_puppers = []
diff_subject_gene_plowers = []
diff_subject_gene_puppers = []
between_host_gene_idx_map = {}
low_divergence_between_host_gene_idx_map = {}

# diff_subject_idxs holds two parallel index sequences: (row, column) of
# each different-subject pair in the SNP matrices.
for snp_i, snp_j in zip(diff_subject_idxs[0], diff_subject_idxs[1]):
    plower, pupper = stats_utils.calculate_poisson_rate_interval(
        snp_difference_matrix[snp_i, snp_j],
        snp_opportunity_matrix[snp_i, snp_j],
        alpha=0.05)

    diff_subject_snp_plowers.append(plower)
    diff_subject_snp_puppers.append(pupper)

# Clip lower bounds into [1e-07, 1e09]; this also turns the list into an array.
diff_subject_snp_plowers = numpy.clip(diff_subject_snp_plowers, 1e-07, 1e09)
diff_subject_snp_puppers = numpy.array(diff_subject_snp_puppers)

# Sort both SNP arrays together by ascending (clipped) lower bound, breaking
# ties by upper bound.  lexsort treats its LAST key as the primary one, and,
# unlike unpacking zip(*sorted(zip(...))), it also works when there are no
# different-subject pairs at all.
sort_order = numpy.lexsort(
    (diff_subject_snp_puppers, diff_subject_snp_plowers))
diff_subject_snp_plowers = diff_subject_snp_plowers[sort_order]
diff_subject_snp_puppers = diff_subject_snp_puppers[sort_order]

##############################################################################
#
#print typical_same_subject_snp_opportunities, typical_diff_subject_snp_opportunities #print typical_same_subject_gene_opportunities, typical_diff_subject_gene_opportunities Lsnps = typical_diff_subject_snp_opportunities Lgenes = typical_diff_subject_gene_opportunities same_subject_snp_plowers = [] same_subject_snp_puppers = [] same_subject_gene_plowers = [] same_subject_gene_puppers = [] for sample_pair_idx in xrange(0, len(same_subject_snp_idxs[0])): i = same_subject_snp_idxs[0][sample_pair_idx] j = same_subject_snp_idxs[1][sample_pair_idx] plower, pupper = stats_utils.calculate_poisson_rate_interval( snp_difference_matrix[i, j], snp_opportunity_matrix[i, j], alpha) same_subject_snp_plowers.append(plower) same_subject_snp_puppers.append(pupper) snp_differences = diversity_utils.calculate_snp_differences_between( i, j, allele_counts_map, passed_sites_map, min_change=min_change) i = same_subject_gene_idxs[0][sample_pair_idx] j = same_subject_gene_idxs[1][sample_pair_idx] gene_differences = gene_diversity_utils.calculate_gene_differences_between( i, j, gene_depth_matrix, marker_coverages, min_log2_fold_change=4) plower, pupper = stats_utils.calculate_poisson_rate_interval( gene_difference_matrix[i, j], gene_opportunity_matrix[i, j])
# Per-sample depth summaries (IQR bounds and median), filled in below.
median_depths = []
depth_lowers = []
depth_uppers = []

# Now do polymorphism axis
for sample in highcoverage_samples:
    sample_sfs = sfs_map[sample]

    n_within, n_between, n_total = (
        sfs_utils.calculate_polymorphism_rates_from_sfs_map(sample_sfs))

    # Convert site counts to fractions of all sites; the "* 1.0" forces
    # true division under Python 2.
    frac_within = n_within * 1.0 / n_total
    frac_between = n_between * 1.0 / n_total
    between_rates.append(frac_between)
    within_rates.append(frac_within)

    # Interval on the within-sample rate (alpha=0.05).
    rate_bounds = stats_utils.calculate_poisson_rate_interval(
        n_within, n_total, alpha=0.05)
    rate_lo, rate_hi = rate_bounds
    within_rate_lowers.append(rate_lo)
    within_rate_uppers.append(rate_hi)

    # Summarise the depth distribution by its IQR and median.
    depths, counts = sfs_utils.calculate_depth_distribution_from_sfs_map(
        sample_sfs)
    iqr_lo, iqr_hi = stats_utils.calculate_IQR_from_distribution(
        depths, counts)
    depth_median = stats_utils.calculate_median_from_distribution(
        depths, counts)
    depth_lowers.append(iqr_lo)
    depth_uppers.append(iqr_hi)
    median_depths.append(depth_median)

    sample_names.append(sample)
# For every same-subject sample pair, record rate intervals and point
# estimates for (a) the pi of the sample with the larger pis entry and
# (b) the SNP differences between the two samples.
for pair_idx, (i, j) in enumerate(
        zip(same_subject_snp_idxs[0], same_subject_snp_idxs[1])):

    # Indices used for the pis / total_pis lookups.
    # NOTE(review): presumably the same pair as (i, j), expressed in the
    # index space of the pi arrays -- confirm where these are built.
    i2 = desired_same_subject_idxs[0][pair_idx]
    j2 = desired_same_subject_idxs[1][pair_idx]

    # Calculate max pi between two samples (ties resolve to j2).
    pi_idx = i2 if pis[i2] > pis[j2] else j2
    pi_total = total_pis[pi_idx]
    pi_opportunities = total_pi_opportunities[pi_idx]

    plower, pupper = stats_utils.calculate_poisson_rate_interval(
        pi_total, pi_opportunities, alpha)
    pmid = pi_total * 1.0 / pi_opportunities

    same_subject_pi_plowers.append(plower)
    same_subject_pi_pmids.append(pmid)
    same_subject_pi_puppers.append(pupper)

    # Calculate SNP changes
    snp_count = snp_difference_matrix[i, j]
    snp_opportunities = snp_opportunity_matrix[i, j]

    plower, pupper = stats_utils.calculate_poisson_rate_interval(
        snp_count, snp_opportunities, alpha)
    pmid = snp_count * 1.0 / snp_opportunities

    same_subject_snp_plowers.append(plower)
    same_subject_snp_pmids.append(pmid)
    same_subject_snp_puppers.append(pupper)