#all_species_gene_changes_category={} all_species_null = {} all_data[species_name] = {} # #################### # Analyze the data # #################### # # Only plot samples above a certain depth threshold that are "haploids" haploid_samples = diversity_utils.calculate_haploid_samples(species_name, debug=debug) # if len(haploid_samples) < min_sample_size: continue # same_sample_idxs, same_subject_idxs, diff_subject_idxs = parse_midas_data.calculate_ordered_subject_pairs( sample_order_map, haploid_samples) # snp_samples = set() sample_size = 0 for sample_pair_idx in xrange(0, len(same_subject_idxs[0])): # i = same_subject_idxs[0][sample_pair_idx] j = same_subject_idxs[1][sample_pair_idx] # snp_samples.add(haploid_samples[i]) snp_samples.add(haploid_samples[j]) # sample_size += 1 # snp_samples = list(snp_samples) allowed_sample_set = set(snp_samples)
# Per-sample metadata maps provided by the HMP parser.
sample_country_map = parse_HMP_data.parse_sample_country_map()
sample_order_map = parse_HMP_data.parse_sample_order_map()
sys.stderr.write("Done!\n")

# Samples for this species as returned by the temporal-sample helper,
# using the configured minimum median coverage.
temporal_samples = diversity_utils.calculate_temporal_samples(
    species_name,
    min_coverage=config.min_median_coverage)

# Haploid samples are held as a set here.
haploid_samples = set(
    diversity_utils.calculate_haploid_samples(species_name, debug=debug))

import sfs_utils

# Load the within-sample SFSs, restricted to the 1D-4D variant types.
sys.stderr.write("Loading SFSs for %s...\t" % species_name)
samples, sfs_map = parse_midas_data.parse_within_sample_sfs(
    species_name,
    allowed_variant_types=set(['1D', '2D', '3D', '4D']))
sys.stderr.write("Done!\n")

(same_sample_idxs,
 same_subject_idxs,
 diff_subject_idxs) = parse_midas_data.calculate_ordered_subject_pairs(
    sample_order_map, temporal_samples)

# 21 evenly spaced points from 0 to 1 inclusive.
frequency_bins = numpy.linspace(0, 1, 21)
# fs = numpy.array([0.1,0.2,0.3,0.4,0.5,0.5,0.6,0.7,0.8,0.9])
dfs = numpy.array([0.6, 0.7, 0.8, 0.9, 0.98])
perrs = []

# Visit each pair listed in same_subject_idxs; i and j index into
# temporal_samples. NOTE(review): the rest of this loop body lies beyond
# the visible portion of the file.
for sample_pair_idx in xrange(0, len(same_subject_idxs[0])):
    i = same_subject_idxs[0][sample_pair_idx]
    j = same_subject_idxs[1][sample_pair_idx]
    sample_i = temporal_samples[i]