def get_single_subject_idxs(self):
    """Return indices into ``self.good_samples`` keeping one sample per subject.

    Each sample in ``self.good_samples`` is looked up in the sample -> subject
    map built from the HMP metadata.  Walking the samples in order, the index
    of the first sample seen for each subject is kept and later samples from
    the same subject are skipped.

    Samples that are missing from the map all resolve to ``None`` via ``.get``
    and are therefore treated as one shared "unknown" subject: only the first
    such sample is kept.

    Returns:
        A 1-D integer ``numpy`` array of positions in ``self.good_samples``,
        in increasing order.  The array is empty (but still integer-typed)
        when there are no samples.
    """
    sub_sam_map = parse_HMP_data.parse_subject_sample_map()
    sam_sub_map = sample_utils.calculate_sample_subject_map(sub_sam_map)

    seen = set()
    good_idxs = []
    for i, sample in enumerate(self.good_samples):
        sub = sam_sub_map.get(sample)
        if sub not in seen:
            seen.add(sub)
            good_idxs.append(i)

    # Force an integer dtype: np.array([]) defaults to float64, and a float
    # array cannot be used to index another array.
    return np.array(good_idxs, dtype=int)
other_species_str = ""

################################################################################
# Analysis parameters
################################################################################

# Coverage cutoff, taken from the shared config module.
min_coverage = config.min_median_coverage

# Confidence interval range for rate estimates.
alpha = 0.5

# Thresholds and cutoffs used by the analysis.
low_pi_threshold = 1e-3
clade_divergence_threshold = 1e-2
modification_divergence_threshold = 1e-3
min_change = 0.8

# Toggle for including high copy number content (alternative setting: True).
include_high_copynum = False

################################################################################
# Subject and sample metadata
################################################################################

sys.stderr.write("Loading sample metadata...\n")
subject_sample_map = parse_HMP_data.parse_subject_sample_map()
sample_country_map = parse_HMP_data.parse_sample_country_map()
sample_order_map = parse_HMP_data.parse_sample_order_map()
sys.stderr.write("Done!\n")

################################################################################
# Sample selection
################################################################################

# Restrict plotting to samples above the depth threshold that take part in a
# timecourse.
snp_samples = diversity_utils.calculate_temporal_samples(species_name)

# The haploid subset of samples, stored as a set.
haploid_samples = set(diversity_utils.calculate_haploid_samples(species_name))

# Currently disabled: restrict to the North American subset.  The only
# temporal samples come from there, so this would keep out-of-sample effects
# from contaminating the between-subject comparisons.
#snp_samples = snp_samples[parse_HMP_data.calculate_country_samples(sample_country_map, sample_list=snp_samples, allowed_countries=set(["United States"]))]