def test_make_slices_default_settings(self):
    """Check the slice reached at enumeration index 11 with 500 bp windows.

    Slices are consumed until the index exceeds 10; the slice held at that
    point (or the last one yielded, if there are fewer) is compared.
    """
    slices = VCF.get_slice_indicies(self.bgzip_path, regions=None,
                                    window_size=500)
    for index, window in enumerate(slices):
        if index >= 11:
            break
    self.assertEqual(window, ('Chr01', 5501, 6000))
def test_heterozyogote_outgroup_calling(self):
    """Expect no call when 'out1' is '1/1' and 'out2' is '1/0'.

    Note: the genotypes are written into ``self.vcf_line`` in place.
    """
    vcf_line = self.vcf_line
    vcf_line['out1']['GT'] = '1/1'
    vcf_line['out2']['GT'] = '1/0'
    het = VCF.process_outgroup(vcf_line, self.populations)
    # assertIsNone states the intent directly and gives a clearer failure
    # message than comparing against None with assertEqual.
    self.assertIsNone(het)
def test_diff_homozygotes_outgroups_calling(self):
    """Expect no call when 'out1' is '1/1' and 'out2' is '0/0'.

    Note: the genotypes are written into ``self.vcf_line`` in place.
    """
    vcf_line = self.vcf_line
    vcf_line['out1']['GT'] = '1/1'
    vcf_line['out2']['GT'] = '0/0'
    diff = VCF.process_outgroup(vcf_line, self.populations)
    # assertIsNone states the intent directly and gives a clearer failure
    # message than comparing against None with assertEqual.
    self.assertIsNone(diff)
def test_make_slices_default_with_params_set(self):
    """Check the slice reached at enumeration index 11 with window_size=1008.

    Slices are consumed until the index exceeds 10; the slice held at that
    point (or the last one yielded, if there are fewer) is compared.
    """
    slices = VCF.get_slice_indicies(self.bgzip_path, regions=None,
                                    window_size=1008)
    for index, window in enumerate(slices):
        if index >= 11:
            break
    self.assertEqual(window, ('Chr01', 11089, 12096))
def test_home_alt_outgroup_calling(self):
    """Expect '1' when both 'out1' and 'out2' carry the '1/1' genotype.

    Note: the genotypes are written into ``self.vcf_line`` in place.
    """
    record = self.vcf_line
    for sample_id in ('out1', 'out2'):
        record[sample_id]['GT'] = '1/1'
    result = VCF.process_outgroup(record, self.populations)
    self.assertEqual(result, '1')
def test_header_vs_population_sample_ids(self):
    """Check that the sample IDs parsed from the populations argument
    match those in the VCF file.

    NOTE: In practice the populations argument can contain fewer samples
    and populations than are actually contained in the VCF file.
    """
    header = VCF.make_empty_vcf_ordered_dict(self.bgzip_path)
    # Entries from position 9 onwards are taken as the sample IDs.
    header_sample_ids = list(header)[9:]

    populations_dict = VCF.parse_populations_list(self.populations_list)
    populations_sample_ids = []
    for members in populations_dict.values():
        populations_sample_ids.extend(members)

    # Same unique IDs and the same number of them on both sides.
    self.assertEqual(set(header_sample_ids), set(populations_sample_ids))
    self.assertEqual(len(header_sample_ids), len(populations_sample_ids))
def test_population_string_parsing(self):
    """Compare the parsed populations list against the expected mapping
    of population name to sample IDs."""
    expected = {
        'melpo': ['m523', 'm524', 'm525', 'm589', 'm675',
                  'm676', 'm682', 'm683', 'm687', 'm689'],
        'pachi': ['p516', 'p517', 'p518', 'p519', 'p520',
                  'p591', 'p596', 'p690', 'p694', 'p696'],
        'cydno': ['c511', 'c512', 'c513', 'c514', 'c515',
                  'c563', 'c614', 'c630', 'c639', 'c640'],
        'outgroups': ['h665', 'i02-210'],
    }
    parsed = VCF.parse_populations_list(self.populations_list)
    self.assertEqual(parsed, expected)
def test_calc_fstats_normal_allele_counts(self):
    """Check the ('pachi', 'outgroups') entry of calc_fstats computed from
    the normal allele-count fixture (exact float comparison)."""
    expected = {
        'G_prime_st_est': 0.9140159767610748,
        'D_est': 0.7581699346405228,
        'G_double_prime_st_est': 0.9477124183006534,
        'Gst_est': 0.6444444444444445,
        'Hs_est': 0.1729729729729729,
        'Ht_est': 0.48648648648648646,
    }
    results = VCF.calc_fstats(self.normal_allele_counts)
    self.assertEqual(expected, results[('pachi', 'outgroups')])
def test_trivial_calc_multilocus_f_statistics(self):
    """With the trivial Hs/Ht fixtures every multilocus statistic and its
    standard deviation is expected to be 0.0 for ('pop1', 'pop2')."""
    stat_names = (
        'G_prime_st_est', 'Gst_est.stdev',
        'G_double_prime_st_est.stdev', 'G_double_prime_st_est',
        'Gst_est', 'D_est.stdev',
        'G_prime_st_est.stdev', 'D_est',
    )
    expected = {('pop1', 'pop2'): dict.fromkeys(stat_names, 0.0)}
    observed = VCF.calc_multilocus_f_statistics(self.trivial_Hs_est_dict,
                                                self.trivial_Ht_est_dict)
    self.assertEqual(expected, observed)
def test_trivial_2_calc_multilocus_f_statistics(self):
    """Check multilocus statistics computed from the second trivial Hs/Ht
    fixtures (exact float comparison)."""
    expected = {
        ('pop1', 'pop2'): {
            'G_prime_st_est': 0.8333333333333334,
            'Gst_est.stdev': 0.09999999999999998,
            'G_double_prime_st_est.stdev': 0.060586734693877375,
            'G_double_prime_st_est': 0.8888888888888888,
            'Gst_est': 0.5,
            'D_est.stdev': 0.08928571428571419,
            'G_prime_st_est.stdev': 0.07857142857142851,
            'D_est': 0.6488650338184054,
        },
    }
    observed = VCF.calc_multilocus_f_statistics(self.trivial_2_Hs_est_dict,
                                                self.trivial_2_Ht_est_dict)
    self.assertEqual(expected, observed)
def test_calc_multilocus_f_statistics(self):
    """Check multilocus statistics computed from the Hs/Ht fixtures
    (exact float comparison)."""
    expected = {
        ('pop1', 'pop2'): {
            'Gst_est': 0.30312672938572266,
            'Gst_est.stdev': 1.3360742705570265,
            'G_double_prime_st_est': 0.5003506191636901,
            'G_double_prime_st_est.stdev': 2.394045897494315,
            'G_prime_st_est': 0.34889353651117816,
            'G_prime_st_est.stdev': 1.6091544573608876,
            'D_est': 0.0015629851350084287,
            'D_est.stdev': 0.25110803099568757,
        },
    }
    observed = VCF.calc_multilocus_f_statistics(self.Hs_est_dict,
                                                self.Ht_est_dict)
    self.assertEqual(expected, observed)
def test_calc_fstats_trivial_allele_counts(self):
    """Check the full calc_fstats result for the trivial allele-count
    fixture (exact float comparison)."""
    expected = {
        ('pop2', 'pop1'): {
            'G_prime_st_est': 0.6803049722304385,
            'D_est': 0.517460317460318,
            'G_double_prime_st_est': 0.7609710550887027,
            'Gst_est': 0.33747412008281613,
            'Hs_est': 0.3368421052631577,
            'Ht_est': 0.508421052631579,
        },
    }
    results = VCF.calc_fstats(self.trivial_allele_counts)
    self.assertEqual(expected, results)
def callSNPs(current_base, numb_of_seqs):
    """Call the base for every sample column of one record.

    Args:
        current_base: record exposing ``FILTER``, ``FORMAT``, ``REF`` and
            ``ALT`` attributes and supporting slicing; the items from
            index 9 onwards are passed to ``VCF.process_snp_call``.
        numb_of_seqs: length of the returned array.

    Returns:
        A NumPy array of dtype ``S1`` holding one entry per sequence.
    """
    # Explicit 'S1' replaces the ``np.string0`` alias, which no longer
    # exists in current NumPy; np.zeros produced this same dtype from it.
    blanks = np.zeros(numb_of_seqs, 'S1')

    if current_base.FILTER == 'LowQual' or current_base.FORMAT == 'GT':
        blanks.fill("-")

    # NOTE(review): this loop runs regardless of the checks above, so the
    # "-" placeholders are overwritten for every sample column present.
    # Confirm whether flagged records were meant to stay masked.
    for count, snp_call in enumerate(current_base[9:]):
        blanks[count] = VCF.process_snp_call(snp_call, current_base.REF,
                                             current_base.ALT)
    return blanks
def callSNPs(current_base, numb_of_seqs, IUPAC_ambiguities=True):
    """Call the base for every sample column of one record.

    Records whose ``FILTER`` is ``'LowQual'``, or whose ``REF`` or ``ALT``
    has a length greater than one, are masked with ``"-"`` for every
    sequence. Otherwise each item from index 9 onwards is resolved with
    ``VCF.process_snp_call``.

    Args:
        current_base: record exposing ``FILTER``, ``REF`` and ``ALT``
            attributes and supporting slicing.
        numb_of_seqs: length of the returned array.
        IUPAC_ambiguities: forwarded to ``VCF.process_snp_call``.

    Returns:
        A NumPy array of dtype ``S1`` holding one entry per sequence.
    """
    # Explicit 'S1' replaces the ``np.string0`` alias, which no longer
    # exists in current NumPy; np.zeros produced this same dtype from it.
    blanks = np.zeros(numb_of_seqs, 'S1')

    # Short-circuit order matters: REF/ALT lengths are only inspected
    # when the record is not already flagged as low quality.
    if (current_base.FILTER == 'LowQual'
            or len(current_base.ALT) > 1
            or len(current_base.REF) > 1):
        blanks.fill("-")
        return blanks

    for count, snp_call in enumerate(current_base[9:]):
        # Forward the caller's choice; it was previously hard-coded to True,
        # which silently ignored the IUPAC_ambiguities argument.
        blanks[count] = VCF.process_snp_call(
            snp_call, current_base.REF, current_base.ALT,
            IUPAC_ambiguities=IUPAC_ambiguities)
    return blanks
def test_homo_alt_genotype_calling(self):
    """A '1/1' call with REF 'A' and ALT 'T' is expected to give 'T'."""
    sample_field = '1/1:10,9:19:99:254,0,337'
    called = VCF.process_snp_call(sample_field, 'A', 'T',
                                  IUPAC_ambiguities=True)
    self.assertEqual(called, 'T')
def test_double_alt_genotype_calling(self):
    """A '1/2' call with REF 'A' and ALT 'T,G' is expected to give 'K'
    when IUPAC ambiguities are enabled."""
    sample_field = '1/2:10,9:19:99:254,0,337'
    called = VCF.process_snp_call(sample_field, 'A', 'T,G',
                                  IUPAC_ambiguities=True)
    self.assertEqual(called, 'K')
def test_header_to_ordered_dict_parsing(self):
    """The dict built from the bgzipped VCF must equal the stored
    ``self.header_dict`` fixture."""
    parsed_header = VCF.make_empty_vcf_ordered_dict(self.bgzip_path)
    self.assertEqual(parsed_header, self.header_dict)
def test_heterozygote_as_N_genotype_calling(self):
    """A '0/1' call is expected to give 'N' when IUPAC ambiguities are
    disabled."""
    sample_field = '0/1:10,9:19:99:254,0,337'
    called = VCF.process_snp_call(sample_field, 'A', 'T',
                                  IUPAC_ambiguities=False)
    self.assertEqual(called, 'N')
def test_double_alt_het_as_N_genotype_calling(self):
    """A '1/2' call with ALT 'T,G' is expected to give 'N' when IUPAC
    ambiguities are disabled."""
    sample_field = '1/2:10,9:19:99:254,0,337'
    called = VCF.process_snp_call(sample_field, 'A', 'T,G',
                                  IUPAC_ambiguities=False)
    self.assertEqual(called, 'N')
def test_home_ref_outgroup_calling(self):
    """The unmodified ``self.vcf_line`` fixture is expected to yield '0'
    from process_outgroup."""
    result = VCF.process_outgroup(self.vcf_line, self.populations)
    self.assertEqual(result, '0')
def test_second_alt_genotype_calling(self):
    """A '0/2' call with REF 'A' and ALT 'T,G' is expected to give 'R'
    when IUPAC ambiguities are enabled."""
    sample_field = '0/2:10,9:19:99:254,0,337'
    called = VCF.process_snp_call(sample_field, 'A', 'T,G',
                                  IUPAC_ambiguities=True)
    self.assertEqual(called, 'R')