def tabix(args):
    """
    Annotate primers with SNP information and write the result as CSV.

    Args:
        args: object exposing ``vcf`` (passed to ``VariantFile``),
            ``p_info`` (passed to ``pt.create_tabix_df``) and ``output``
            (destination handed to ``to_csv``).
    """
    # Open the VCF in a ``with`` block so the handle is released even when a
    # later step raises.  NOTE(review): assumes VariantFile is pysam's, which
    # supports the context-manager protocol -- confirm against the import.
    with VariantFile(args.vcf) as vcf_in:
        p_info = pt.create_tabix_df(args.p_info)

        # Build the per-primer ranges for each side of the pair.
        p_left = pt.primer_range_left(
            p_info["Sequence ID"],
            p_info["Primer Rank"],
            p_info["Chromosome"],
            p_info["Primer Left Seq"],
            p_info["Position1"],
        )
        p_right = pt.primer_range_right(
            p_info["Sequence ID"],
            p_info["Primer Rank"],
            p_info["Chromosome"],
            p_info["Primer Right Seq"],
            p_info["Position2"],
        )

        # Reconcile the primer info with the VCF before querying it.
        pn_left = pt.match_pinfo_to_vcf(p_left, vcf_in)
        pn_right = pt.match_pinfo_to_vcf(p_right, vcf_in)

        left_snps = pt.tabix_fetch(
            pn_left["Sequence ID"],
            pn_left["Primer Rank"],
            pn_left["Chromosome"],
            pn_left["Position1"],
            pn_left["Position2"],
            vcf_in,
        )
        right_snps = pt.tabix_fetch(
            pn_right["Sequence ID"],
            pn_right["Primer Rank"],
            pn_right["Chromosome"],
            pn_right["Position1"],
            pn_right["Position2"],
            vcf_in,
        )

        left_df = pt.tabix_results_to_df(left_snps, "L", "Left SNP Count")
        right_df = pt.tabix_results_to_df(right_snps, "R", "Right SNP Count")

        # Everything that might still read from the VCF stays inside the
        # ``with`` block; the handle is closed only after the CSV is written.
        merged_df = pt.merge_left_right(left_df, right_df, p_info)
        merged_df.to_csv(args.output, index=False)
def test_merge_left_right(self):
    """
    Merge the left and right SNP dataframes.

    Runs the left and right primers from ``self.pvcf`` through the range,
    match, fetch and results-to-dataframe steps, merges both results and
    checks that each SNP count column is present in the merged frame.
    """
    left = pt.primer_range_left(
        self.pvcf['Sequence ID'],
        self.pvcf['Primer Rank'],
        self.pvcf['Chromosome'],
        self.pvcf['Primer Left Seq'],
        self.pvcf['Position1'],
    )
    left_matched = pt.match_pinfo_to_vcf(left, self.vcf_in)
    left_snps = pt.tabix_fetch(
        left_matched['Sequence ID'],
        left_matched['Primer Rank'],
        left_matched['Chromosome'],
        left_matched['Position1'],
        left_matched['Position2'],
        self.vcf_in,
    )
    left_df = pt.tabix_results_to_df(left_snps, "L", "Left SNP Count")

    right = pt.primer_range_right(
        self.pvcf['Sequence ID'],
        self.pvcf['Primer Rank'],
        self.pvcf['Chromosome'],
        self.pvcf['Primer Right Seq'],
        self.pvcf['Position2'],
    )
    right_matched = pt.match_pinfo_to_vcf(right, self.vcf_in)
    right_snps = pt.tabix_fetch(
        right_matched['Sequence ID'],
        right_matched['Primer Rank'],
        right_matched['Chromosome'],
        right_matched['Position1'],
        right_matched['Position2'],
        self.vcf_in,
    )
    # Build the right-hand frame from the right-hand results; feeding
    # left_snps here would leave the right side of the merge untested.
    right_df = pt.tabix_results_to_df(right_snps, "R", "Right SNP Count")

    merged_df = pt.merge_left_right(left_df, right_df, self.pvcf)

    # Check each column on its own: ``'a' and 'b' in cols`` evaluates to
    # ``'b' in cols`` and never looks at the first name.
    self.assertIn('Left SNP Count', merged_df.columns)
    self.assertIn('Right SNP Count', merged_df.columns)
def test_primer_range_right(self):
    """
    Test the range info is added to the right-primer dataframe.

    Calls ``pt.primer_range_right`` with the right primer sequence and
    ``Position2`` columns of ``self.pinfo`` and checks that the result has
    six columns.
    """
    # Exercise the right-hand helper; calling primer_range_left here would
    # leave primer_range_right without coverage.
    right = pt.primer_range_right(
        self.pinfo['Sequence ID'],
        self.pinfo['Primer Rank'],
        self.pinfo['Chromosome'],
        self.pinfo['Primer Right Seq'],
        self.pinfo['Position2'],
    )
    self.assertEqual(len(right.columns), 6)
def test_tabix_fetch(self):
    """
    Check the number of results tabix_fetch returns for the left primers.

    The left primer ranges built from ``self.pvcf`` are matched against
    ``self.vcf_in`` and then fetched; the fixture is expected to give a
    result of length 20.
    """
    primer_ranges = pt.primer_range_left(
        self.pvcf['Sequence ID'],
        self.pvcf['Primer Rank'],
        self.pvcf['Chromosome'],
        self.pvcf['Primer Left Seq'],
        self.pvcf['Position1'],
    )
    matched = pt.match_pinfo_to_vcf(primer_ranges, self.vcf_in)

    fetched = pt.tabix_fetch(
        matched['Sequence ID'],
        matched['Primer Rank'],
        matched['Chromosome'],
        matched['Position1'],
        matched['Position2'],
        self.vcf_in,
    )

    self.assertEqual(len(fetched), 20)
def test_tabix_results_to_df(self):
    """
    Check the dataframe that tabix_results_to_df builds from fetch results.

    The left primers from ``self.pvcf`` go through the range, match and
    fetch steps; the entry at label 0 of the resulting 'Left SNP Count'
    column is expected to be 10 for the fixture.
    """
    primer_ranges = pt.primer_range_left(
        self.pvcf['Sequence ID'],
        self.pvcf['Primer Rank'],
        self.pvcf['Chromosome'],
        self.pvcf['Primer Left Seq'],
        self.pvcf['Position1'],
    )
    matched = pt.match_pinfo_to_vcf(primer_ranges, self.vcf_in)

    fetched = pt.tabix_fetch(
        matched['Sequence ID'],
        matched['Primer Rank'],
        matched['Chromosome'],
        matched['Position1'],
        matched['Position2'],
        self.vcf_in,
    )

    snp_frame = pt.tabix_results_to_df(fetched, "L", "Left SNP Count")
    snp_counts = snp_frame['Left SNP Count']
    self.assertEqual(snp_counts[0], 10)