Пример #1
0
 def test_no_variants_returns_ref_chroms(self, mock_var_file,
                                         mock_load_fasta):
     """Check the PRG string produced when the variant file yields no records.

     The expected string contains no site markers; presumably it is the
     concatenation of the sequences in ``self.chroms`` (fixture not visible
     here -- confirm against the class definition).
     """
     # The mocked variant file hands back an empty record iterator.
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter([])
     mock_load_fasta.return_value = self.chroms

     prg_builder = Vcf_to_prg("", "", "")
     self.assertEqual("AGCAGCCCCGGG", prg_builder._get_string())
Пример #2
0
 def test_one_variant_chroms_with_no_vars_in_same_order(
         self, mock_var_file, mock_load_fasta):
     """Check the PRG string when a single record sits on chromosome 'ref3'.

     The expected string keeps the unmarked sequence ahead of the single
     variant site (markers 5/6) built from the record's REF and two ALTs.
     """
     lone_record = _MockVcfRecord(2, "G", ["CAAA", "CA"], chrom="ref3")
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter([lone_record])
     mock_load_fasta.return_value = self.chroms

     prg_builder = Vcf_to_prg("", "", "")
     self.assertEqual("AGCAGCCCCG5G6CAAA6CA6G", prg_builder._get_string())
 def test_filter_pass_record_kept(self, mock_var_file, mock_load_fasta):
     """Check that a record left on the mock's default filter yields a site."""
     # No explicit filter is passed: the mock record's default is PASS.
     passing_record = _MockVcfRecord(2, "A", "G", chrom="JAC")
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter([passing_record])
     mock_load_fasta.return_value = self.chroms

     prg_builder = Vcf_to_prg("", "", "")
     prg_string = prg_builder._get_string()
     self.assertEqual(prg_string, "A5A6G6C")
Пример #4
0
 def test_adjacent_snps_kept(self, mock_var_file, mock_load_fasta):
     """Check that SNPs at positions 1 and 2 of 'ref2' each get their own site.

     The expected string holds two back-to-back sites (markers 5/6 then 7/8).
     """
     first_snp = _MockVcfRecord(1, "C", ["G"], chrom="ref2")
     second_snp = _MockVcfRecord(2, "C", ["A"], chrom="ref2")
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter([first_snp, second_snp])
     mock_load_fasta.return_value = self.chroms

     prg_builder = Vcf_to_prg("", "", "")
     self.assertEqual("AGCAGC5C6G67C8A8CGGG", prg_builder._get_string())
Пример #5
0
 def test_two_snps_same_chrom(self, mock_var_file, mock_load_fasta):
     """Check the PRG string for two non-adjacent SNPs on chromosome 'ref1'.

     The second record carries two ALT alleles, so its site (markers 7/8)
     lists three alleles in the expected string.
     """
     # Note: the first record passes its ALT as a plain string, not a list.
     snp_at_1 = _MockVcfRecord(1, "A", "G", chrom="ref1")
     snp_at_3 = _MockVcfRecord(3, "C", ["T", "G"], chrom="ref1")
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter([snp_at_1, snp_at_3])
     mock_load_fasta.return_value = self.chroms

     prg_builder = Vcf_to_prg("", "", "")
     self.assertEqual("5A6G6G7C8T8G8AGCCCCGGG", prg_builder._get_string())
Пример #6
0
 def test_snp_inside_del(self, mock_var_file, mock_load_fasta):
     """Check the PRG string when a SNP falls inside a preceding deletion.

     The deletion at position 1 spans positions 1-4 of 'ref1'; the SNP at
     position 2 lies within that span. The expected string contains only
     the deletion's site (markers 5/6), i.e. the nested SNP adds no site.
     """
     recs = [
         # Deletion covering positions 1-4.
         _MockVcfRecord(1, "TTTT", ["T"], chrom="ref1"),
         # SNP located inside the deleted span.
         _MockVcfRecord(2, "T", ["C"], chrom="ref1"),
     ]
     mock_var_file.return_value.fetch.return_value = iter(recs)
     mock_load_fasta.return_value = self.chroms
     converter = Vcf_to_prg("", "", "")
     self.assertEqual("5TTTT6T6CCC", converter._get_string())
Пример #7
0
 def test_snps_at_same_position(self, mock_var_file, mock_load_fasta):
     """Check the PRG string when two SNP records share one position.

     Both records sit at position 2 of 'ref1'. The expected string contains
     a single site (markers 5/6) built from the first record only, i.e. the
     second record at the same position adds no site.
     """
     recs = [
         _MockVcfRecord(2, "T", ["G"], chrom="ref1"),
         # Same position as the record above, different ALT.
         _MockVcfRecord(2, "T", ["C"], chrom="ref1"),
     ]
     mock_var_file.return_value.fetch.return_value = iter(recs)
     mock_load_fasta.return_value = self.chroms
     converter = Vcf_to_prg("", "", "")
     self.assertEqual("T5T6G6TTCCC", converter._get_string())
Пример #8
0
 def test_one_ins_and_one_del_diff_chroms(self, mock_var_file,
                                          mock_load_fasta):
     """Check the PRG string for an insertion on 'ref1' and a deletion on 'ref2'.

     Each record is expected to yield its own site: markers 5/6 for the
     insertion and 7/8 for the deletion.
     """
     insertion = _MockVcfRecord(3, "C", ["CGG"], chrom="ref1")
     deletion = _MockVcfRecord(1, "CCC", ["C"], chrom="ref2")
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter([insertion, deletion])
     mock_load_fasta.return_value = self.chroms

     prg_builder = Vcf_to_prg("", "", "")
     self.assertEqual("AG5C6CGG6AGC7CCC8C8GGG", prg_builder._get_string())
Пример #9
0
def build_from_vcfs(report, action, build_paths, args):
    """Convert a vcf and a fasta reference into a linear prg.

    Args:
        report: passed through to the vcf clustering (or skip) step.
        action: label used only in the progress log message.
        build_paths: provides ``built_vcf``, ``ref`` and ``prg`` paths.
        args: parsed options; ``args.no_vcf_clustering`` selects whether
            vcf record clustering is skipped.

    Raises:
        AssertionError: if the number of record lines counted in the built
            vcf differs from the number of variant sites the converter
            reports after writing the prg.
    """
    if args.no_vcf_clustering:
        _skip_cluster_vcf_records(report, "skip_vcf_record_clustering",
                                  build_paths)
    else:
        # Clustering also runs when only a single vcf is provided, to deal
        # with overlapping records.
        cluster_vcf_records(report, "vcf_record_clustering", build_paths)

    built_vcf = build_paths.built_vcf
    log.info(f"Running {action} on {built_vcf}")

    converter = Vcf_to_prg(built_vcf,
                           build_paths.ref,
                           build_paths.prg,
                           mode="normal")
    converter._write_bytes()

    num_recs_in_vcf = _count_vcf_record_lines(built_vcf)
    # Explicit check instead of `assert cond, log.error(...)`: that form is
    # stripped under `python -O` and, because log.error() returns None, it
    # raised an AssertionError without any message.
    if num_recs_in_vcf != converter.num_sites:
        message = (
            f"Mismatch between number of vcf records in {built_vcf} "
            f"({num_recs_in_vcf}) and number of variant sites in "
            f"PRG string ({converter.num_sites}).\n"
            f"Please report this to developers.")
        log.error(message)
        # AssertionError is kept so existing callers see the same type.
        raise AssertionError(message)
Пример #10
0
 def test_nonACGT_fails_to_convert(self, mock_var_file, mock_load_fasta):
     """Check that converting the character 'N' to bytes raises ValueError."""
     # No records and no reference sequences are needed for this check.
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter([])
     mock_load_fasta.return_value = {}

     prg_builder = Vcf_to_prg("", "", "")
     with self.assertRaises(ValueError):
         prg_builder._to_bytes("N")
Пример #11
0
 def test_filter_nonpass_record_skipped(self, mock_var_file,
                                        mock_load_fasta):
     """Check that a record with a non-PASS filter produces no variant site.

     The single mocked record carries a 'LOW_QUAL' filter entry.
     """
     recs = [_MockVcfRecord(2, "A", "G", filter={"LOW_QUAL": ""})]
     mock_var_file.return_value.fetch.return_value = iter(recs)
     mock_load_fasta.return_value = self.chroms
     converter = Vcf_to_prg("", "", "")
     # Without an assertion this test could never fail. Site markers appear
     # as digits in the PRG string, so a skipped record must leave none.
     # NOTE(review): tighten to an exact assertEqual against the reference
     # sequence once the class's `chroms` fixture is confirmed.
     self.assertNotRegex(converter._get_string(), r"\d")
Пример #12
0
 def test_integer_representation(self, mock_var_file, mock_load_fasta):
     """Check the list of integers the converter produces for ``self.recs``."""
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter(self.recs)
     mock_load_fasta.return_value = self.chroms

     prg_builder = Vcf_to_prg("", "", "")
     expected_ints = [5, 1, 6, 3, 6, 2, 1, 2, 7, 1, 8, 1, 1, 1, 8, 1]
     self.assertEqual(expected_ints, prg_builder._get_ints())
Пример #13
0
 def test_legacy_representation(self, mock_var_file, mock_load_fasta):
     """Check the PRG string produced for ``self.recs`` with mode='legacy'."""
     variant_file = mock_var_file.return_value
     variant_file.fetch.return_value = iter(self.recs)
     mock_load_fasta.return_value = self.chroms

     legacy_builder = Vcf_to_prg("", "", "", mode="legacy")
     self.assertEqual("5A6G5CAC7A8AAA7A", legacy_builder._get_string())
 def _run(self, mode="normal", check=False):
     converter = Vcf_to_prg(str(self.vcf_file),
                            str(self.ref_file),
                            self.outfile_prefix,
                            mode=mode)
     converter._write_string()