def test_dup(self):
    """A duplication built from a dict must equal the one built from its string."""
    dict_by_string = {
        "c.77dup": {
            "variant_type": "dup",
            "prefix": "c",
            "start_position": 77,
            "end_position": 77,
        },
        "p.Pro12_Gly18dup": {
            "variant_type": "dup",
            "prefix": "p",
            "start_position": 12,
            "start_target": "Pro",
            "end_position": 18,
            "end_target": "Gly",
        },
    }
    for variant_string, variant_dict in dict_by_string.items():
        with self.subTest(d=variant_dict, s=variant_string):
            from_string = Variant(variant_string)
            from_dict = Variant(variant_dict)
            self.assertEqual(from_string, from_dict)
def test_delins(self):
    """A delins built from a dict must equal the one built from its string."""
    dict_by_string = {
        "c.43-6_595+12delinsCTT": {
            "variant_type": "delins",
            "prefix": "c",
            "start_position": "43-6",
            "end_position": "595+12",
            "variant": "CTT",
        },
        "p.Ile71_Cys80delinsSer": {
            "variant_type": "delins",
            "prefix": "p",
            "start_position": 71,
            "start_target": "Ile",
            "end_position": 80,
            "end_target": "Cys",
            "variant": "Ser",
        },
    }
    for variant_string, variant_dict in dict_by_string.items():
        with self.subTest(d=variant_dict, s=variant_string):
            from_string = Variant(variant_string)
            from_dict = Variant(variant_dict)
            self.assertEqual(from_string, from_dict)
def test_ins(self):
    """An insertion built from a dict must equal the one built from its string."""
    dict_by_string = {
        "p.Ala12_Pro13insGlyProCys": {
            "variant_type": "ins",
            "prefix": "p",
            "start_position": 12,
            "start_target": "Ala",
            "end_position": 13,
            "end_target": "Pro",
            "variant": "GlyProCys",
        },
        "r.22_23insauc": {
            "variant_type": "ins",
            "prefix": "r",
            "start_position": 22,
            "end_position": 23,
            "variant": "auc",
        },
    }
    for variant_string, variant_dict in dict_by_string.items():
        with self.subTest(d=variant_dict, s=variant_string):
            from_string = Variant(variant_string)
            from_dict = Variant(variant_dict)
            self.assertEqual(from_string, from_dict)
def test_del(self):
    """A deletion built from a dict must equal the one built from its string."""
    dict_by_string = {
        "g.44del": {
            "variant_type": "del",
            "prefix": "g",
            "start_position": 44,
            "end_position": 44,
        },
        "c.78+5_78+10del": {
            "variant_type": "del",
            "prefix": "c",
            "start_position": "78+5",
            "end_position": "78+10",
        },
    }
    for variant_string, variant_dict in dict_by_string.items():
        with self.subTest(d=variant_dict, s=variant_string):
            from_string = Variant(variant_string)
            from_dict = Variant(variant_dict)
            self.assertEqual(from_string, from_dict)
def test_sub(self):
    """A substitution built from a dict must equal the one built from its string."""
    dict_by_string = {
        "p.Glu27Trp": {
            "variant_type": "sub",
            "prefix": "p",
            "position": 27,
            "target": "Glu",
            "variant": "Trp",
        },
        "c.122-6T>A": {
            "variant_type": "sub",
            "prefix": "c",
            "position": "122-6",
            "target": "T",
            "variant": "A",
        },
    }
    for variant_string, variant_dict in dict_by_string.items():
        with self.subTest(d=variant_dict, s=variant_string):
            from_string = Variant(variant_string)
            from_dict = Variant(variant_dict)
            self.assertEqual(from_string, from_dict)
def test_equal(self):
    """An "equal" variant built from a dict must equal the one built from its string.

    The positions in these dicts are given as strings rather than integers.
    """
    dict_by_string = {
        "p.Glu27=": {
            "variant_type": "equal",
            "prefix": "p",
            "position": "27",
            "target": "Glu",
        },
        "c.12=": {
            "variant_type": "equal",
            "prefix": "c",
            "start_position": "12",
            "end_position": "12",
        },
        "c.1_3=": {
            "variant_type": "equal",
            "prefix": "c",
            "start_position": "1",
            "end_position": "3",
        },
    }
    for variant_string, variant_dict in dict_by_string.items():
        with self.subTest(d=variant_dict, s=variant_string):
            from_string = Variant(variant_string)
            from_dict = Variant(variant_dict)
            self.assertEqual(from_string, from_dict)
def test_invalid_raises_error(self) -> None:
    """Accepted strings construct cleanly; rejected ones raise MaveHgvsParseError."""
    accepted = (
        "p.Glu27Trp",
        "c.122-6T>A",
        "g.44del",
        "c.78+5_78+10del",
        "c.77dup",
        "p.Pro12_Gly18dup",
        "p.Ala12_Pro13insGlyProCys",
        "r.22_23insauc",
        "c.43-6_595+12delinsCTT",
        "p.Ile71_Cys80delinsSer",
        "p.=",
        "c.=",
        "p.(=)",
        "c.1_3=",
        "c.12=",
        "g.88_99=",
        "c.43-6_595+12=",
    )
    rejected = (
        "g.Glu27Trp",
        "p.27Glu>Trp",
        "p.122-6T>A",
        "G>A",
        "22G>A",
        "G.44del",
        "a.78+5_78+10del",
        "77dup",
        "n.Pro12_Gly18dup",
        "p.Pro12_Gly18insGlyProAla",
        "g.22_23insauc",
        "g.25_24del",
        "g.25_24ins",
        "r.22_24insauc",
        "r.43-6_595+12delinsctt",
        "x.=",
        "c.(=)",
        "p.(Gly24=)",
        "p.Gly24(=)",
    )

    for variant_string in accepted:
        with self.subTest(s=variant_string):
            Variant(variant_string)  # should pass

    for variant_string in rejected:
        with self.subTest(s=variant_string), self.assertRaises(MaveHgvsParseError):
            Variant(variant_string)
def test_invalid_keys(self):
    """Each of these dicts must be rejected with MaveHgvsParseError."""
    rejected_dicts = (
        # "equal" given both a start/end position pair and a "target".
        {
            "variant_type": "equal",
            "prefix": "p",
            "start_position": "27",
            "end_position": "27",
            "target": "Glu",
        },
        # "dup" given a single "position" instead of start/end positions.
        {"variant_type": "dup", "prefix": "c", "position": 77},
        # "test" is the variant type under test here.
        {
            "variant_type": "test",
            "prefix": "c",
            "start_position": 77,
            "end_position": 77,
        },
    )
    for variant_dict in rejected_dicts:
        with self.subTest(d=variant_dict), self.assertRaises(MaveHgvsParseError):
            Variant(variant_dict)
def test_missing_keys(self):
    """Each of these incomplete dicts must be rejected with MaveHgvsParseError."""
    incomplete_dicts = (
        # No "variant_type" key.
        {"prefix": "p", "position": 27, "target": "Glu", "variant": "Trp"},
        # No "prefix" key.
        {"variant_type": "sub", "position": "122-6", "target": "T", "variant": "A"},
        # No "start_position" key.
        {
            "variant_type": "delins",
            "prefix": "p",
            "start_target": "Ile",
            "end_position": 80,
            "end_target": "Cys",
            "variant": "Ser",
        },
    )
    for variant_dict in incomplete_dicts:
        with self.subTest(d=variant_dict), self.assertRaises(MaveHgvsParseError):
            Variant(variant_dict)
def test_invalid_type(self):
    """Passing an int, None, float or tuple to Variant must raise ValueError."""
    unsupported_inputs = (
        1234,
        None,
        5.55,
        ("p", "Ile", 80, "Cys"),
    )
    for candidate in unsupported_inputs:
        with self.subTest(v=candidate), self.assertRaises(ValueError):
            Variant(candidate)
def test_valid_dna_delins(self):
    """These delins variants are accepted with target "ACGT" and round-trip via str()."""
    target = "ACGT"
    for variant_string in ("c.1_2delinsA", "c.4delinsTAAGC"):
        with self.subTest(target=target, s=variant_string):
            variant = Variant(variant_string, targetseq=target)
            self.assertEqual(variant_string, str(variant))
def parse_variant_strings(
    variants: List[str],
) -> Tuple[List[Optional[Variant]], List[Optional[str]]]:
    """Build a Variant from each MAVE-HGVS string, recording failures as messages.

    Parameters
    ----------
    variants : List[str]
        MAVE-HGVS strings to convert.

    Returns
    -------
    Tuple[List[Optional[Variant]], List[Optional[str]]]
        Two lists, each the same length as ``variants`` and aligned with it
        index by index. For an input that was parsed, the first list holds
        the Variant and the second holds None. For an input that raised
        MaveHgvsParseError, the first list holds None and the second holds
        the text of that error.

    """
    parsed = []
    messages = []
    for variant_string in variants:
        try:
            outcome = (Variant(variant_string), None)
        except MaveHgvsParseError as parse_error:
            outcome = (None, str(parse_error))
        # Exactly one slot of the pair is populated; the other is None.
        parsed.append(outcome[0])
        messages.append(outcome[1])
    return parsed, messages
def test_invalid_dna_delins(self):
    """These delins strings must raise MaveHgvsParseError with target "ACGT"."""
    target = "ACGT"
    # NOTE(review): "c.10_delinsTCG" has a dangling underscore, so it may be
    # rejected as malformed before the target sequence is consulted - confirm
    # whether "c.10delinsTCG" was intended.
    for variant_string in ("c.4_5delinsA", "c.10_delinsTCG"):
        with self.subTest(target=target, s=variant_string), self.assertRaises(
            MaveHgvsParseError
        ):
            Variant(variant_string, targetseq=target)
def test_invalid_protein_equal(self):
    """"p.Trp5=" must raise MaveHgvsParseError against the 4-residue target "RCQY"."""
    target = "RCQY"
    for variant_string in ("p.Trp5=",):
        with self.subTest(target=target, s=variant_string), self.assertRaises(
            MaveHgvsParseError
        ):
            Variant(variant_string, targetseq=target)
def test_valid_protein_equal(self):
    """These protein "=" variants are accepted with target "RCQY" and round-trip."""
    target = "RCQY"
    for variant_string in ("p.Arg1=", "p.Tyr4=", "p.="):
        with self.subTest(target=target, s=variant_string):
            variant = Variant(variant_string, targetseq=target)
            self.assertEqual(variant_string, str(variant))
def test_prefix(self):
    """A Variant built from ``<prefix>.=`` must expose that prefix via ``.prefix``."""
    for expected_prefix in "gmocnr":
        variant_string = f"{expected_prefix}.="
        with self.subTest(p=expected_prefix, s=variant_string):
            variant = Variant(variant_string)
            self.assertEqual(expected_prefix, variant.prefix)
def test_target_id(self):
    """Check ``target_id`` extraction and that the full string round-trips.

    Strings without a ``<id>:`` part are expected to yield a ``target_id`` of None.
    """
    expectations = [
        (None, "p.Glu27Trp"),
        (None, "c.122-6T>A"),
        ("GeneX", "GeneX:p.Glu27Trp"),
        ("YFG1", "YFG1:c.122-6T>A"),
        ("ENST00000471181.7", "ENST00000471181.7:c.122-6T>A"),
        ("NM_007294.4", "NM_007294.4:c.122-6T>A"),
    ]

    # First pass: the reported target identifier.
    for expected_id, variant_string in expectations:
        with self.subTest(t=expected_id, s=variant_string):
            variant = Variant(variant_string)
            self.assertEqual(expected_id, variant.target_id)

    # Second pass: str() reproduces the input, identifier included.
    for _, variant_string in expectations:
        with self.subTest(s=variant_string):
            variant = Variant(variant_string)
            self.assertEqual(variant_string, str(variant))
def test_skips_extended(self):
    """These variants are accepted with target "ACGT" and round-trip via str().

    Each string uses an offset ("+", "-") or "*" position.
    """
    target = "ACGT"
    extended_strings = (
        "c.1+3A>T",
        "c.*33G>C",
        "c.43-6_595+12delinsCTT",
    )
    for variant_string in extended_strings:
        with self.subTest(target=target, s=variant_string):
            variant = Variant(variant_string, targetseq=target)
            self.assertEqual(variant_string, str(variant))
def test_matching_protein_substitution(self):
    """These protein substitutions are accepted with target "RCQY" and round-trip."""
    target = "RCQY"
    matching_strings = (
        "p.Arg1Ala",
        "p.Gln3Trp",
        "p.[Arg1Ala;Gln3Trp]",
    )
    for variant_string in matching_strings:
        with self.subTest(target=target, s=variant_string):
            variant = Variant(variant_string, targetseq=target)
            self.assertEqual(variant_string, str(variant))
def test_invalid_dna_dup(self):
    """Duplications reaching past position 4 must raise with target "ACGT"."""
    target = "ACGT"
    out_of_range_strings = (
        "c.1_5dup",
        "c.6_8dup",
        "c.7dup",
    )
    for variant_string in out_of_range_strings:
        with self.subTest(target=target, s=variant_string), self.assertRaises(
            MaveHgvsParseError
        ):
            Variant(variant_string, targetseq=target)
def test_matching_dna_substitution(self):
    """These DNA substitutions are accepted with target "ACGT" and round-trip."""
    target = "ACGT"
    matching_strings = (
        "c.1A>T",
        "c.3G>C",
        "c.[1A>T;3G>C]",
    )
    for variant_string in matching_strings:
        with self.subTest(target=target, s=variant_string):
            variant = Variant(variant_string, targetseq=target)
            self.assertEqual(variant_string, str(variant))
def test_nonmatching_dna_substitution(self):
    """These DNA substitutions must raise MaveHgvsParseError with target "ACGT"."""
    target = "ACGT"
    mismatched_strings = (
        "c.1C>T",
        "c.3T>C",
        "c.[1A>T;3T>C]",
        "c.5A>G",
    )
    for variant_string in mismatched_strings:
        with self.subTest(target=target, s=variant_string), self.assertRaises(
            MaveHgvsParseError
        ):
            Variant(variant_string, targetseq=target)
def test_nonmatching_protein_substitution(self):
    """These protein substitutions must raise MaveHgvsParseError with target "RCQY"."""
    target = "RCQY"
    mismatched_strings = (
        "p.Cys1Ala",
        "p.Ala3Trp",
        "p.[Arg1Ala;Cys3Trp]",
        "p.Asp5Glu",
    )
    for variant_string in mismatched_strings:
        with self.subTest(target=target, s=variant_string), self.assertRaises(
            MaveHgvsParseError
        ):
            Variant(variant_string, targetseq=target)
def test_uses_extended_positions(self):
    """``uses_extended_positions()`` is False for the first group, True for the second."""
    plain_position_strings = (
        "p.Glu27Trp",
        "g.44del",
        "c.77dup",
        "p.Pro12_Gly18dup",
        "p.Ala12_Pro13insGlyProCys",
        "r.22_23insauc",
        "r.22g>u",
        "p.Ile71_Cys80delinsSer",
        "p.=",
        "p.[Pro12_Gly18dup;Glu27Trp]",
        "r.[22g>u;35del]",
    )
    extended_position_strings = (
        "c.122-6T>A",
        "c.78+5_78+10del",
        "c.43-6_595+12delinsCTT",
        "c.*33G>C",
        "r.33+12a>c",
        "c.[12G>T;122-6T>A]",
        "c.[43-6_595+12delinsCTT;*33G>C]",
    )

    for variant_string in plain_position_strings:
        with self.subTest(s=variant_string):
            variant = Variant(variant_string)
            self.assertFalse(variant.uses_extended_positions())

    for variant_string in extended_position_strings:
        with self.subTest(s=variant_string):
            variant = Variant(variant_string)
            self.assertTrue(variant.uses_extended_positions())
def test_is_multi_variant(self):
    """``is_multi_variant()`` is False for single variants and True for bracketed lists.

    The multi-variant list used to be empty, so the ``assertTrue`` loop never
    executed and the positive case went untested. It now holds bracketed
    ``prefix.[a;b]`` strings.
    """
    single_variant_strings = [
        "p.Glu27Trp",
        "c.122-6T>A",
        "g.44del",
        "c.78+5_78+10del",
        "c.77dup",
        "p.Pro12_Gly18dup",
        "p.Ala12_Pro13insGlyProCys",
        "r.22_23insauc",
        "c.43-6_595+12delinsCTT",
        "p.Ile71_Cys80delinsSer",
        "p.=",
    ]
    # Each string below combines two variants inside one bracketed list.
    multi_variant_strings = [
        "p.[Pro12_Gly18dup;Glu27Trp]",
        "r.[22g>u;35del]",
        "c.[12G>T;122-6T>A]",
        "c.[43-6_595+12delinsCTT;*33G>C]",
    ]

    for s in single_variant_strings:
        with self.subTest(s=s):
            v = Variant(s)
            self.assertFalse(v.is_multi_variant())

    for s in multi_variant_strings:
        with self.subTest(s=s):
            v = Variant(s)
            self.assertTrue(v.is_multi_variant())
def test_target_identical(self) -> None:
    """``is_target_identical()`` is True for the first group, False for the second."""
    identical = [f"{prefix}.=" for prefix in "gmocnr"]
    identical += ["p.(=)", "c.1_3="]

    not_identical = (
        "p.Ter345Lys",
        "p.Cys22=",
        "g.48C>A",
        "c.122-6T>A",
        "g.22delinsAACG",
        "c.83_85delinsT",
    )

    for variant_string in identical:
        with self.subTest(s=variant_string):
            variant = Variant(variant_string)
            self.assertTrue(variant.is_target_identical())

    for variant_string in not_identical:
        with self.subTest(s=variant_string):
            variant = Variant(variant_string)
            self.assertFalse(variant.is_target_identical())
def test_del(self) -> None:
    """Deletion strings must survive a Variant -> str() round trip unchanged."""
    deletion_strings = (
        "g.44del",
        "c.78+5_78+10del",
        "c.1_95del",
        "p.Gly18del",
        "p.Gln7_Asn19del",
        "r.34_36del",
    )
    for variant_string in deletion_strings:
        with self.subTest(s=variant_string):
            variant = Variant(variant_string)
            self.assertEqual(variant_string, str(variant))
def test_dup(self) -> None:
    """Duplication strings must survive a Variant -> str() round trip unchanged."""
    duplication_strings = (
        "g.22_24dup",
        "c.77dup",
        "c.101+1_101+7dup",
        "p.Pro12_Gly18dup",
        "p.Cys5dup",
        "r.12dup",
    )
    for variant_string in duplication_strings:
        with self.subTest(s=variant_string):
            variant = Variant(variant_string)
            self.assertEqual(variant_string, str(variant))
def test_delins(self) -> None:
    """Delins strings must survive a Variant -> str() round trip unchanged."""
    delins_strings = (
        "g.22delinsAACG",
        "c.83_85delinsT",
        "c.43-6_595+12delinsCTT",
        "p.Ile71_Cys80delinsSer",
        "p.His44delinsValProGlyGlu",
        "r.92delinsgac",
    )
    for variant_string in delins_strings:
        with self.subTest(s=variant_string):
            variant = Variant(variant_string)
            self.assertEqual(variant_string, str(variant))
def test_ordering(self):
    """Check how ``relaxed_ordering`` treats out-of-order multi-variant strings.

    With ``relaxed_ordering=False`` each input must raise MaveHgvsParseError.
    With ``relaxed_ordering=True`` each input is accepted and ``str()`` yields
    the expected reordered string.
    """
    # Maps each out-of-order input to the string expected from str().
    ordered_by_unordered = {
        "p.[Gly345Lys;Glu27Trp]": "p.[Glu27Trp;Gly345Lys]",
        "p.[Glu27Trp;Gly18del;Ter345Lys]": "p.[Gly18del;Glu27Trp;Ter345Lys]",
        "c.[122T>A;1_35del;78+5_78+10del]": "c.[1_35del;78+5_78+10del;122T>A]",
    }

    for unordered in ordered_by_unordered:
        with self.subTest(s=unordered), self.assertRaises(MaveHgvsParseError):
            Variant(unordered, relaxed_ordering=False)

    for unordered in ordered_by_unordered:
        with self.subTest(s=unordered):
            # Should pass creation
            Variant(unordered, relaxed_ordering=True)

    for unordered, expected in ordered_by_unordered.items():
        with self.subTest(s=unordered):
            variant = Variant(unordered, relaxed_ordering=True)
            self.assertEqual(expected, str(variant))