def check_hgvs(parser, validator, hdp, hgvscode, jsonfile):
    """Check whether an HGVS code parses, maps to GRCh37 and validates.

    Args:
        parser: Object providing ``parse_hgvs_variant`` (e.g. an
            ``hgvs.parser.Parser``); used to parse the HGVS code.
        validator: Object providing ``validate`` (e.g. an
            ``hgvs.validator.Validator``); used to validate the parsed code.
        hdp: Data provider handed to ``hgvs.assemblymapper.AssemblyMapper``.
        hgvscode (str): HGVS string to test.
        jsonfile: Forwarded unchanged to ``errorcorrbymut`` when the parser
            reports a problem at the first character.

    Returns:
        ``True`` if the code parses and validates, ``False`` if it cannot be
        parsed or validation raises. When the parse error message contains
        "char 1: expected a letter or digit", the result of
        ``errorcorrbymut(hgvscode, jsonfile)`` is returned instead.

    Raises:
        Whatever ``AssemblyMapper(...)`` / ``c_to_g`` raise; mapping errors
        are not caught here.
    """
    try:
        variant = parser.parse_hgvs_variant(hgvscode)
    except hgvs.exceptions.HGVSError as e:
        # Compare against the message text: an exception object is not a
        # container, so ``"..." in e`` raises TypeError on Python 3.
        if "char 1: expected a letter or digit" in str(e):
            return errorcorrbymut(hgvscode, jsonfile)
        # Any other parse failure means the code is not recognized; without
        # this return the code below would hit an unbound ``variant``.
        return False

    vm = hgvs.assemblymapper.AssemblyMapper(
        hdp, assembly_name='GRCh37', alt_aln_method='splign')
    # The mapped variant itself is not needed; the call is kept because it
    # raises when the variant cannot be projected onto the genome.
    # NOTE(review): presumably intentional as an extra sanity check - confirm.
    vm.c_to_g(variant)

    try:
        # Re-parse the canonical string form with the parser that was passed
        # in (the original referenced ``hp``, which is not defined here).
        validator.validate(parser.parse_hgvs_variant(str(variant)))
    except Exception:
        return False
    return True
def check_hgvs(parser, validator, hgvs, submitterteam, submitter, overview, file):
    """Check an HGVS code and record any problem in ``overview``.

    Nothing is returned; failures are written to
    ``overview[submitterteam]['incorrect JSONs'][file]`` under the key
    ``'falscher HGVS-Code'`` (German for "wrong HGVS code"), mapping the
    HGVS string to a short reason.

    Args:
        parser: Object providing ``parse_hgvs_variant``; used to parse the
            HGVS code.
        validator: Object providing ``validate``; used to validate the parsed
            HGVS code.
        hgvs (str): HGVS string that is going to be tested.
        submitterteam (str): Team key in the ``overview`` dictionary.
        submitter (str): Name of the submitter.
        overview (dict): Dictionary with the information of previously parsed
            JSONs; ``overview[submitterteam]['incorrect JSONs']`` must exist.
        file (str): Path of the JSON file the HGVS code came from.
    """
    try:
        hgvstest = validator.validate(parser.parse_hgvs_variant(hgvs))
    except Exception:
        # Any exception from parsing or validating is reported as a parsing
        # error. Return right away: there is no validation result to inspect
        # (falling through would read an unbound local).
        _record_hgvs_error(overview, submitterteam, submitter, file, hgvs,
                           "Parsing error")
        return

    # Kept as an equality test so that only an explicit false result is
    # reported, exactly as before.
    if hgvstest == False:  # noqa: E712
        _record_hgvs_error(overview, submitterteam, submitter, file, hgvs,
                           "internally incorrect HGVS")


def _record_hgvs_error(overview, submitterteam, submitter, file, hgvs, message):
    """Store ``message`` for ``hgvs`` in the file's 'falscher HGVS-Code' entry."""
    file_entry = overview[submitterteam]['incorrect JSONs'].setdefault(file, {})
    if 'falscher HGVS-Code' not in file_entry:
        file_entry['falscher HGVS-Code'] = {}
        # The submitter is only written when the error dict is first created,
        # so an already recorded submitter is never overwritten.
        file_entry['submitter'] = submitter
    file_entry['falscher HGVS-Code'][hgvs] = message
def parsehgvs(hgvs, parser):
    """Parse an HGVS string and extract chromosome, offset, ref and alt.

    Args:
        hgvs (str): HGVS expression handed to ``parser.parse_hgvs_variant``.
        parser: Object providing ``parse_hgvs_variant``.

    Returns:
        tuple: ``(chrom, offset, ref, alt)`` where ``chrom`` is the last two
        characters of the un-versioned accession converted to an integer and
        back to a string (dropping a leading zero), ``offset`` is
        ``posedit.pos.start.offset`` and ``ref`` / ``alt`` are read directly
        from ``posedit``.
    """
    variant = parser.parse_hgvs_variant(hgvs)

    # Accession without its ".version" suffix, e.g. "NC_000007.13" -> "NC_000007".
    accession = variant.ac.split(".")[0]
    chrom = str(int(accession[-2:]))

    # NOTE(review): ref/alt are read from posedit itself, not posedit.edit -
    # confirm the parsed object really exposes them there.
    posedit = variant.posedit
    return (chrom, posedit.pos.start.offset, posedit.ref, posedit.alt)
def from_hgvs(SQ, hgvs_expr):
    """Translate an HGVS expression into a JSON-serialized allele model.

    The expression is parsed with a fresh ``hgvs.parser.Parser``; the
    resulting interval and replacement state are passed through the
    ``identifiers`` helpers to build a location, an allele and their digests.

    Args:
        SQ: Sequence identifier forwarded to ``identifiers.getVSL`` and
            ``identifiers.assembleJSON``.
        hgvs_expr (str): HGVS expression to translate.

    Returns:
        str: The assembled model dumped as indented JSON
        (``ensure_ascii=False``, ``indent=4``).

    Raises:
        ValueError: If the variant starts or ends in an intron, or if its
            edit type is not one of ins/sub/del/delins/identity.
    """
    parser = hgvs.parser.Parser()
    sv = parser.parse_hgvs_variant(hgvs_expr)
    posedit = sv.posedit
    pos = posedit.pos
    edit = posedit.edit

    if isinstance(pos, hgvs.location.BaseOffsetInterval):
        if pos.start.is_intronic or pos.end.is_intronic:
            # f-string so the offending position/edit is actually shown
            # (the message used to print the placeholder literally).
            raise ValueError(
                f"Intronic HGVS variants are not supported ({posedit})")

    if edit.type == 'ins':
        # Insertions get a zero-width interval at the start base.
        start = pos.start.base
        end = pos.start.base
        state = edit.alt
    elif edit.type in ('sub', 'del', 'delins', 'identity'):
        # start is shifted down by one while end is kept as-is; presumably a
        # conversion to 0-based, end-exclusive coordinates - TODO confirm.
        start = pos.start.base - 1
        end = pos.end.base
        # edit.alt may be empty/None (e.g. for deletions); use '' then.
        state = edit.alt or ''
    else:
        raise ValueError(
            f"HGVS variant type {edit.type} is unsupported")

    location = identifiers.getVSL(SQ, start, end)
    vsl = 'ga4gh:VSL.' + identifiers.digest(
        bytes(json.dumps(location), 'utf-8'))
    allele = identifiers.getVA(vsl, state)
    va = 'ga4gh:VA.' + identifiers.digest(bytes(json.dumps(allele), 'utf-8'))
    model = identifiers.assembleJSON([SQ, sv.ac], location, allele, va)
    return json.dumps(model, ensure_ascii=False, indent=4)
def parse_hgvs(parser, candidate):
    """Normalize a candidate HGVS string and return its parsed string form.

    A ``<bases>>None`` suffix on an ``NM...c.<digits>`` prefix is rewritten
    to ``del<bases>`` before parsing, e.g.
    ``NM_007300.3:c.1100C>None`` -> ``NM_007300.3:c.1100delC``.

    Args:
        parser: Object providing ``parse_hgvs_variant``.
        candidate (str): Non-empty candidate HGVS string.

    Returns:
        str: ``str()`` of the parsed variant, or ``""`` when the parser
        raises ``HGVSParseError`` (a message is printed in that case).
    """
    assert candidate

    # TODO: Normalize multiple deletions and delins
    # TODO: Limit to only specific BRCA transcripts
    # ex: 23199084 NM_007294.3:c.2681AA>None|NM_007300.3:c.2681AA>None
    deletion_pattern = r"(NM.*c\.\d*)([ATCGatcg]+)(>None)"
    normalized = re.sub(deletion_pattern, r"\1del\2", candidate)

    try:
        parsed = parser.parse_hgvs_variant(normalized)
        text = str(parsed)
    except hgvs.exceptions.HGVSParseError:
        print("Failed to parse: {}".format(normalized))
        return ""
    return text
def normalize_variants(variants, parser, mapper):
    """Add a ``norm_g_hgvs`` column with normalized genomic HGVS strings.

    For every row a genomic HGVS string is assembled from the ``Chr``,
    ``Pos``, ``Ref`` and ``Alt`` columns and then normalized by round-tripping
    it through ``parser.parse_hgvs_variant`` and ``str()``.

    Args:
        variants: pandas DataFrame with ``Chr``, ``Pos``, ``Ref`` and ``Alt``
            columns; modified in place.
        parser: Object providing ``parse_hgvs_variant``.
        mapper: Unused; kept for interface compatibility.

    Returns:
        The same ``variants`` DataFrame, with ``norm_g_hgvs`` added.
    """
    # Built as a plain list in one pass so that an empty frame simply gets an
    # empty column.
    variants["norm_g_hgvs"] = [
        str(parser.parse_hgvs_variant(_raw_genomic_hgvs(chrom, pos, ref, alt)))
        for chrom, pos, ref, alt in zip(
            variants["Chr"], variants["Pos"], variants["Ref"], variants["Alt"])
    ]
    return variants


def _raw_genomic_hgvs(chrom, pos, ref, alt):
    """Build the un-normalized genomic HGVS string for a single variant."""
    # Pad the chromosome to two characters so single-digit chromosomes give a
    # well-formed accession (1 -> NC_000001) instead of NC_00001.
    # REMIND: the ".11" version suffix is still hard-coded and is only right
    # for accessions whose current version happens to be 11.
    accession = "NC_0000{}.11".format(str(chrom).zfill(2))
    if len(ref) == 1 and len(alt) == 1:
        return "{}:g.{}{}>{}".format(accession, pos, ref, alt)
    # HGVS ranges are inclusive, so the last reference base sits at
    # pos + len(ref) - 1.
    end = pos + len(ref) - 1
    return "{}:g.{}_{}del{}ins{}".format(accession, pos, end, ref, alt)
def hgvs_c_to_g(candidate, parser, mapper):
    """Parse a candidate coding HGVS string and map it to genomic HGVS.

    The candidate (typically extracted from a paper) is first normalized:
    a single-base ``<base>>None`` suffix becomes ``del<base>``, e.g.
    ``NM_007300.3:c.1100C>None`` -> ``NM_007300.3:c.1100delC``. Progress and
    failures are reported with ``print``.

    Args:
        candidate (str): Candidate HGVS coding string.
        parser: Object providing ``parse_hgvs_variant``.
        mapper: Object providing ``c_to_g``.

    Returns:
        tuple: ``(coding_hgvs, genomic_hgvs)`` as strings on success, or
        ``("", "")`` if parsing fails, mapping fails with an invalid
        variant/interval error, or the variant is not of type ``"c"``.
    """
    print(candidate)

    # TODO: Normalize multiple deletions and delins
    # TODO: Limit to only specific BRCA transcripts
    # ex: 23199084 NM_007294.3:c.2681AA>None|NM_007300.3:c.2681AA>None
    normalized = re.sub(r"(NM.*c\.\d*)([ATCG])>None", r"\1del\2", candidate)
    print("=> {}".format(normalized))

    # The mapping step keeps its own inner try so that invalid-variant and
    # invalid-interval errors are only swallowed when raised while mapping,
    # never when raised by the parser.
    try:
        variant_c = parser.parse_hgvs_variant(normalized)
        print("=> {}".format(variant_c))
        try:
            if variant_c.type != "c":
                print("Only coding variants (.c) supported")
            else:
                variant_g = mapper.c_to_g(variant_c)
                print("=> {}".format(variant_g))
                return str(variant_c), str(variant_g)
        except hgvs.exceptions.HGVSInvalidVariantError:
            print("Failed Mapping: HGVSInvalidVariantError")
        except hgvs.exceptions.HGVSInvalidIntervalError:
            print("Failed Mapping: HGVSInvalidIntervalError")
    except hgvs.exceptions.HGVSParseError:
        print("Failed Parsing")

    return "", ""