Пример #1
0
def check_hgvs(parser, validator, hdp, hgvscode, jsonfile):
    """Check whether an HGVS code can be parsed, mapped and validated.

    Args:
        parser (hgvs.parser.Parser object): Used to parse the HGVS code
        validator (hgvs.validator.Validator object): Used to validate the parsed HGVS code
        hdp: Data provider handed to ``hgvs.assemblymapper.AssemblyMapper``
        hgvscode (string): Input that is going to be tested
        jsonfile: Forwarded unchanged to ``errorcorrbymut`` when the code
            fails to parse with the "char 1" error

    Returns:
        The result of ``errorcorrbymut(hgvscode, jsonfile)`` when parsing
        fails with "char 1: expected a letter or digit"; False when parsing
        fails for any other reason or when validation raises; True otherwise.
    """
    try:
        variant = parser.parse_hgvs_variant(hgvscode)
    except hgvs.exceptions.HGVSError as e:
        # An exception object is not a container, so the message has to be
        # searched as text.
        if "char 1: expected a letter or digit" in str(e):
            return errorcorrbymut(hgvscode, jsonfile)
        # Any other parse failure leaves no variant to work with.
        return False

    vm = hgvs.assemblymapper.AssemblyMapper(hdp,
                                            assembly_name='GRCh37',
                                            alt_aln_method='splign')
    # NOTE(review): the mapped variant is discarded and an exception raised
    # here propagates to the caller - confirm whether that is intended.
    vm.c_to_g(variant)

    try:
        # Re-parse the canonical string form of the variant before validating.
        validator.validate(parser.parse_hgvs_variant(str(variant)))
    except Exception:
        return False

    return True
Пример #2
0
def _record_incorrect_hgvs(overview, submitterteam, submitter, file, hgvs, reason):
    """Store ``reason`` for ``hgvs`` under the file's 'falscher HGVS-Code' entry."""
    entry = overview[submitterteam]['incorrect JSONs'].setdefault(file, {})
    if 'falscher HGVS-Code' not in entry:
        entry['falscher HGVS-Code'] = {}
        # The submitter is only written when the HGVS section is first created.
        entry['submitter'] = submitter
    entry['falscher HGVS-Code'][hgvs] = reason


def check_hgvs(parser, validator, hgvs, submitterteam, submitter, overview, file):
    """Check an HGVS code and record any failure in the overview dictionary.

    Nothing is printed and nothing is returned; the result of the check is
    only visible through the entries written to ``overview``.

    Args:
        parser (hgvs.parser.Parser object): Used to parse the HGVS code
        validator (hgvs.validator.Validator object): Used to validate the parsed HGVS code
        hgvs (string): Input that is going to be tested
        submitterteam (string): Team of submitters in the overview dictionary
        submitter (string): Name of submitter
        overview (dictionary): Dictionary containing the information of previously parsed JSONs;
            ``overview[submitterteam]['incorrect JSONs']`` must already exist
        file (string): directory of json-file

    Returns:
        None. ``overview`` is updated in place: "Parsing error" is recorded
        when parsing or validation raises, "internally incorrect HGVS" when
        the validator returns False.
    """
    try:
        hgvstest = validator.validate(parser.parse_hgvs_variant(hgvs))
    except Exception:
        _record_incorrect_hgvs(overview, submitterteam, submitter, file, hgvs,
                               "Parsing error")
        # There is no validation result to inspect after a failure.
        return

    # Only an explicit False counts as a failed validation; None does not.
    if hgvstest == False:  # noqa: E712
        _record_incorrect_hgvs(overview, submitterteam, submitter, file, hgvs,
                               "internally incorrect HGVS")
Пример #3
0
def parsehgvs(hgvs, parser):
    """Parse an HGVS string and return ``(chrom, offset, ref, alt)``.

    ``chrom`` is the last two characters of the accession (the part before
    the first ".") converted to an int and back to a string, which drops a
    leading zero. The other three values are read straight from the parsed
    variant's ``posedit``.
    """
    variant = parser.parse_hgvs_variant(hgvs)
    accession_root = variant.ac.partition(".")[0]
    chromosome = str(int(accession_root[-2:]))
    # NOTE(review): ref/alt are read from posedit itself and the position
    # from start.offset - confirm these attributes exist on the parsed
    # objects of the hgvs version in use.
    edit = variant.posedit
    return chromosome, edit.pos.start.offset, edit.ref, edit.alt
Пример #4
0
def from_hgvs(SQ, hgvs_expr):
    """Build the JSON document for an HGVS expression.

    The expression is parsed with a fresh ``hgvs.parser.Parser``; a location
    and an allele are built through the ``identifiers`` helpers and digested
    into ``ga4gh:VSL.`` / ``ga4gh:VA.`` identifiers.

    Args:
        SQ: Sequence identifier passed to ``identifiers.getVSL`` and
            ``identifiers.assembleJSON``.
        hgvs_expr: HGVS expression to convert.

    Returns:
        The assembled model serialised with ``json.dumps`` (non-ASCII kept,
        indent of 4).

    Raises:
        ValueError: If the variant is intronic or its edit type is not one
            of 'ins', 'sub', 'del', 'delins', 'identity'.
    """
    parser = hgvs.parser.Parser()
    sv = parser.parse_hgvs_variant(hgvs_expr)

    if isinstance(sv.posedit.pos, hgvs.location.BaseOffsetInterval):
        if sv.posedit.pos.start.is_intronic or sv.posedit.pos.end.is_intronic:
            # The f prefix is required so the offending posedit is shown.
            raise ValueError(
                f"Intronic HGVS variants are not supported ({sv.posedit})")

    if sv.posedit.edit.type == 'ins':
        # An insertion gets an empty span: start and end are both the start base.
        start = sv.posedit.pos.start.base
        end = sv.posedit.pos.start.base
        state = sv.posedit.edit.alt

    elif sv.posedit.edit.type in ('sub', 'del', 'delins', 'identity'):
        # presumably a shift from 1-based inclusive to interbase coordinates - TODO confirm
        start = sv.posedit.pos.start.base - 1
        end = sv.posedit.pos.end.base
        # A missing alt is turned into the empty string.
        state = sv.posedit.edit.alt or ''

    else:
        raise ValueError(
            f"HGVS variant type {sv.posedit.edit.type} is unsupported")

    location = identifiers.getVSL(SQ, start, end)
    vsl = 'ga4gh:VSL.' + identifiers.digest(
        bytes(json.dumps(location), 'utf-8'))
    allele = identifiers.getVA(vsl, state)
    va = 'ga4gh:VA.' + identifiers.digest(bytes(json.dumps(allele), 'utf-8'))
    model = identifiers.assembleJSON([SQ, sv.ac], location, allele, va)

    return json.dumps(model, ensure_ascii=False, indent=4)
Пример #5
0
def parse_hgvs(parser, candidate):
    """Normalise a candidate HGVS string and return its parsed string form.

    A trailing "<bases>>None" is rewritten as a deletion before parsing.
    Returns the string form of the parsed variant, or "" (after printing a
    message) when the parser raises ``HGVSParseError``.
    """
    assert candidate

    # Rewrite deletions written as "<bases>>None":
    # NM_007300.3:c.1100C>None -> NM_007300.3:c.1100delC
    deletion_pattern = re.compile(r"(NM.*c\.\d*)([ATCGatcg]+)(>None)")
    normalized = deletion_pattern.sub(r"\1del\2", candidate)

    # TODO: Normalize multiple deletions and delins
    # TODO: Limit to only specific BRCA transcripts
    # ex: 23199084	NM_007294.3:c.2681AA>None|NM_007300.3:c.2681AA>None

    try:
        rendered = str(parser.parse_hgvs_variant(normalized))
    except hgvs.exceptions.HGVSParseError:
        print("Failed to parse: {}".format(normalized))
        rendered = ""

    return rendered
Пример #6
0
def _genomic_hgvs(row):
    """Build the genomic HGVS string for one row with Chr, Pos, Ref and Alt."""
    # Zero-pad the chromosome so single-digit chromosomes give a six-digit
    # accession number (NC_000001 rather than NC_00001).
    # REMIND: the fixed ".11" version is still a hack.
    accession = "NC_0000{}.11".format(str(row.Chr).zfill(2))

    if len(row.Ref) == 1 and len(row.Alt) == 1:
        return "{}:g.{}{}>{}".format(accession, row.Pos, row.Ref, row.Alt)

    # HGVS ranges are inclusive, so the last reference base sits at
    # Pos + len(Ref) - 1.
    last = row.Pos + len(row.Ref) - 1
    if last == row.Pos:
        span = str(row.Pos)
    else:
        span = "{}_{}".format(row.Pos, last)
    return "{}:g.{}del{}ins{}".format(accession, span, row.Ref, row.Alt)


def normalize_variants(variants, parser, mapper):
    """Add a ``norm_g_hgvs`` column holding a parsed genomic HGVS string per row.

    Args:
        variants: DataFrame with ``Chr``, ``Pos``, ``Ref`` and ``Alt``
            columns; it is modified in place.
        parser: Object whose ``parse_hgvs_variant`` normalises the string.
        mapper: Not used by this function.

    Returns:
        The same ``variants`` object, with the ``norm_g_hgvs`` column set.
    """
    # Build the raw string and normalise it through the hgvs parser in one pass.
    variants["norm_g_hgvs"] = variants.apply(
        lambda row: str(parser.parse_hgvs_variant(_genomic_hgvs(row))),
        axis="columns")

    return variants
Пример #7
0
def hgvs_c_to_g(candidate, parser, mapper):
    """
    Parse a candidate coding HGVS string and map it to a genomic one.

    Returns a tuple of the parsed coding string and the mapped genomic
    string, or ("", "") when parsing fails, mapping fails, or the variant
    type is not "c". Progress and failures are printed.
    """
    print(candidate)
    # Rewrite single-base deletions written as "<base>>None":
    # NM_007300.3:c.1100C>None -> NM_007300.3:c.1100delC
    normalized = re.sub(r"(NM.*c\.\d*)([ATCG])>None", r"\1del\2", candidate)

    # TODO: Normalize multiple deletions and delins
    # TODO: Limit to only specific BRCA transcripts
    # ex: 23199084	NM_007294.3:c.2681AA>None|NM_007300.3:c.2681AA>None

    print("=> {}".format(normalized))

    result = ("", "")
    # The mapping step stays nested inside the parsing try, so a parse error
    # raised while mapping is still reported as a parsing failure.
    try:
        coding = parser.parse_hgvs_variant(normalized)
        print("=> {}".format(coding))
        try:
            if coding.type != "c":
                print("Only coding variants (.c) supported")
            else:
                genomic = mapper.c_to_g(coding)
                print("=> {}".format(genomic))
                result = (str(coding), str(genomic))
        except hgvs.exceptions.HGVSInvalidVariantError:
            print("Failed Mapping: HGVSInvalidVariantError")
        except hgvs.exceptions.HGVSInvalidIntervalError:
            print("Failed Mapping: HGVSInvalidIntervalError")
    except hgvs.exceptions.HGVSParseError:
        print("Failed Parsing")

    return result