Пример #1
0
def interpret_impact(args, var):
    """
    Collect the impacts that SnpEff or VEP report for a variant.

    The annotations are read from the variant's INFO mapping: the "EFF"
    entry for SnpEff (falling back to the GATK-style "SNPEFF_EFFECT" entry
    when "EFF" is absent) or the "CSQ" entry for VEP.

    SnpEff examples:
    0    NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|Aca/Gca|T/A|OR4F5|protein_coding|CODING|ENST00000335137|exon_1_69091_70008),
    1    NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|Aca/Gca|T/A|OR4F5|protein_coding|CODING|ENST00000534990|exon_1_69037_69829)

    VEP examples
    CSQ: Consequence|Codons|Amino_acids|Gene|hgnc|Feature|EXON|polyphen|sift
    non_synonymous_codon|gaT/gaG|D/E|ENSG00000116254|CHD5|ENST00000378006|18/25|benign(0.011)|tolerated(0.3)
    nc_transcript_variant|||ENSG00000116254|CHD5|ENST00000491020|5/6||

    Args:
        args: parsed options. ``args.anno_type`` selects the annotation
            source ("snpEff" or "VEP"); ``args.maj_version`` is forwarded
            to ``snpEff.EffectDetails``.
        var: variant record whose ``INFO`` mapping holds the annotations.

    Returns:
        A list of EffectDetails objects, one per impact found. The list is
        empty when the variant has no annotation for the selected source
        (a warning is then written to stderr).

    Raises:
        SystemExit: ``args.anno_type`` is neither "snpEff" nor "VEP".
        KeyError: a SnpEff effect name is missing from ``snpEff.effect_map``.
    """
    impact_all = []  # every transcript impact found for this variant
    effect_strings = []
    counter = 0  # running anno_id handed to each EffectDetails

    if args.anno_type == "snpEff":
        try:
            effect_strings = var.INFO["EFF"].split(",")
        except KeyError:
            # "in" rather than dict.has_key(), which Python 3 removed.
            if "SNPEFF_EFFECT" in var.INFO:
                impact_all.append(snpEff.gatk_effect_details(var.INFO))
            else:
                sys.stderr.write(
                    "WARNING: The input VCF has no snpEFF annotations. "
                    "Variant impact will be set to unknown\n")

        for effect_string in effect_strings:
            counter += 1
            for piece in snpEff.eff_search.findall(effect_string):
                impact_string = piece[0]  # predicted impact, outside the ()
                impact_detail = piece[1]  # everything else, inside the ()
                impact_info = snpEff.effect_map[impact_string]
                impact_all.append(
                    snpEff.EffectDetails(impact_string, impact_info.priority,
                                         impact_detail, counter,
                                         args.maj_version))

    elif args.anno_type == "VEP":
        try:
            effect_strings = var.INFO["CSQ"].split(",")
        except KeyError:
            # Adjacent literals instead of a backslash inside the string, so
            # the source indentation does not end up in the message.
            sys.stderr.write(
                "WARNING: The input VCF has no VEP annotations. "
                "Variant impact will be set to unknown\n")

        for effect_string in effect_strings:
            # The consequence field may join several terms with "&", e.g.
            # nc_transcript_variant&intron_variant|||ENSG00000243485|...
            # Splitting on "&" also covers the single-term case.
            consequence = effect_string.split("|")[0]
            for impact_string in consequence.split("&"):
                counter += 1
                impact_info = vep.effect_map.get(impact_string)
                # Terms missing from the effect map are kept, with no priority.
                if impact_info is None:
                    priority = None
                else:
                    priority = impact_info.priority
                impact_all.append(
                    vep.EffectDetails(impact_string, priority, effect_string,
                                      counter))
    else:
        # should not get here, as the valid -t options should be handled
        # in main()
        sys.exit("ERROR: Unsupported variant annotation type.\n")

    return impact_all
Пример #2
0
def interpret_severe_impact(args, var):
    """
    Pick the single most severe impact that SnpEff or VEP report for a variant.

    The annotations are read from the variant's INFO mapping: the "EFF"
    entry for SnpEff (falling back to the GATK-style "SNPEFF_EFFECT" entry
    when "EFF" is absent) or the "CSQ" entry for VEP. An effect replaces
    the stored one when its ``priority_code`` is lower (more severe). For
    SnpEff, an effect with the same ``priority_code`` also replaces a
    stored effect whose biotype is not "protein_coding".

    SnpEff examples:
    0    NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|Aca/Gca|T/A|OR4F5|protein_coding|CODING|ENST00000335137|exon_1_69091_70008),
    1    NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|Aca/Gca|T/A|OR4F5|protein_coding|CODING|ENST00000534990|exon_1_69037_69829)

    VEP examples
    CSQ: Consequence|Codons|Amino_acids|Gene|hgnc|Feature|EXON|polyphen|sift
    non_synonymous_codon|gaT/gaG|D/E|ENSG00000116254|CHD5|ENST00000378006|18/25|benign(0.011)|tolerated(0.3)
    nc_transcript_variant|||ENSG00000116254|CHD5|ENST00000491020|5/6||

    Args:
        args: parsed options. ``args.anno_type`` selects the annotation
            source ("snpEff" or "VEP"); ``args.maj_version`` is forwarded
            to ``snpEff.EffectDetails``.
        var: variant record whose ``INFO`` mapping holds the annotations.

    Returns:
        The EffectDetails object of the winning effect, or None when no
        usable annotation was found (a warning is written to stderr when
        the INFO entry itself is missing).

    Raises:
        SystemExit: ``args.anno_type`` is neither "snpEff" nor "VEP".
        KeyError: a SnpEff effect name is missing from ``snpEff.effect_map``.
    """
    effect_strings = []
    impact_details = None
    # start above the largest priority code expected in the effect maps
    max_severity = 9
    count = 0  # running anno_id handed to each EffectDetails
    # biotype of the stored snpEff effect; None until one is stored
    stored_biotype = None

    if args.anno_type == "snpEff":
        try:
            effect_strings = var.INFO["EFF"].split(",")
        except KeyError:
            # "in" rather than dict.has_key(), which Python 3 removed.
            if "SNPEFF_EFFECT" in var.INFO:
                impact_details = snpEff.gatk_effect_details(var.INFO)
            else:
                sys.stderr.write(
                    "WARNING: The input VCF has no snpEFF annotations. "
                    "Variant impact will be set to unknown\n")

        for effect_string in effect_strings:
            count += 1
            for piece in snpEff.eff_search.findall(effect_string):
                impact_string = piece[0]  # predicted impact, outside the ()
                impact_detail = piece[1]  # everything else, inside the ()
                impact_info = snpEff.effect_map[impact_string]
                code = impact_info.priority_code

                more_severe = code < max_severity
                # On equal severity, replace a stored effect that is not
                # protein_coding. This must test the CURRENT effect's code:
                # comparing the stored code with max_severity is always true
                # and lets a less severe effect overwrite the winner.
                breaks_tie = (code == max_severity
                              and stored_biotype != "protein_coding")
                if more_severe or breaks_tie:
                    impact_details = snpEff.EffectDetails(
                        impact_string, impact_info.priority, impact_detail,
                        count, args.maj_version)
                    stored_biotype = impact_details.biotype
                    max_severity = code  # severity to beat from now on

    elif args.anno_type == "VEP":
        try:
            effect_strings = var.INFO["CSQ"].split(",")
        except KeyError:
            # Adjacent literals instead of a backslash inside the string, so
            # the source indentation does not end up in the message.
            sys.stderr.write(
                "WARNING: The input VCF has no VEP annotations. "
                "Variant impact will be set to unknown\n")

        for effect_string in effect_strings:
            # The consequence field may join several terms with "&", e.g.
            # nc_transcript_variant&intron_variant|||ENSG00000243485|...
            # Splitting on "&" also covers the single-term case.
            consequence = effect_string.split("|")[0]
            for impact_string in consequence.split("&"):
                count += 1
                impact_info = vep.effect_map.get(impact_string)
                if impact_info is None:
                    continue  # term missing from the effect map: ignore it
                if impact_info.priority_code < max_severity:
                    impact_details = vep.EffectDetails(
                        impact_string, impact_info.priority, effect_string,
                        count)
                    max_severity = impact_info.priority_code

    else:
        # should not get here, as the valid -t options should be handled
        # in main()
        sys.exit("ERROR: Unsupported variant annotation type.\n")

    return impact_details