Пример #1
0
def test_gene_predictions_two_genes():
    """Check the SIFT predictions reported for a variant spanning two genes."""
    ## GIVEN a list with two annotated genes
    genes = [
        {
            "hgnc_symbol": "AAA",
            "sift_prediction": "deleterious",
            "polyphen_prediction": "probably_damaging",
            "region_annotation": "exonic",
            "functional_annotation": "missense_variant",
            "spliceai_score": 0.17,
            "spliceai_position": -4,
            "spliceai_prediction": ["ds 0.17 dp -4"],
        },
        {
            "hgnc_symbol": "BBB",
            "sift_prediction": "tolerated",
            "polyphen_prediction": "unknown",
            "region_annotation": "exonic",
            "functional_annotation": "synonymous_variant",
            "spliceai_score": 0.9,
            "spliceai_position": 5,
            "spliceai_prediction": ["ds 0.9 dp 5"],
        },
    ]

    ## WHEN parsing the gene predictions
    parsed = predictions(genes)

    ## THEN every SIFT prediction is prefixed with its gene symbol
    ## (compared as sets, so the order is not checked)
    expected_sift = {"AAA:deleterious", "BBB:tolerated"}
    assert set(parsed["sift_predictions"]) == expected_sift
Пример #2
0
def test_gene_predictions_one_gene():
    """Check the complete predictions dict produced for one gene without a symbol."""
    ## GIVEN a list holding a single gene that has no "hgnc_symbol"
    genes = [
        {
            "sift_prediction": "deleterious",
            "polyphen_prediction": "probably_damaging",
            "region_annotation": "exonic",
            "functional_annotation": "missense_variant",
            "spliceai_score": 0.17,
            "spliceai_position": -4,
            "spliceai_prediction": ["ds 0.17 dp -4"],
        }
    ]

    ## WHEN parsing the gene predictions
    parsed = predictions(genes)

    ## THEN every value is wrapped in a one-element list under the plural key
    expected = {
        "sift_predictions": ["deleterious"],
        "polyphen_predictions": ["probably_damaging"],
        "region_annotations": ["exonic"],
        "functional_annotations": ["missense_variant"],
        "spliceai_scores": [0.17],
        "spliceai_positions": [-4],
        "spliceai_predictions": [["ds 0.17 dp -4"]],
    }
    assert parsed == expected
Пример #3
0
def gene_variants(store,
                  pymongo_cursor,
                  variant_count,
                  institute_id,
                  page=1,
                  per_page=50):
    """Pre-process one page of variants for display.

    Variants whose case cannot be found, or whose case is neither owned by
    nor shared with one of the current user's institutes, are left out.

    Args:
        store: adapter providing ``case(case_id=...)``; it is also handed to
            the ``user_institutes``, ``update_HGNC_symbols`` and ``gene``
            helpers
        pymongo_cursor: query result supporting ``skip()`` and ``limit()``
        variant_count(int): total number of variants matched by the query
        institute_id: not used by this function
        page(int): 1-based page number; values below 1 behave like page 1
        per_page(int): number of variants on each page

    Returns:
        dict: ``{"variants": list of variant dicts, "more_variants": bool}``
    """

    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = pymongo_cursor.skip(skip_count).limit(per_page)
    my_institutes = {
        inst["_id"]
        for inst in user_institutes(store, current_user)
    }
    variants = []

    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        # hide other institutes for now
        other_institutes = {variant_case_obj.get("owner")}
        other_institutes.update(variant_case_obj.get("collaborators", []))
        if my_institutes.isdisjoint(other_institutes):
            # If the user does not have access to the information we skip it
            continue

        genome_build = get_genome_build(variant_case_obj)
        variant_genes = variant_obj.get("genes")
        # The return value is not needed here.
        # NOTE(review): presumably this updates the gene dicts in place - confirm
        update_HGNC_symbols(store, variant_genes, genome_build)

        # Populate variant HGVS and predictions
        if variant_genes is not None:
            # Collected per variant so that nothing leaks in from the
            # previously processed variant and an empty gene list is safe
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                hgnc_id = gene_obj["hgnc_id"]
                gene_symbols.append(gene(store, hgnc_id)["symbol"])

                # gather HGVS info from gene transcripts
                (hgvs_nucleotide, hgvs_protein) = get_hgvs(gene_obj)
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # A single HGVS string is only set for single-gene variants
            if len(gene_symbols) == 1:
                variant_obj["hgvs"] = hgvs_str(gene_symbols, hgvs_p, hgvs_c)

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}
Пример #4
0
def test_gene_predictions_no_info():
    """Check that an empty gene list yields empty prediction lists."""
    ## GIVEN an empty list of genes
    no_genes = []
    expected = {
        key: []
        for key in (
            "sift_predictions",
            "polyphen_predictions",
            "region_annotations",
            "functional_annotations",
        )
    }

    ## WHEN parsing the gene predictions
    parsed = predictions(no_genes)

    ## THEN every prediction and annotation list is empty
    assert parsed == expected
Пример #5
0
def test_gene_predictions_one_gene_no_sift():
    """Check that a missing SIFT prediction is reported as "-"."""
    ## GIVEN a list with one gene that lacks a SIFT prediction
    genes = [
        {
            "hgnc_symbol": "AAA",
            "polyphen_prediction": "probably_damaging",
            "region_annotation": "exonic",
            "functional_annotation": "missense_variant",
        }
    ]

    ## WHEN parsing the gene predictions
    parsed = predictions(genes)

    ## THEN the SIFT entry is a placeholder and the other values are kept
    expected = {
        "sift_predictions": ["-"],
        "polyphen_predictions": ["probably_damaging"],
        "region_annotations": ["exonic"],
        "functional_annotations": ["missense_variant"],
    }
    assert parsed == expected
Пример #6
0
def gene_variants(store, pymongo_cursor, variant_count, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Variants whose case cannot be found are left out of the result.

    Args:
        store: adapter providing ``case(case_id=...)`` and
            ``hgnc_gene_caption(hgnc_id)``; it is also handed to the
            ``update_HGNC_symbols`` helper
        pymongo_cursor: query result supporting ``skip()`` and ``limit()``
        variant_count(int): total number of variants matched by the query
        page(int): 1-based page number; values below 1 behave like page 1
        per_page(int): number of variants on each page

    Returns:
        dict: ``{"variants": list of variant dicts, "more_variants": bool}``
    """

    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = pymongo_cursor.skip(skip_count).limit(per_page)
    variants = []

    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered, skip it instead
            # of failing on the attribute access below
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        genome_build = get_genome_build(variant_case_obj)
        variant_genes = variant_obj.get("genes")
        update_HGNC_symbols(store, variant_genes, genome_build)

        # Populate variant HGVS and predictions
        if variant_genes is not None:
            # Collected per variant so that nothing leaks in from the
            # previously processed variant and an empty gene list is safe
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                hgnc_id = gene_obj["hgnc_id"]
                gene_caption = store.hgnc_gene_caption(hgnc_id)
                gene_symbols.append(gene_caption["hgnc_symbol"])

                # gather HGVS info from gene transcripts
                (hgvs_nucleotide, hgvs_protein) = get_hgvs(gene_obj)
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # A single HGVS string is only set for single-gene variants
            if len(gene_symbols) == 1:
                variant_obj["hgvs"] = hgvs_str(gene_symbols, hgvs_p, hgvs_c)

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}
Пример #7
0
def test_gene_predictions_one_gene_no_sift():
    """Check that missing SIFT and SpliceAI position values are reported as "-"."""
    ## GIVEN a list with one gene lacking a SIFT prediction and a SpliceAI position
    genes = [
        {
            "hgnc_symbol": "AAA",
            "polyphen_prediction": "probably_damaging",
            "region_annotation": "exonic",
            "functional_annotation": "missense_variant",
            "spliceai_score": 0.17,
            "spliceai_prediction": ["ds 0.17"],
        }
    ]

    ## WHEN parsing the gene predictions
    parsed = predictions(genes)

    ## THEN the missing values are placeholders and the rest is kept
    expected = {
        "sift_predictions": ["-"],
        "polyphen_predictions": ["probably_damaging"],
        "region_annotations": ["exonic"],
        "functional_annotations": ["missense_variant"],
        "spliceai_scores": [0.17],
        "spliceai_positions": ["-"],
        "spliceai_predictions": [["ds 0.17"]],
    }
    assert parsed == expected
Пример #8
0
def parse_variant(
    store,
    institute_obj,
    case_obj,
    variant_obj,
    update=False,
    genome_build="37",
    get_compounds=True,
):
    """Parse information about variants.

    - Adds information about compounds
    - Updates the information about compounds if necessary and 'update=True'
    - Fills in missing HGNC symbols, comments, predictions, a readable ACMG
      classification, a display value for the length and ``end_chrom``

    Args:
        store(scout.adapter.MongoAdapter)
        institute_obj(scout.models.Institute)
        case_obj(scout.models.Case)
        variant_obj(scout.models.Variant)
        update(bool): If variant should be updated in database
        genome_build(str)
        get_compounds(bool): If compound information should be refreshed
            and sorted

    Returns:
        variant_obj(dict): the decorated variant; when the variant was saved
            this is the object returned by ``store.update_variant``
    """
    has_changed = False
    compounds = variant_obj.get("compounds", [])
    if compounds and get_compounds:
        # Check if we need to add compound information
        # If it is the first time the case is viewed we fill in some compound information
        if "not_loaded" not in compounds[0]:
            variant_obj["compounds"] = store.update_variant_compounds(variant_obj)
            has_changed = True

        # sort compounds on combined rank score, highest first
        variant_obj["compounds"] = sorted(
            variant_obj["compounds"],
            key=lambda compound: -compound["combined_score"],
        )

    # Update the hgnc symbols if they are incorrect
    variant_genes = variant_obj.get("genes")
    if variant_genes is not None:
        for gene_obj in variant_genes:
            # If there is no hgnc id there is nothing we can do
            hgnc_id = gene_obj.get("hgnc_id")
            if not hgnc_id:
                continue
            # Only genes without a symbol need to be looked up
            if gene_obj.get("hgnc_symbol") is not None:
                continue
            hgnc_gene = store.hgnc_gene(hgnc_id, build=genome_build)
            if not hgnc_gene:
                continue
            has_changed = True
            gene_obj["hgnc_symbol"] = hgnc_gene["hgnc_symbol"]

    # We update the variant if some information was missing from loading
    # Or if symbols in reference genes have changed
    if update and has_changed:
        variant_obj = store.update_variant(variant_obj)

    variant_obj["comments"] = store.events(
        institute_obj,
        case=case_obj,
        variant_id=variant_obj["variant_id"],
        comments=True,
    )

    if variant_genes:
        variant_obj.update(predictions(variant_genes))
        if variant_obj.get("category") == "cancer":
            variant_obj.update(get_variant_info(variant_genes))

    # Decorate the compounds that are actually returned: the list read at the
    # top of the function is stale once the compounds have been refreshed or
    # the variant has been replaced by the stored version
    for compound_obj in variant_obj.get("compounds") or []:
        compound_obj.update(predictions(compound_obj.get("genes", [])))

    # Integer ACMG classifications are translated via the two lookup maps
    classification = variant_obj.get("acmg_classification")
    if isinstance(classification, int):
        acmg_code = ACMG_MAP[classification]
        variant_obj["acmg_classification"] = ACMG_COMPLETE_MAP[acmg_code]

    # convert length for SV variants, any other value is kept as it is
    variant_length = variant_obj.get("length")
    variant_obj["length"] = {
        100000000000: "inf",
        -1: "n.d.",
    }.get(variant_length, variant_length)
    if "end_chrom" not in variant_obj:
        variant_obj["end_chrom"] = variant_obj["chromosome"]

    return variant_obj
Пример #9
0
def gene_variants(store, variants_query, institute_id, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Variants whose case cannot be found, or whose case is neither owned by
    nor shared with one of the current user's institutes, are left out.

    Args:
        store: adapter providing ``case(case_id=...)`` and
            ``hgnc_gene(hgnc_id, build=...)``; it is also handed to the
            ``user_institutes`` and ``gene`` helpers
        variants_query: query result supporting ``count()``, ``skip()`` and
            ``limit()``
        institute_id: not used by this function
        page(int): 1-based page number; values below 1 behave like page 1
        per_page(int): number of variants on each page

    Returns:
        dict: ``{"variants": list of variant dicts, "more_variants": bool}``
    """
    # We need to call variants_collection.count_documents here
    # NOTE(review): presumably a PyMongo cursor, whose count() is deprecated;
    # the collection is not available in this function - confirm with caller
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)

    my_institutes = {
        inst["_id"]
        for inst in user_institutes(store, current_user)
    }

    variants = []
    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        # hide other institutes for now
        other_institutes = {variant_case_obj.get("owner")}
        other_institutes.update(variant_case_obj.get("collaborators", []))
        if my_institutes.isdisjoint(other_institutes):
            # If the user does not have access to the information we skip it
            continue

        # Any build other than "37" or "38" falls back to "37"
        genome_build = variant_case_obj.get("genome_build", "37")
        if genome_build not in ("37", "38"):
            genome_build = "37"

        variant_genes = variant_obj.get("genes")
        if variant_genes is not None:
            # Update the HGNC symbols if they are not set
            for gene_obj in variant_genes:
                # If there is no hgnc id there is nothing we can do
                if not gene_obj["hgnc_id"]:
                    continue
                # Else we collect the gene object and check the id
                if (gene_obj.get("hgnc_symbol") is None
                        or gene_obj.get("description") is None):
                    hgnc_gene = store.hgnc_gene(gene_obj["hgnc_id"],
                                                build=genome_build)
                    if not hgnc_gene:
                        continue
                    gene_obj["hgnc_symbol"] = hgnc_gene["hgnc_symbol"]
                    gene_obj["description"] = hgnc_gene["description"]

            # Populate variant HGVS and predictions
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_symbols.append(gene(store, gene_obj["hgnc_id"])["symbol"])

                # Both values default to "-" so that a gene without a
                # canonical transcript never reuses the previous gene's value
                hgvs_nucleotide = "-"
                hgvs_protein = "-"
                # gather HGVS info from gene transcripts
                for transcript_obj in gene_obj.get("transcripts") or []:
                    if transcript_obj.get("is_canonical") is True:
                        hgvs_nucleotide = str(
                            transcript_obj.get("coding_sequence_name"))
                        hgvs_protein = str(
                            transcript_obj.get("protein_sequence_name"))
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # A single HGVS string is only set for single-gene variants;
            # str(None) gives "None", which marks a missing name
            if len(gene_symbols) == 1:
                if hgvs_p[0] != "None":
                    hgvs = hgvs_p[0]
                elif hgvs_c[0] != "None":
                    hgvs = hgvs_c[0]
                else:
                    hgvs = "-"
                variant_obj["hgvs"] = hgvs

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}