Пример #1
0
def test_add_hg38_gene_links():
    """Check that gene links are added to a minimal gene for build 38."""
    # GIVEN a gene that only carries an HGNC id
    minimal_gene = dict(hgnc_id=257)

    # WHEN the links are added for genome build 38
    add_gene_links(minimal_gene, 38)

    # THEN the gene has gained an HGNC link
    assert "hgnc_link" in minimal_gene
Пример #2
0
def test_ucsc_link():
    """Check that a UCSC link is added when the gene has a UCSC id."""
    # GIVEN a gene with an HGNC id and a UCSC id
    minimal_gene = dict(hgnc_id=257, ucsc_id="uc001jwi.4")

    # WHEN the links are added for genome build 37
    add_gene_links(minimal_gene, 37)

    # THEN a UCSC link is present on the gene
    assert minimal_gene.get("ucsc_link") is not None
Пример #3
0
def gene(store, hgnc_id):
    """Collect display information about a gene for every genome build.

    The gene is looked up with ``store.hgnc_gene`` once per build ("37" and
    "38"). Each record found is decorated in place (position string, gene
    links, transcript positions and links, phenotype OMIM links) and stored
    under ``res['builds'][build]``. Summary fields are copied from every
    record found, so when several builds have a record the values from the
    build visited last win, while ``res['record']`` keeps the first record
    found.

    Args:
        store: adapter exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: gene identifier passed through to ``store.hgnc_gene``

    Returns:
        dict: always holds 'builds', 'symbol', 'description', 'ensembl_id'
            (never populated by this function) and 'record'; additionally
            'aliases', 'hgnc_id', 'entrez_id', 'pli_score', 'omim_id',
            'incomplete_penetrance' and 'inheritance_models' once a record
            has been found.

    Raises:
        ValueError: if no record was found for any build
    """
    res = {
        'builds': {
            '37': None,
            '38': None
        },
        'symbol': None,
        'description': None,
        'ensembl_id': None,
        'record': None
    }

    for build in res['builds']:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record['position'] = "{this[chromosome]}:{this[start]}-{this[end]}".format(
            this=record)
        res['aliases'] = record['aliases']
        res['hgnc_id'] = record['hgnc_id']
        res['description'] = record['description']
        res['builds'][build] = record
        res['symbol'] = record['hgnc_symbol']
        res['entrez_id'] = record.get('entrez_id')
        res['pli_score'] = record.get('pli_score')

        add_gene_links(record, int(build))

        res['omim_id'] = record.get('omim_id')
        res['incomplete_penetrance'] = record.get('incomplete_penetrance',
                                                  False)
        res['inheritance_models'] = record.get('inheritance_models', [])
        for transcript in record['transcripts']:
            transcript['position'] = (
                "{this[chrom]}:{this[start]}-{this[end]}".format(
                    this=transcript))
            # NOTE(review): build is passed as a str here but as an int to
            # add_gene_links above -- confirm add_tx_links accepts both.
            add_tx_links(transcript, build)

        for phenotype in record.get('phenotypes', []):
            phenotype['omim_link'] = omim(phenotype.get('mim_number'))

        # Keep the first record found as the default one
        if not res['record']:
            res['record'] = record

    # If none of the genes were found. The check has to look at the per-build
    # records: ``res['builds']`` itself is a non-empty dict and thus always
    # truthy, so testing ``res.values()`` could never detect a missing gene.
    if not any(res['builds'].values()):
        raise ValueError("No gene found for hgnc_id {}".format(hgnc_id))

    return res
Пример #4
0
def gene(store, hgnc_id):
    """Collect display information about a gene for every genome build.

    The gene is looked up with ``store.hgnc_gene`` once per build ("37" and
    "38"). Each record found is decorated in place (position string, gene
    links, transcript positions and links, phenotype OMIM links) and stored
    under ``res["builds"][build]``. Summary fields are copied from every
    record found, so when several builds have a record the values from the
    build visited last win, while ``res["record"]`` keeps the first record
    found.

    Args:
        store: adapter exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: gene identifier passed through to ``store.hgnc_gene``

    Returns:
        dict: always holds "builds", "symbol", "description", "ensembl_id"
            (never populated by this function) and "record"; additionally
            "aliases", "hgnc_id", "entrez_id", "pli_score", "omim_id",
            "incomplete_penetrance" and "inheritance_models" once a record
            has been found.

    Raises:
        ValueError: if no record was found for any build
    """
    res = {
        "builds": {
            "37": None,
            "38": None
        },
        "symbol": None,
        "description": None,
        "ensembl_id": None,
        "record": None,
    }

    for build in res["builds"]:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record["position"] = "{this[chromosome]}:{this[start]}-{this[end]}".format(
            this=record)
        res["aliases"] = record["aliases"]
        res["hgnc_id"] = record["hgnc_id"]
        res["description"] = record["description"]
        res["builds"][build] = record
        res["symbol"] = record["hgnc_symbol"]
        res["entrez_id"] = record.get("entrez_id")
        res["pli_score"] = record.get("pli_score")

        add_gene_links(record, int(build))

        res["omim_id"] = record.get("omim_id")
        res["incomplete_penetrance"] = record.get("incomplete_penetrance",
                                                  False)
        res["inheritance_models"] = record.get("inheritance_models", [])
        for transcript in record["transcripts"]:
            transcript["position"] = (
                "{this[chrom]}:{this[start]}-{this[end]}".format(
                    this=transcript))
            # NOTE(review): build is passed as a str here but as an int to
            # add_gene_links above -- confirm add_tx_links accepts both.
            add_tx_links(transcript, build)

        for phenotype in record.get("phenotypes", []):
            phenotype["omim_link"] = omim(phenotype.get("mim_number"))

        # Keep the first record found as the default one
        if not res["record"]:
            res["record"] = record

    # If none of the genes were found. The check has to look at the per-build
    # records: ``res["builds"]`` itself is a non-empty dict and thus always
    # truthy, so testing ``res.values()`` could never detect a missing gene.
    if not any(res["builds"].values()):
        raise ValueError("No gene found for hgnc_id {}".format(hgnc_id))

    return res
Пример #5
0
def parse_gene(gene_obj, build=None):
    """Add links and primary transcripts to a variant gene.

    Nothing is done when ``gene_obj['common']`` is falsy. Otherwise gene links
    are added, every transcript is parsed, and the transcripts that carry a
    'refseq_id' after parsing are stored under 'primary_transcripts'.

    Args:
        gene_obj(dict): variant gene, modified in place
        build: genome build, falls back to 37 when falsy
    """
    if not build:
        build = 37

    # Without the shared gene information there is nothing to annotate
    if not gene_obj['common']:
        return

    add_gene_links(gene_obj, build)

    primary = []
    for transcript in gene_obj['transcripts']:
        parse_transcript(gene_obj, transcript, build)

        # only refseq transcripts are selected as "primary"
        if transcript.get('refseq_id'):
            primary.append(transcript)

    gene_obj['primary_transcripts'] = primary or []
Пример #6
0
def parse_gene(gene_obj, build=None):
    """Add links and primary transcripts to a variant gene, then parse transcripts.

    When ``gene_obj['common']`` is truthy, gene links are added and
    'primary_transcripts' is set to the transcripts having a 'refseq_id', or
    to all transcripts when none has one. Every transcript is parsed
    afterwards, whether or not 'common' is set.

    Args:
        gene_obj(dict): variant gene, modified in place
        build: genome build, falls back to 37 when falsy
    """
    if not build:
        build = 37

    if gene_obj['common']:
        add_gene_links(gene_obj, build)

        with_refseq = [tx for tx in gene_obj['transcripts'] if tx.get('refseq_id')]
        # refseq transcripts are "primary"; otherwise fall back to all
        # (Ensembl) transcripts
        gene_obj['primary_transcripts'] = with_refseq or gene_obj['transcripts']

    for transcript in gene_obj['transcripts']:
        parse_transcript(gene_obj, transcript, build)
Пример #7
0
def gene(store, hgnc_id):
    """Collect display information about a gene for every genome build.

    The gene is looked up with ``store.hgnc_gene`` once per build ("37" and
    "38"). Each record found is decorated in place (position string, gene
    links, transcript positions and links, phenotype OMIM links) and stored
    under ``res['builds'][build]``. Summary fields are copied from every
    record found, so when several builds have a record the values from the
    build visited last win, while ``res['record']`` keeps the first record
    found.

    Args:
        store: adapter exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: gene identifier passed through to ``store.hgnc_gene``

    Returns:
        dict: always holds 'builds', 'symbol', 'description', 'ensembl_id'
            (never populated by this function) and 'record'; additionally
            'aliases', 'hgnc_id', 'entrez_id', 'pli_score', 'omim_id',
            'incomplete_penetrance' and 'inheritance_models' once a record
            has been found.

    Raises:
        ValueError: if no record was found for any build
    """
    res = {'builds': {'37': None, '38': None}, 'symbol': None, 'description': None, 'ensembl_id': None, 'record': None}

    for build in res['builds']:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record['position'] = "{this[chromosome]}:{this[start]}-{this[end]}".format(this=record)
        res['aliases'] = record['aliases']
        res['hgnc_id'] = record['hgnc_id']
        res['description'] = record['description']
        res['builds'][build] = record
        res['symbol'] = record['hgnc_symbol']
        res['entrez_id'] = record.get('entrez_id')
        res['pli_score'] = record.get('pli_score')

        add_gene_links(record, int(build))

        res['omim_id'] = record.get('omim_id')
        res['incomplete_penetrance'] = record.get('incomplete_penetrance', False)
        res['inheritance_models'] = record.get('inheritance_models', [])
        for transcript in record['transcripts']:
            transcript['position'] = ("{this[chrom]}:{this[start]}-{this[end]}"
                                      .format(this=transcript))
            # NOTE(review): build is passed as a str here but as an int to
            # add_gene_links above -- confirm add_tx_links accepts both.
            add_tx_links(transcript, build)

        for phenotype in record.get('phenotypes', []):
            phenotype['omim_link'] = omim(phenotype.get('mim_number'))

        # Keep the first record found as the default one
        if not res['record']:
            res['record'] = record

    # If none of the genes were found. The check has to look at the per-build
    # records: ``res['builds']`` itself is a non-empty dict and thus always
    # truthy, so testing ``res.values()`` could never detect a missing gene.
    if not any(res['builds'].values()):
        raise ValueError("No gene found for hgnc_id {}".format(hgnc_id))

    return res
Пример #8
0
def add_gene_info(store, variant_obj, gene_panels=None, genome_build=None):
    """Extend the genes of a variant with hgnc gene and gene panel information.

    The genes on a variant come with gene and transcript annotations. The
    database keeps updated and extended information about genes and
    transcripts, and this function complements the variant genes with that
    information. Gene panels can additionally carry manually curated
    information, which is added to the variant genes as well.

    Genes for which ``store.hgnc_gene`` returns nothing are left untouched.

    Args:
        store(scout.adapter.MongoAdapter)
        variant_obj(dict): A variant from the database, modified in place
        gene_panels(list(dict)): List of panels from database
        genome_build(str): defaults to "37"

    Returns:
        variant_obj: the same object, with 'has_refseq',
            'disease_associated_transcripts' and 'all_models' set
    """
    if not gene_panels:
        gene_panels = []
    if not genome_build:
        genome_build = "37"

    # Group the panel entries by hgnc id so they can be looked up per gene
    panel_genes_by_id = {}
    for panel in gene_panels:
        for panel_gene in panel["genes"]:
            panel_genes_by_id.setdefault(panel_gene["hgnc_id"], []).append(panel_gene)

    variant_obj["has_refseq"] = False
    variant_obj["disease_associated_transcripts"] = []
    inheritance_models = set()

    # Add information from hgnc genes and panel genes to each variant gene
    for variant_gene in variant_obj.get("genes", []):
        hgnc_id = variant_gene["hgnc_id"]
        hgnc_gene = store.hgnc_gene(hgnc_id, build=genome_build)
        if not hgnc_gene:
            continue

        hgnc_symbol = hgnc_gene["hgnc_symbol"]

        # Flag genes annotated with incomplete penetrance
        if hgnc_gene.get("incomplete_penetrance"):
            variant_gene["omim_penetrance"] = True

        # Panels can hold extra information about genes and transcripts
        panel_info = add_panel_specific_gene_info(
            panel_genes_by_id.get(hgnc_id, []))
        variant_gene.update(panel_info)

        update_transcripts_information(variant_gene, hgnc_gene, variant_obj)

        variant_gene["common"] = hgnc_gene

        add_gene_links(variant_gene, genome_build)

        # Expose the panel's disease associated transcripts on the variant
        variant_obj["disease_associated_transcripts"].extend(
            "{}:{}".format(hgnc_symbol, refseq_id)
            for refseq_id in panel_info.get("disease_associated_transcripts", []))

        # Attach the disease terms associated with the gene
        variant_gene["disease_terms"] = store.disease_terms(hgnc_id)

        inheritance_models.update(variant_gene["manual_inheritance"])

        # Inheritance models collected from the gene's disease terms
        omim_models = set()
        for term in variant_gene.get("disease_terms", []):
            omim_models.update(term.get("inheritance", []))
        variant_gene["omim_inheritance"] = list(omim_models)

        inheritance_models.update(omim_models)

    variant_obj["all_models"] = inheritance_models

    return variant_obj