示例#1
0
def gene_variants(store,
                  pymongo_cursor,
                  variant_count,
                  institute_id,
                  page=1,
                  per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: adapter used to fetch cases (``store.case``); it is also passed
            on to ``user_institutes``, ``update_HGNC_symbols`` and ``gene``.
        pymongo_cursor: cursor over variant objects; must support ``skip()``
            and ``limit()``.
        variant_count(int): total number of variants matched by the query.
        institute_id: not used by this function.
        page(int): page number; values below 1 are treated as the first page.
        per_page(int): maximum number of variants fetched for the page.

    Returns:
        dict: ``{"variants": [...], "more_variants": bool}``. ``variants``
            holds the variant objects of this page that belong to an existing
            case the current user may see, each updated in place.
            ``more_variants`` is True when ``variant_count`` reaches beyond
            the end of this page.
    """
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = pymongo_cursor.skip(skip_count).limit(per_page)
    my_institutes = {
        inst["_id"] for inst in user_institutes(store, current_user)
    }
    variants = []

    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        # Hide variants of cases that are neither owned by nor shared with
        # one of the user's institutes.
        case_institutes = {variant_case_obj.get("owner")}
        case_institutes.update(variant_case_obj.get("collaborators", []))
        if my_institutes.isdisjoint(case_institutes):
            continue

        genome_build = get_genome_build(variant_case_obj)
        variant_genes = variant_obj.get("genes")
        # Return value is not needed here; the call is kept for whatever
        # updates it performs on the gene objects.
        update_HGNC_symbols(store, variant_genes, genome_build)

        # Populate variant HGVS and predictions
        if variant_genes is not None:
            # Collected per variant. Previously the symbol list was rebound to
            # a single-element list for every gene, which made the length
            # check below always true and left the name unbound (or stale
            # from an earlier variant) when the gene list was empty.
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_symbols.append(gene(store, gene_obj["hgnc_id"])["symbol"])

                # gather HGVS info from gene transcripts
                hgvs_nucleotide, hgvs_protein = get_hgvs(gene_obj)
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # A single HGVS string is only set for single-gene variants.
            if len(gene_symbols) == 1:
                variant_obj["hgvs"] = hgvs_str(gene_symbols, hgvs_p, hgvs_c)

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}
示例#2
0
def variant_export_genes_info(store, gene_list):
    """Build the gene-related export fields for one variant.

    Args:
        store: passed on to ``gene()`` to resolve each HGNC id to a symbol
        gene_list(list): A list of gene objects contained in the variant.
            ``None`` is treated like an empty list.

    Returns:
        gene_info(list): three strings, each a ";"-joined list with one entry
            per gene, in this order: HGNC ids, gene symbols, and the
            ``coding_sequence_name`` of the canonical transcript ("-" when the
            gene has no transcript flagged ``is_canonical``).
    """
    gene_ids = []
    gene_names = []
    hgvs_c = []

    for gene_obj in gene_list or []:
        hgnc_id = gene_obj["hgnc_id"]
        gene_ids.append(hgnc_id)
        gene_names.append(gene(store, hgnc_id)["symbol"])

        hgvs_nucleotide = "-"
        # "transcripts" is read with .get(), so it may be absent; treat a
        # missing value as "no transcripts" instead of iterating over None.
        for transcript_obj in gene_obj.get("transcripts") or []:
            # If several transcripts are flagged canonical the last one wins.
            if transcript_obj.get("is_canonical") is True:
                hgvs_nucleotide = str(
                    transcript_obj.get("coding_sequence_name"))
        hgvs_c.append(hgvs_nucleotide)

    return [
        ";".join(str(x) for x in column)
        for column in (gene_ids, gene_names, hgvs_c)
    ]
示例#3
0
def variant_export_lines(store, case_obj, variants_query):
    """Get variants info to be exported to file, one line per variant.

        Args:
            store: passed on to ``gene()`` to resolve HGNC ids to symbols
            case_obj: case dictionary; its 'individuals' list is read
            variants_query: an iterable of variant objects, each one is a dictionary

        Returns:
            export_variants: a list of strings. Each string holds the fields of
                             one variant separated by comma: rank score,
                             chromosome, position, change, position_change,
                             the gene fields, then for every case individual
                             with a matching sample the reference depth,
                             alternate depth and genotype quality.
    """
    export_variants = []

    for variant in variants_query:
        position = variant['position']
        change = variant['reference'] + '>' + variant['alternative']
        variant_line = [
            variant['rank_score'],
            variant['chromosome'],
            position,
            change,
            '_'.join([str(position), change]),
        ]

        # gather gene info; 'genes' may be missing or empty
        gene_list = variant.get('genes')
        if gene_list:
            gene_ids = []
            gene_names = []
            hgvs_c = []
            for gene_obj in gene_list:
                hgnc_id = gene_obj['hgnc_id']
                gene_ids.append(hgnc_id)
                gene_names.append(gene(store, hgnc_id)['symbol'])

                hgvs_nucleotide = '-'
                # gather HGVS info from gene transcripts; a gene without a
                # 'transcripts' entry keeps the '-' placeholder
                for transcript_obj in gene_obj.get('transcripts') or []:
                    if transcript_obj.get('is_canonical') is True:
                        hgvs_nucleotide = str(transcript_obj.get('coding_sequence_name'))
                hgvs_c.append(hgvs_nucleotide)

            variant_line.append(';'.join(str(x) for x in gene_ids))
            variant_line.append(';'.join(str(x) for x in gene_names))
            variant_line.append(';'.join(str(x) for x in hgvs_c))
        else:
            # Placeholders instead of gene fields. The previous while-loop read
            # its counter before ever assigning it, so this branch always raised.
            # NOTE(review): 4 placeholders are written although the branch above
            # adds 3 gene fields -- confirm against the export header.
            variant_line.extend(['-'] * 4)

        variant_gts = variant['samples']  # list of coverage and gt calls for case samples
        for individual in case_obj['individuals']:
            for variant_gt in variant_gts:
                if individual['individual_id'] == variant_gt['sample_id']:
                    # gather coverage info
                    variant_line.append(variant_gt['allele_depths'][0])  # AD reference
                    variant_line.append(variant_gt['allele_depths'][1])  # AD alternate
                    # gather genotype quality info
                    variant_line.append(variant_gt['genotype_quality'])

        export_variants.append(','.join(str(field) for field in variant_line))

    return export_variants
示例#4
0
def variant_export_lines(store, case_obj, variants_query):
    """Get variants info to be exported to file, one line per variant.
        Args:
            store: passed on to ``gene()`` to resolve HGNC ids to symbols
            case_obj: case dictionary; its "individuals" list is read
            variants_query: an iterable of variant objects, each one is a dictionary
        Returns:
            export_variants: a list of strings. Each string holds the
                             comma-separated fields of one variant: rank score,
                             chromosome, position, change, position_change,
                             the gene fields, and per matching case sample the
                             two allele depths and the genotype quality.
    """
    export_variants = []

    for variant in variants_query:
        position = variant["position"]
        change = variant["reference"] + ">" + variant["alternative"]
        variant_line = [
            variant["rank_score"],
            variant["chromosome"],
            position,
            change,
            "_".join([str(position), change]),
        ]

        # gather gene info: a list of gene objects, or None when the variant
        # carries no "genes" entry (len() on that None used to raise)
        gene_list = variant.get("genes")
        if not gene_list:
            # placeholders instead of gene fields
            # NOTE(review): 4 placeholders are written while the gene branch
            # below adds 3 fields -- confirm against the export header.
            variant_line += ["-"] * 4
        else:
            gene_ids = []
            gene_names = []
            hgvs_c = []
            for gene_obj in gene_list:
                hgnc_id = gene_obj["hgnc_id"]
                gene_ids.append(hgnc_id)
                gene_names.append(gene(store, hgnc_id)["symbol"])

                # HGVS of the canonical transcript, "-" when there is none
                # (including genes that have no "transcripts" entry at all)
                hgvs_nucleotide = "-"
                for transcript_obj in gene_obj.get("transcripts") or []:
                    if transcript_obj.get("is_canonical") is True:
                        hgvs_nucleotide = str(
                            transcript_obj.get("coding_sequence_name"))
                hgvs_c.append(hgvs_nucleotide)

            for column in (gene_ids, gene_names, hgvs_c):
                variant_line.append(";".join(str(x) for x in column))

        # list of coverage and gt calls for case samples
        variant_gts = variant["samples"]
        for individual in case_obj["individuals"]:
            for variant_gt in variant_gts:
                if individual["individual_id"] != variant_gt["sample_id"]:
                    continue
                allele_depths = variant_gt["allele_depths"]
                variant_line.append(allele_depths[0])  # AD reference
                variant_line.append(allele_depths[1])  # AD alternate
                # gather genotype quality info
                variant_line.append(variant_gt["genotype_quality"])

        export_variants.append(",".join(str(field) for field in variant_line))

    return export_variants
示例#5
0
def gene_variants(store, variants_query, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: adapter providing ``case()`` and ``hgnc_gene()``; it is also
            passed on to ``user_institutes`` and ``gene``.
        variants_query: cursor over variant objects; must support ``count()``,
            ``skip()`` and ``limit()``.
        page(int): page number; values below 1 are treated as the first page.
        per_page(int): maximum number of variants fetched for the page.

    Returns:
        dict: ``{'variants': [...], 'more_variants': bool}``. ``variants``
            holds the variant objects of this page whose institute the current
            user belongs to and whose case exists, each updated in place.
            ``more_variants`` is True when the query matches variants beyond
            the end of this page.
    """
    # NOTE(review): if variants_query is a PyMongo cursor, Cursor.count() is
    # deprecated and absent from PyMongo 4 -- confirm the supported version.
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)

    my_institutes = [
        inst['_id'] for inst in user_institutes(store, current_user)
    ]

    variants = []
    for variant_obj in variant_res:
        # hide other institutes for now
        if variant_obj['institute'] not in my_institutes:
            LOG.warning("Institute {} not allowed.".format(
                variant_obj['institute']))
            continue

        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj['case_id'])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj['case_display_name'] = variant_case_obj.get('display_name')

        # Anything other than build 38 is handled as build 37
        genome_build = variant_case_obj.get('genome_build', '37')
        if genome_build not in ['37', '38']:
            genome_build = '37'

        variant_genes = variant_obj.get('genes')
        if variant_genes is not None:
            # Update the HGNC symbols if they are not set
            for gene_obj in variant_genes:
                # If there is no hgnc id there is nothing we can do
                if not gene_obj['hgnc_id']:
                    continue
                if (gene_obj.get('hgnc_symbol') is not None
                        and gene_obj.get('description') is not None):
                    continue
                hgnc_gene = store.hgnc_gene(gene_obj['hgnc_id'],
                                            build=genome_build)
                if not hgnc_gene:
                    continue
                gene_obj['hgnc_symbol'] = hgnc_gene['hgnc_symbol']
                gene_obj['description'] = hgnc_gene['description']

            # Populate variant HGVS and predictions
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_symbols.append(gene(store, gene_obj['hgnc_id'])['symbol'])

                # Both values are reset for every gene. The protein value used
                # to be assigned only inside the canonical-transcript branch,
                # so a gene without a canonical transcript either raised
                # UnboundLocalError or reused the previous gene's value.
                hgvs_nucleotide = '-'
                hgvs_protein = '-'
                # A gene without a 'transcripts' entry keeps the placeholders
                for transcript_obj in gene_obj.get('transcripts') or []:
                    if transcript_obj.get('is_canonical') is True:
                        hgvs_nucleotide = str(
                            transcript_obj.get('coding_sequence_name'))
                        hgvs_protein = str(
                            transcript_obj.get('protein_sequence_name'))
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # A single HGVS string is only shown for single-gene variants:
            # protein level first, then coding level. "None" is the str() of a
            # missing name on the canonical transcript.
            if len(gene_symbols) == 1:
                if hgvs_p[0] != "None":
                    hgvs = hgvs_p[0]
                elif hgvs_c[0] != "None":
                    hgvs = hgvs_c[0]
                else:
                    hgvs = "-"
                variant_obj['hgvs'] = hgvs

            # populate variant predictions for display
            variant_obj.update(get_predictions(variant_genes))

        variants.append(variant_obj)

    return {
        'variants': variants,
        'more_variants': more_variants,
    }
示例#6
0
def gene_variants(store, variants_query, institute_id, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: adapter providing ``case()`` and ``hgnc_gene()``; it is also
            passed on to ``user_institutes`` and ``gene``.
        variants_query: cursor over variant objects; must support ``count()``,
            ``skip()`` and ``limit()``.
        institute_id: not used by this function.
        page(int): page number; values below 1 are treated as the first page.
        per_page(int): maximum number of variants fetched for the page.

    Returns:
        dict: ``{"variants": [...], "more_variants": bool}``. ``variants``
            holds the variant objects of this page that belong to an existing
            case owned by or shared with one of the current user's institutes,
            each updated in place. ``more_variants`` is True when the query
            matches variants beyond the end of this page.
    """
    # We need to call variants_collection.count_documents here
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)

    my_institutes = {
        inst["_id"] for inst in user_institutes(store, current_user)
    }

    variants = []
    for variant_obj in variant_res:
        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj["case_id"])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj["case_display_name"] = variant_case_obj.get("display_name")

        # hide other institutes for now: the case must be owned by or shared
        # with at least one of the user's institutes
        case_institutes = {variant_case_obj.get("owner")}
        case_institutes.update(variant_case_obj.get("collaborators", []))
        if my_institutes.isdisjoint(case_institutes):
            # If the user does not have access to the information we skip it
            continue

        # Anything other than build 38 is handled as build 37
        genome_build = variant_case_obj.get("genome_build", "37")
        if genome_build not in ["37", "38"]:
            genome_build = "37"

        variant_genes = variant_obj.get("genes")
        if variant_genes is not None:
            # Update the HGNC symbols if they are not set
            for gene_obj in variant_genes:
                # If there is no hgnc id there is nothing we can do
                if not gene_obj["hgnc_id"]:
                    continue
                if (gene_obj.get("hgnc_symbol") is not None
                        and gene_obj.get("description") is not None):
                    continue
                hgnc_gene = store.hgnc_gene(gene_obj["hgnc_id"],
                                            build=genome_build)
                if not hgnc_gene:
                    continue
                gene_obj["hgnc_symbol"] = hgnc_gene["hgnc_symbol"]
                gene_obj["description"] = hgnc_gene["description"]

            # Populate variant HGVS and predictions
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_symbols.append(gene(store, gene_obj["hgnc_id"])["symbol"])

                # Both values are reset for every gene. The protein value used
                # to be assigned only inside the canonical-transcript branch,
                # so a gene without a canonical transcript either raised
                # UnboundLocalError or reused the previous gene's value.
                hgvs_nucleotide = "-"
                hgvs_protein = "-"
                # A gene without a "transcripts" entry keeps the placeholders
                for transcript_obj in gene_obj.get("transcripts") or []:
                    if transcript_obj.get("is_canonical") is True:
                        hgvs_nucleotide = str(
                            transcript_obj.get("coding_sequence_name"))
                        hgvs_protein = str(
                            transcript_obj.get("protein_sequence_name"))
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # A single HGVS string is only shown for single-gene variants:
            # protein level first, then coding level. "None" is the str() of a
            # missing name on the canonical transcript.
            if len(gene_symbols) == 1:
                if hgvs_p[0] != "None":
                    hgvs = hgvs_p[0]
                elif hgvs_c[0] != "None":
                    hgvs = hgvs_c[0]
                else:
                    hgvs = "-"
                variant_obj["hgvs"] = hgvs

            # populate variant predictions for display
            variant_obj.update(predictions(variant_genes))

        variants.append(variant_obj)

    return {"variants": variants, "more_variants": more_variants}
示例#7
0
def gene_variants(store, variants_query, page=1, per_page=50):
    """Pre-process one page of variants for display.

    Args:
        store: adapter providing ``case()`` and ``hgnc_gene()``; it is also
            passed on to ``user_institutes`` and ``gene``.
        variants_query: cursor over variant objects; must support ``count()``,
            ``skip()`` and ``limit()``.
        page(int): page number; values below 1 are treated as the first page.
        per_page(int): maximum number of variants fetched for the page.

    Returns:
        dict: ``{'variants': [...], 'more_variants': bool}``. ``variants``
            holds the variant objects of this page whose institute the current
            user belongs to and whose case exists, each updated in place.
            ``more_variants`` is True when the query matches variants beyond
            the end of this page.
    """
    # NOTE(review): if variants_query is a PyMongo cursor, Cursor.count() is
    # deprecated and absent from PyMongo 4 -- confirm the supported version.
    variant_count = variants_query.count()
    skip_count = per_page * max(page - 1, 0)
    more_variants = variant_count > (skip_count + per_page)
    variant_res = variants_query.skip(skip_count).limit(per_page)

    my_institutes = [inst['_id'] for inst in user_institutes(store, current_user)]

    variants = []
    for variant_obj in variant_res:
        # hide other institutes for now
        if variant_obj['institute'] not in my_institutes:
            LOG.warning("Institute {} not allowed.".format(variant_obj['institute']))
            continue

        # Populate variant case_display_name
        variant_case_obj = store.case(case_id=variant_obj['case_id'])
        if not variant_case_obj:
            # A variant with missing case was encountered
            continue
        variant_obj['case_display_name'] = variant_case_obj.get('display_name')

        # Anything other than build 38 is handled as build 37
        genome_build = variant_case_obj.get('genome_build', '37')
        if genome_build not in ['37', '38']:
            genome_build = '37'

        variant_genes = variant_obj.get('genes')
        if variant_genes is not None:
            # Update the HGNC symbols if they are not set
            for gene_obj in variant_genes:
                # If there is no hgnc id there is nothing we can do
                if not gene_obj['hgnc_id']:
                    continue
                if gene_obj.get('hgnc_symbol') is not None and gene_obj.get('description') is not None:
                    continue
                hgnc_gene = store.hgnc_gene(gene_obj['hgnc_id'], build=genome_build)
                if not hgnc_gene:
                    continue
                gene_obj['hgnc_symbol'] = hgnc_gene['hgnc_symbol']
                gene_obj['description'] = hgnc_gene['description']

            # Populate variant HGVS and predictions
            gene_symbols = []
            hgvs_c = []
            hgvs_p = []
            for gene_obj in variant_genes:
                gene_symbols.append(gene(store, gene_obj['hgnc_id'])['symbol'])

                # Both values are reset for every gene. The protein value used
                # to be assigned only inside the canonical-transcript branch,
                # so a gene without a canonical transcript either raised
                # UnboundLocalError or reused the previous gene's value.
                hgvs_nucleotide = '-'
                hgvs_protein = '-'
                # A gene without a 'transcripts' entry keeps the placeholders
                for transcript_obj in gene_obj.get('transcripts') or []:
                    if transcript_obj.get('is_canonical') is True:
                        hgvs_nucleotide = str(transcript_obj.get('coding_sequence_name'))
                        hgvs_protein = str(transcript_obj.get('protein_sequence_name'))
                hgvs_c.append(hgvs_nucleotide)
                hgvs_p.append(hgvs_protein)

            # A single HGVS string is only shown for single-gene variants:
            # protein level first, then coding level. "None" is the str() of a
            # missing name on the canonical transcript.
            if len(gene_symbols) == 1:
                if hgvs_p[0] != "None":
                    hgvs = hgvs_p[0]
                elif hgvs_c[0] != "None":
                    hgvs = hgvs_c[0]
                else:
                    hgvs = "-"
                variant_obj['hgvs'] = hgvs

            # populate variant predictions for display
            variant_obj.update(get_predictions(variant_genes))

        variants.append(variant_obj)

    return {
        'variants': variants,
        'more_variants': more_variants,
    }