예제 #1
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")
    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(project_id + " - staring gene search for: %s %s \n" %
                     (gene_id, gene))
    variant_filter = get_default_variant_filter(
        'all_coding',
        mall.get_annotator().reference_population_slugs)
    num_indivs = len(
        [i for i in project.get_individuals() if i.has_variant_data()])
    aac_threshold = (.2 * num_indivs) + 5
    rare_variants = []
    for variant in project_analysis.get_variants_in_gene(
            project, gene_id, variant_filter=variant_filter):
        aac = get_alt_allele_count(variant)
        max_af = max(variant.annotation['freqs'].values())
        if aac <= aac_threshold and max_af < .01:
            rare_variants.append(variant)

    add_extra_info_to_variants_project(get_reference(), project, rare_variants)

    knockouts = []
    knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
    for kid in knockout_ids:
        variants = variation.get_relevant_variants_for_indiv_ids([kid])
        add_extra_info_to_variants_project(get_reference(), project, variants)
        knockouts.append({
            'indiv_id': kid,
            'variants': [v.toJSON() for v in variants],
        })

    sys.stderr.write("Retrieved %s variants \n" % len(rare_variants))
    return render(
        request, 'project/gene_quicklook.html', {
            'gene':
            gene,
            'gene_json':
            json.dumps(gene),
            'project':
            project,
            'rare_variants_json':
            json.dumps([v.toJSON() for v in rare_variants]),
            'individuals_json':
            json.dumps([i.get_json_obj() for i in project.get_individuals()]),
            'knockouts_json':
            json.dumps(knockouts),
        })
예제 #2
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")
    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(project_id + " - staring gene search for: %s %s \n" % (gene_id, gene))
    variant_filter = get_default_variant_filter('all_coding', mall.get_annotator().reference_population_slugs)
    num_indivs = len([i for i in project.get_individuals() if i.has_variant_data()])
    aac_threshold = (.2 * num_indivs) + 5
    rare_variants = []
    for variant in project_analysis.get_variants_in_gene(project, gene_id, variant_filter=variant_filter):
        aac = get_alt_allele_count(variant)
        max_af = max(variant.annotation['freqs'].values())
        if aac <= aac_threshold and max_af < .01:
            rare_variants.append(variant)

    add_extra_info_to_variants_project(get_reference(), project, rare_variants)

    knockouts = []
    knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
    for kid in knockout_ids:
        variants = variation.get_relevant_variants_for_indiv_ids([kid])
        add_extra_info_to_variants_project(get_reference(), project, variants)
        knockouts.append({
            'indiv_id': kid,
            'variants': [v.toJSON() for v in variants],
        })

    sys.stderr.write("Retrieved %s variants \n" % len(rare_variants))
    return render(request, 'project/gene_quicklook.html', {
        'gene': gene,
        'gene_json': json.dumps(gene),
        'project': project,
        'rare_variants_json': json.dumps([v.toJSON() for v in rare_variants]),
        'individuals_json': json.dumps([i.get_json_obj() for i in project.get_individuals()]),
        'knockouts_json': json.dumps(knockouts),
    })
예제 #3
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")

    if project.project_status == Project.NEEDS_MORE_PHENOTYPES and not request.user.is_staff:
        return render(request, 'analysis_unavailable.html',
                      {'reason': 'Awaiting phenotype data.'})

    # other projects this user can view
    if request.user.is_staff:
        other_projects = [p for p in Project.objects.all()]  #  if p != project
    else:
        other_projects = [
            c.project
            for c in ProjectCollaborator.objects.filter(user=request.user)
        ]  # if c.project != project

    other_projects = filter(
        lambda p: get_project_datastore(p.project_id).
        project_collection_is_loaded(p.project_id), other_projects)

    if other_projects:
        other_projects_json = json.dumps([{
            'project_id': p.project_id,
            'project_name': p.project_name
        } for p in sorted(other_projects, key=lambda p: p.project_id)])
    else:
        other_projects_json = None

    if gene_id is None:
        return render(
            request, 'project/gene_quicklook.html', {
                'project': project,
                'gene': None,
                'gene_json': None,
                'rare_variants_json': None,
                'individuals_json': None,
                'knockouts_json': None,
                'other_projects_json': other_projects_json,
            })

    projects_to_search_param = request.GET.get('selected_projects')
    if projects_to_search_param:
        projects_to_search = []
        project_ids = projects_to_search_param.split(",")
        for project_id in project_ids:
            project = get_object_or_404(Project, project_id=project_id)
            if not project.can_view(request.user):
                return HttpResponse("Unauthorized")
            projects_to_search.append(project)
    else:
        projects_to_search = [project]

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(
        project_id + " - staring gene search for: %s in projects: %s\n" %
        (gene_id, ",".join([p.project_id for p in projects_to_search]) + "\n"))

    # all rare coding variants
    variant_filter = get_default_variant_filter(
        'all_coding',
        mall.get_annotator().reference_population_slugs)

    indiv_id_to_project_id = {}
    rare_variant_dict = {}
    rare_variants = []
    for project in projects_to_search:
        project_variants = []
        for variant in project_analysis.get_variants_in_gene(
                project, gene_id, variant_filter=variant_filter):
            max_af = max(variant.annotation['freqs'].values())
            if not any([
                    indiv_id
                    for indiv_id, genotype in variant.genotypes.items()
                    if genotype.num_alt > 0
            ]):
                continue
            if max_af >= .01:
                continue

            # add project id to genotypes
            for indiv_id in variant.genotypes:
                indiv_id_to_project_id[indiv_id] = project.project_id

            # save this variant (or just the genotypes from this variant if the variant if it's been seen already in another project)
            variant_id = "%s-%s-%s-%s" % (variant.chr, variant.pos,
                                          variant.ref, variant.alt)
            if variant_id not in rare_variant_dict:
                rare_variant_dict[variant_id] = variant
                project_variants.append(variant)
            else:
                rare_variant_dict[variant_id].genotypes.update(
                    variant.genotypes)

        #sys.stderr.write("gene_id: %s, variant: %s\n" % (gene_id, variant.toJSON()['annotation']['vep_annotation']))
        add_extra_info_to_variants_project(get_reference(), project,
                                           project_variants)
        rare_variants.extend(project_variants)
    sys.stderr.write("Retreived %s rare variants\n" % len(rare_variants))

    # compute knockout individuals
    individ_ids_and_variants = []
    for project in projects_to_search:
        knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
        for indiv_id in knockout_ids:
            variants = variation.get_relevant_variants_for_indiv_ids(
                [indiv_id])
            add_extra_info_to_variants_project(get_reference(), project,
                                               variants)
            individ_ids_and_variants.append({
                'indiv_id': indiv_id,
                'variants': variants,
            })
            #sys.stderr.write("%s : %s: Retrieved %s knockout variants\n" % (project.project_id, indiv_id, len(variants), ))

    download_csv = request.GET.get('download', '')
    if download_csv:
        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(
                download_csv, gene["transcript_name"])

        if download_csv == 'knockouts':

            individuals_to_include = [
                individ_id_and_variants["indiv_id"]
                for individ_id_and_variants in individ_ids_and_variants
            ]

            rows = []
            for individ_id_and_variants in individ_ids_and_variants:
                rare_variants = individ_id_and_variants["variants"]
                for variant in rare_variants:
                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][
                        worst_annotation_idx]
                    genotypes = []
                    all_genotypes_string = ""
                    for indiv_id in individuals_to_include:
                        if indiv_id in variant.genotypes and variant.genotypes[
                                indiv_id].num_alt > 0:
                            genotype = variant.genotypes[indiv_id]
                            allele_string = ">".join(genotype.alleles)
                            all_genotypes_string += indiv_id + ":" + allele_string + "  "
                            genotypes.append(allele_string + "   (" +
                                             str(genotype.gq) + ")")
                        else:
                            genotypes.append("")

                    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                        variant.unique_tuple(), ("", ""))

                    rows.append(
                        map(str, [
                            gene["symbol"],
                            variant.chr,
                            variant.pos,
                            variant.ref,
                            variant.alt,
                            variant.vcf_id or "",
                            variant.annotation.get("vep_consequence", ""),
                            worst_annotation.get("hgvsc", ""),
                            worst_annotation.get("hgvsp", "").replace(
                                "%3D", "="),
                            worst_annotation.get("sift", ""),
                            worst_annotation.get("polyphen", ""),
                            worst_annotation.get("mutationtaster_pred", ""),
                            ";".join(
                                set(
                                    worst_annotation.get("fathmm_pred",
                                                         "").split('%3B'))),
                            measureset_id,
                            clinvar_significance,
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3", ""),
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3_popmax", ""),
                            variant.annotation["freqs"].get("exac_v3", ""),
                            variant.annotation["freqs"].get(
                                "exac_v3_popmax", ""),
                            all_genotypes_string,
                        ] + genotypes))
        elif download_csv == 'rare_variants':
            individuals_to_include = []
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                        individuals_to_include.append(indiv_id)
            rows = []
            for variant in rare_variants:
                worst_annotation_idx = variant.annotation[
                    "worst_vep_index_per_gene"][gene_id]
                worst_annotation = variant.annotation["vep_annotation"][
                    worst_annotation_idx]
                genotypes = []
                all_genotypes_string = ""
                for indiv_id in individuals_to_include:
                    if indiv_id in variant.genotypes and variant.genotypes[
                            indiv_id].num_alt > 0:
                        genotype = variant.genotypes[indiv_id]
                        allele_string = ">".join(genotype.alleles)
                        all_genotypes_string += indiv_id + ":" + allele_string + "  "
                        genotypes.append(allele_string + "   (" +
                                         str(genotype.gq) + ")")
                    else:
                        genotypes.append("")

                measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                    variant.unique_tuple(), ("", ""))
                rows.append(
                    map(str, [
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        all_genotypes_string,
                    ] + genotypes))

        header = [
            "gene", "chr", "pos", "ref", "alt", "rsID", "impact", "HGVS.c",
            "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id",
            "clinvar_clinical_sig", "freq_1kg_wgs_phase3",
            "freq_1kg_wgs_phase3_popmax", "freq_exac_v3",
            "freq_exac_v3_popmax", "all_genotypes"
        ] + list(
            map(lambda i: i + " (from %s)" % indiv_id_to_project_id[i],
                individuals_to_include))

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response
    else:
        for individ_id_and_variants in individ_ids_and_variants:
            variants = individ_id_and_variants["variants"]
            individ_id_and_variants["variants"] = [
                v.toJSON() for v in variants
            ]

        return render(
            request, 'project/gene_quicklook.html', {
                'gene':
                gene,
                'gene_json':
                json.dumps(gene),
                'project':
                project,
                'rare_variants_json':
                json.dumps([v.toJSON() for v in rare_variants]),
                'individuals_json':
                json.dumps([
                    i.get_json_obj() for project in projects_to_search
                    for i in project.get_individuals()
                ]),
                'knockouts_json':
                json.dumps(individ_ids_and_variants),
                'other_projects_json':
                other_projects_json,
            })
예제 #4
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")
    
    if gene_id is None:
        return render(request, 'project/gene_quicklook.html', {
            'project': project,
            'gene': None,
            'gene_json': None,
            'rare_variants_json': None,
            'individuals_json': None,
            'knockouts_json': None,
        })
        
        
    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(project_id + " - staring gene search for: %s %s \n" % (gene_id, gene))

    # all rare coding variants
    variant_filter = get_default_variant_filter('all_coding', mall.get_annotator().reference_population_slugs)

    rare_variants = []
    for variant in project_analysis.get_variants_in_gene(project, gene_id, variant_filter=variant_filter):
        max_af = max(variant.annotation['freqs'].values())
        if max_af < .01:
            rare_variants.append(variant)
    #sys.stderr.write("gene_id: %s, variant: %s\n" % (gene_id, variant.toJSON()['annotation']['vep_annotation']))
    add_extra_info_to_variants_project(get_reference(), project, rare_variants)

    # compute knockout individuals
    individ_ids_and_variants = []
    knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
    for indiv_id in knockout_ids:
        variants = variation.get_relevant_variants_for_indiv_ids([indiv_id])
        add_extra_info_to_variants_project(get_reference(), project, variants)
        individ_ids_and_variants.append({
            'indiv_id': indiv_id,
            'variants': variants,
        })

    sys.stderr.write("Project-wide gene search retrieved %s rare variants for gene: %s \n" % (len(rare_variants), gene_id))

    download_csv = request.GET.get('download', '')
    if download_csv:
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(download_csv, gene["transcript_name"])

        if download_csv == 'knockouts':

            individuals_to_include = [individ_id_and_variants["indiv_id"] for individ_id_and_variants in individ_ids_and_variants]

            rows = []
            for individ_id_and_variants in individ_ids_and_variants:
                rare_variants = individ_id_and_variants["variants"]
                for variant in rare_variants:
                    worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][worst_annotation_idx]
                    genotypes = []
                    all_genotypes_string = ""
                    for indiv_id in individuals_to_include:
                        genotype = variant.genotypes[indiv_id]
                        allele_string = ">".join(genotype.alleles)
                        all_genotypes_string += indiv_id + ":" + allele_string + "  "
                        if genotype.num_alt > 0:
                            genotypes.append(allele_string + "   (" + str(genotype.gq) + ")")
                        else:
                            genotypes.append("")

                    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(variant.unique_tuple(), ("", ""))

                    rows.append(map(str,
                        [ gene["symbol"],
                          variant.chr,
                          variant.pos,
                          variant.ref,
                          variant.alt,
                          variant.vcf_id or "",
                          variant.annotation.get("vep_consequence", ""),
                          worst_annotation.get("hgvsc", ""),
                          worst_annotation.get("hgvsp", "").replace("%3D", "="),
                          worst_annotation.get("sift", ""),
                          worst_annotation.get("polyphen", ""),
                          worst_annotation.get("mutationtaster_pred", ""),
                          ";".join(set(worst_annotation.get("fathmm_pred", "").split('%3B'))),

                          measureset_id,
                          clinvar_significance,

                          variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                          variant.annotation["freqs"].get("1kg_wgs_phase3_popmax", ""),
                          variant.annotation["freqs"].get("exac_v3", ""),
                          variant.annotation["freqs"].get("exac_v3_popmax", ""),
                          all_genotypes_string,
                        ] + genotypes))
        elif download_csv == 'rare_variants':
            individuals_to_include = []
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                        individuals_to_include.append(indiv_id)
            rows = []
            for variant in rare_variants:
                worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][gene_id]
                worst_annotation = variant.annotation["vep_annotation"][worst_annotation_idx]
                genotypes = []
                all_genotypes_string = ""
                for indiv_id in individuals_to_include:
                    genotype = variant.genotypes[indiv_id]
                    allele_string = ">".join(genotype.alleles)
                    all_genotypes_string += indiv_id + ":" + allele_string + "  "
                    if genotype.num_alt > 0:
                        genotypes.append(allele_string + "   (" + str(genotype.gq) + ")")
                    else:
                        genotypes.append("")

                measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(variant.unique_tuple(), ("", ""))
                rows.append(map(str,
                    [ gene["symbol"],
                      variant.chr,
                      variant.pos,
                      variant.ref,
                      variant.alt,
                      variant.vcf_id or "",
                      variant.annotation.get("vep_consequence", ""),
                      worst_annotation.get("hgvsc", ""),
                      worst_annotation.get("hgvsp", "").replace("%3D", "="),
                      worst_annotation.get("sift", ""),
                      worst_annotation.get("polyphen", ""),
                      worst_annotation.get("mutationtaster_pred", ""),
                      ";".join(set(worst_annotation.get("fathmm_pred", "").split('%3B'))),
                      measureset_id,
                      clinvar_significance,
                      variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                      variant.annotation["freqs"].get("1kg_wgs_phase3_popmax", ""),
                      variant.annotation["freqs"].get("exac_v3", ""),
                      variant.annotation["freqs"].get("exac_v3_popmax", ""),
                      all_genotypes_string,
                    ] + genotypes))


        header = ["gene", "chr", "pos", "ref", "alt", "rsID", "impact",
                  "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id", "clinvar_clinical_sig",
                  "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
                  "freq_exac_v3", "freq_exac_v3_popmax",
                  "all_genotypes"] + individuals_to_include

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response
    else:
        for individ_id_and_variants in individ_ids_and_variants:
            variants = individ_id_and_variants["variants"]
            individ_id_and_variants["variants"] = [v.toJSON() for v in variants]

        return render(request, 'project/gene_quicklook.html', {
            'gene': gene,
            'gene_json': json.dumps(gene),
            'project': project,
            'rare_variants_json': json.dumps([v.toJSON() for v in rare_variants]),
            'individuals_json': json.dumps([i.get_json_obj() for i in project.get_individuals()]),
            'knockouts_json': json.dumps(individ_ids_and_variants),
        })
예제 #5
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project
    """
    main_project = get_object_or_404(Project, project_id=project_id)
    if not main_project.can_view(request.user):
        return HttpResponse("Unauthorized")

    # other projects this user can view
    other_projects = get_loaded_projects_for_user(
        request.user, fields=['project_id', 'project_name'])

    if other_projects:
        other_projects_json = json.dumps([{
            'project_id': p.project_id,
            'project_name': p.project_name
        } for p in sorted(other_projects, key=lambda p: p.project_id.lower())])
    else:
        other_projects_json = None

    if gene_id is None:
        return render(
            request, 'project/gene_quicklook.html', {
                'project': main_project,
                'gene': None,
                'gene_json': None,
                'rare_variants_json': None,
                'individuals_json': None,
                'knockouts_json': None,
                'other_projects_json': other_projects_json,
            })

    projects_to_search_param = request.GET.get('selected_projects')
    if projects_to_search_param:
        project_ids = projects_to_search_param.split(",")
        projects_to_search = [
            project for project in other_projects
            if project.project_id in project_ids
        ]
        if len(projects_to_search) < len(project_ids):
            # If not all the specified project ids are in the other projects list then they are not authorized
            return HttpResponse("Unauthorized")
    else:
        project_ids = [main_project.project_id]
        projects_to_search = [main_project]

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(
        project_id + " - staring gene search for: %s in projects: %s\n" %
        (gene_id, ",".join([p.project_id for p in projects_to_search]) + "\n"))

    # all rare coding variants
    variant_filter = get_default_variant_filter(
        'all_coding',
        mall.get_annotator().reference_population_slugs)

    indiv_id_to_project_id = {}
    rare_variant_dict = {}
    rare_variants = []
    individ_ids_and_variants = []
    for project in projects_to_search:
        all_project_variants = project_analysis.get_variants_in_gene(
            project, gene_id, variant_filter=variant_filter)

        # compute knockout individuals
        knockout_ids, variation = get_knockouts_in_gene(
            project, gene_id, all_project_variants)
        for indiv_id in knockout_ids:
            variants = variation.get_relevant_variants_for_indiv_ids(
                [indiv_id])
            individ_ids_and_variants.append({
                'indiv_id': indiv_id,
                'variants': variants,
            })

        # compute rare variants
        project_variants = []
        for i, variant in enumerate(all_project_variants):
            max_af = max([
                freq for label, freq in variant.annotation['freqs'].items()
                if label != "AF"
            ])  # don't filter on within-cohort AF

            if not any([
                    indiv_id
                    for indiv_id, genotype in variant.genotypes.items()
                    if genotype.num_alt > 0
            ]):
                continue
            if max_af >= .01:
                continue

            # add project id to genotypes
            for indiv_id in variant.genotypes:
                indiv_id_to_project_id[indiv_id] = project.project_id

            # save this variant (or just the genotypes from this variant if the variant if it's been seen already in another project)
            variant_id = "%s-%s-%s-%s" % (variant.chr, variant.pos,
                                          variant.ref, variant.alt)
            if variant_id not in rare_variant_dict:
                rare_variant_dict[variant_id] = variant
                project_variants.append(variant)
            else:
                rare_variant_dict[variant_id].genotypes.update(
                    variant.genotypes)

        rare_variants.extend(project_variants)

    all_variants = sum([i['variants'] for i in individ_ids_and_variants],
                       rare_variants)
    add_extra_info_to_variants_project(get_reference(), project, all_variants)
    download_csv = request.GET.get('download', '')
    if download_csv:
        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(
                download_csv,
                gene.get("symbol") or gene.get("transcript_name"))

        if download_csv == 'knockouts':

            individuals_to_include = [
                individ_id_and_variants["indiv_id"]
                for individ_id_and_variants in individ_ids_and_variants
            ]

            rows = []
            for individ_id_and_variants in individ_ids_and_variants:
                rare_variants = individ_id_and_variants["variants"]
                for variant in rare_variants:
                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][
                        worst_annotation_idx]
                    genotypes = []
                    all_genotypes_string = ""
                    for indiv_id in individuals_to_include:
                        if indiv_id in variant.genotypes and variant.genotypes[
                                indiv_id].num_alt > 0:
                            genotype = variant.genotypes[indiv_id]
                            allele_string = ">".join(genotype.alleles)
                            all_genotypes_string += indiv_id + ":" + allele_string + "  "
                            genotypes.append(allele_string + "   (" +
                                             str(genotype.gq) + ")")
                        else:
                            genotypes.append("")

                    measureset_id, clinvar_significance = get_reference(
                    ).get_clinvar_info(*variant.unique_tuple())
                    rows.append(
                        map(str, [
                            gene["symbol"],
                            variant.chr,
                            variant.pos,
                            variant.ref,
                            variant.alt,
                            variant.vcf_id or "",
                            variant.annotation.get("vep_consequence", ""),
                            worst_annotation.get("hgvsc", ""),
                            worst_annotation.get("hgvsp", "").replace(
                                "%3D", "="),
                            worst_annotation.get("sift", ""),
                            worst_annotation.get("polyphen", ""),
                            worst_annotation.get("mutationtaster_pred", ""),
                            ";".join(
                                set(
                                    worst_annotation.get("fathmm_pred",
                                                         "").split('%3B'))),
                            measureset_id,
                            clinvar_significance,
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3", ""),
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3_popmax", ""),
                            variant.annotation["freqs"].get("exac_v3", ""),
                            variant.annotation["freqs"].get(
                                "exac_v3_popmax", ""),
                            all_genotypes_string,
                        ] + genotypes))
        elif download_csv == 'rare_variants':
            individuals_to_include = []
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                        individuals_to_include.append(indiv_id)
            rows = []
            for variant in rare_variants:
                worst_annotation_idx = variant.annotation[
                    "worst_vep_index_per_gene"][gene_id]
                worst_annotation = variant.annotation["vep_annotation"][
                    worst_annotation_idx]
                genotypes = []
                all_genotypes_string = ""
                for indiv_id in individuals_to_include:
                    if indiv_id in variant.genotypes and variant.genotypes[
                            indiv_id].num_alt > 0:
                        genotype = variant.genotypes[indiv_id]
                        allele_string = ">".join(genotype.alleles)
                        all_genotypes_string += indiv_id + ":" + allele_string + "  "
                        genotypes.append(allele_string + "   (" +
                                         str(genotype.gq) + ")")
                    else:
                        genotypes.append("")

                measureset_id, clinvar_significance = get_reference(
                ).get_clinvar_info(*variant.unique_tuple())
                rows.append(
                    map(str, [
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        all_genotypes_string,
                    ] + genotypes))

        header = [
            "gene", "chr", "pos", "ref", "alt", "rsID", "impact", "HGVS.c",
            "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id",
            "clinvar_clinical_sig", "freq_1kg_wgs_phase3",
            "freq_1kg_wgs_phase3_popmax", "freq_exac_v3",
            "freq_exac_v3_popmax", "all_genotypes"
        ] + list(
            map(lambda i: i + " (from %s)" % indiv_id_to_project_id[i],
                individuals_to_include))

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response
    else:
        for individ_id_and_variants in individ_ids_and_variants:
            variants = individ_id_and_variants["variants"]
            individ_id_and_variants["variants"] = [
                v.toJSON() for v in variants
            ]

        individ_ids = {i['indiv_id'] for i in individ_ids_and_variants}
        for var in rare_variants:
            individ_ids.update(var.genotypes.keys())
        individuals = Individual.objects.filter(
            indiv_id__in=individ_ids,
            project__project_id__in=project_ids).select_related(
                'project').select_related('family').only(
                    'project__project_id', 'family__family_id',
                    *Individual.INDIVIDUAL_JSON_FIELDS_NO_IDS)

        return render(
            request, 'project/gene_quicklook.html', {
                'gene':
                gene,
                'gene_json':
                json.dumps(gene),
                'project':
                main_project,
                'rare_variants_json':
                json.dumps([v.toJSON() for v in rare_variants]),
                'individuals_json':
                json.dumps([
                    i.get_json_obj(skip_has_variant_data=True)
                    for i in individuals
                ]),
                'knockouts_json':
                json.dumps(individ_ids_and_variants),
                'other_projects_json':
                other_projects_json,
            })
예제 #6
0
    def search_for_gene(self, search_gene_id, project_id_list, max_af=0.01):
        '''
        Search for a gene across project(s)
        Args:
          1. search_gene_id: Gene ID to search for
          2. proj_list: An optional list of projects to narrow down search to
      '''
        gene_id = get_gene_id_from_str(search_gene_id, get_reference())
        gene = get_reference().get_gene(gene_id)

        print("Staring gene search for: %s %s in projects: %s\n" %
              (search_gene_id, gene['gene_id'], ", ".join(project_id_list)))
        print("Max AF threshold: %s" % max_af)

        # all rare coding variants
        variant_filter = get_default_variant_filter(
            'all_coding',
            mall.get_annotator().reference_population_slugs)
        print("All Filters: ")
        pprint(variant_filter.toJSON())

        output_filename = 'results_' + search_gene_id + '.tsv'
        outfile = open(output_filename, 'w')

        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "all_genotypes"
        ]

        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(header)

        if project_id_list:
            for project_id in project_id_list:
                project = Project.objects.filter(
                    project_id=project_id)[0]  # TODO validate
        else:
            project_id_list = [p.project_id for p in Project.objects.all()]

        for project_id in project_id_list:
            project = Project.objects.filter(project_id=project_id)[0]
            if get_project_datastore(project_id).project_collection_is_loaded(
                    project_id):
                print("Running on project %s" % project_id)
            else:
                print(
                    "Skipping project %s - gene search is not enabled for this project"
                    % project_id)
                continue

            for variant in project_analysis.get_variants_in_gene(
                    project, gene_id, variant_filter=variant_filter):
                if max(variant.annotation['freqs'].values()) >= max_af:
                    continue
                #pprint(variant.toJSON())
                add_extra_info_to_variants_project(get_reference(), project,
                                                   [variant])

                worst_annotation_idx = variant.annotation[
                    "worst_vep_index_per_gene"][gene_id]
                worst_annotation = variant.annotation["vep_annotation"][
                    worst_annotation_idx]
                all_genotypes_list = []
                pass_filter = "N/A"
                for indiv_id, genotype in variant.genotypes.items():
                    pass_filter = genotype.filter  # filter value is stored in the genotypes even though it's the same for all individuals
                    if genotype.num_alt > 0:
                        all_genotypes_list.append(
                            "%s[gt:%s GQ:%s AB:%0.3f]" %
                            (indiv_id, ">".join(
                                genotype.alleles), genotype.gq, genotype.ab
                             if genotype.ab is not None else float('NaN')))

                measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                    variant.unique_tuple(), ("", ""))
                row = map(str, [
                    project_id,
                    gene["symbol"],
                    variant.chr,
                    variant.pos,
                    variant.ref,
                    variant.alt,
                    variant.vcf_id or "",
                    pass_filter,
                    variant.annotation.get("vep_consequence", ""),
                    worst_annotation.get("hgvsc", ""),
                    worst_annotation.get("hgvsp", "").replace("%3D", "="),
                    worst_annotation.get("sift", ""),
                    worst_annotation.get("polyphen", ""),
                    worst_annotation.get("mutationtaster_pred", ""),
                    ";".join(
                        set(
                            worst_annotation.get("fathmm_pred",
                                                 "").split('%3B'))),
                    measureset_id,
                    clinvar_significance,
                    variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                    variant.annotation["freqs"].get("1kg_wgs_phase3_popmax",
                                                    ""),
                    variant.annotation["freqs"].get("exac_v3", ""),
                    variant.annotation["freqs"].get("exac_v3_popmax", ""),
                    ", ".join(all_genotypes_list),
                ])
                writer.writerow(row)

        outfile.close()
        print("Wrote out %s" % output_filename)
예제 #7
0
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project
    """
    main_project = get_object_or_404(Project, project_id=project_id)
    if not main_project.can_view(request.user):
        return HttpResponse("Unauthorized")

    new_page_url = '/variant_search/project/{}'.format(main_project.seqr_project.guid) if main_project.seqr_project and main_project.seqr_project.has_new_search else None

    # other projects this user can view
    other_projects = get_loaded_projects_for_user(request.user, fields=['project_id', 'project_name'])

    if other_projects:
        other_projects_json = json.dumps([{'project_id': p.project_id, 'project_name': p.project_name} for p in sorted(other_projects, key=lambda p: p.project_id.lower())])
    else:
        other_projects_json = None

    if gene_id is None:
        return render(request, 'project/gene_quicklook.html', {
            'project': main_project,
            'gene': None,
            'gene_json': None,
            'rare_variants_json': None,
            'individuals_json': None,
            'knockouts_json': None,
            'other_projects_json': other_projects_json,
            'new_page_url': new_page_url,
        })

    projects_to_search_param = request.GET.get('selected_projects')
    if projects_to_search_param:
        project_ids = projects_to_search_param.split(",")
        projects_to_search = [project for project in other_projects if project.project_id in project_ids]
        if len(projects_to_search) < len(project_ids):
            # If not all the specified project ids are in the other projects list then they are not authorized
            return HttpResponse("Unauthorized")
    else:
        project_ids = [main_project.project_id]
        projects_to_search = [main_project]

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)

    # all rare coding variants
    variant_filter = get_default_variant_filter('all_coding', mall.get_annotator().reference_population_slugs)

    indiv_id_to_project_id = {}
    rare_variant_dict = {}
    rare_variants = []
    individ_ids_and_variants = []
    for project in projects_to_search:
        all_project_variants = project_analysis.get_variants_in_gene(project, gene_id, variant_filter=variant_filter)

        # compute knockout individuals
        knockout_ids, variation = get_knockouts_in_gene(project, gene_id, all_project_variants)
        for indiv_id in knockout_ids:
            variants = variation.get_relevant_variants_for_indiv_ids([indiv_id])
            individ_ids_and_variants.append({
                'indiv_id': indiv_id,
                'variants': variants,
            })

        # compute rare variants
        project_variants = []
        for i, variant in enumerate(all_project_variants):
            max_af = max([freq for label, freq in variant.annotation['freqs'].items() if label != "AF"])  # don't filter on within-cohort AF

            if not any([indiv_id for indiv_id, genotype in variant.genotypes.items() if genotype.num_alt > 0]):
                continue
            if max_af >= .01:
                continue

            # add project id to genotypes
            for indiv_id in variant.genotypes:
                indiv_id_to_project_id[indiv_id] = project.project_id

            # save this variant (or just the genotypes from this variant if the variant if it's been seen already in another project)
            variant_id = "%s-%s-%s-%s" % (variant.chr,variant.pos, variant.ref, variant.alt)
            if variant_id not in rare_variant_dict:
                rare_variant_dict[variant_id] = variant
                project_variants.append(variant)
            else:
                for indiv_id, genotype in variant.genotypes.items():
                    existing_genotype = rare_variant_dict[variant_id].genotypes.get(indiv_id)
                    if not existing_genotype or existing_genotype.num_alt == -1:
                        rare_variant_dict[variant_id].genotypes[indiv_id] = genotype
        if project != main_project:
            add_extra_info_to_variants_project(get_reference(), project, project_variants)
        rare_variants.extend(project_variants)

    all_variants = sum([i['variants'] for i in individ_ids_and_variants], rare_variants)
    add_extra_info_to_variants_project(get_reference(), main_project, all_variants, add_family_tags=True)
    download_csv = request.GET.get('download', '')
    if download_csv:
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(download_csv, gene.get("symbol") or gene.get("transcript_name"))

        def get_row(variant, worst_annotation):
            if 'clinvar_allele_id' in variant.extras:
                measureset_id = variant.extras['clinvar_allele_id']
                clinvar_significance = variant.extras['clinvar_clinsig']
            else:
                measureset_id, clinvar_significance = get_reference().get_clinvar_info(*variant.unique_tuple())
            genotypes = []

            all_genotypes_string = ""
            for indiv_id in individuals_to_include:
                if indiv_id in variant.genotypes and variant.genotypes[indiv_id].num_alt > 0:
                    genotype = variant.genotypes[indiv_id]
                    allele_string = ">".join(genotype.alleles)
                    all_genotypes_string += indiv_id + ":" + allele_string + "  "
                    genotypes.append(allele_string + "   (" + str(genotype.gq) + ")")
                else:
                    genotypes.append("")
            return [
                gene["symbol"],
                variant.chr,
                variant.pos,
                variant.ref,
                variant.alt,
                variant.vcf_id or variant.annotation.get("rsid") or "",
                variant.annotation.get("vep_consequence") or "",
                worst_annotation.get("hgvsc") or "",
                (worst_annotation.get("hgvsp") or "").replace("%3D", "="),
                variant.annotation.get("sift") or "",
                variant.annotation.get("polyphen") or "",
                variant.annotation.get("mutationtaster_pred") or variant.annotation.get("muttaster") or "",
                (";".join(set((worst_annotation.get("fathmm_pred") or "").split('%3B')))) or variant.annotation.get("fathmm") or "",

                measureset_id or "",
                clinvar_significance or "",

                variant.annotation["freqs"].get("1kg_wgs_phase3") or variant.annotation["freqs"].get("1kg_wgs_AF") or "",
                variant.annotation["freqs"].get("1kg_wgs_phase3_popmax") or variant.annotation["freqs"].get("1kg_wgs_popmax_AF") or "",
                variant.annotation["freqs"].get("exac_v3") or variant.annotation["freqs"].get("exac_v3_AF") or "",
                variant.annotation["freqs"].get("exac_v3_popmax") or variant.annotation["freqs"].get("exac_v3_popmax_AF") or "",
                variant.annotation["freqs"].get("gnomad_exomes_AF") or "",
                variant.annotation["freqs"].get("gnomad_exomes_popmax_AF") or "",
                variant.annotation["freqs"].get("gnomad_genomes_AF") or "",
                variant.annotation["freqs"].get("gnomad_genomes_popmax_AF") or "",
                all_genotypes_string,
            ] + genotypes

        if download_csv == 'knockouts':

            individuals_to_include = [individ_id_and_variants["indiv_id"] for individ_id_and_variants in individ_ids_and_variants]

            rows = []
            for individ_id_and_variants in individ_ids_and_variants:
                rare_variants = individ_id_and_variants["variants"]
                for variant in rare_variants:
                    worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][worst_annotation_idx]
                    genotypes = []
                    all_genotypes_string = ""
                    for indiv_id in individuals_to_include:
                        if indiv_id in variant.genotypes and variant.genotypes[indiv_id].num_alt > 0:
                            genotype = variant.genotypes[indiv_id]
                            allele_string = ">".join(genotype.alleles)
                            all_genotypes_string += indiv_id + ":" + allele_string + "  "
                            genotypes.append(allele_string + "   (" + str(genotype.gq) + ")")
                        else:
                            genotypes.append("")

                    rows.append(map(str, get_row(variant, worst_annotation)))

        elif download_csv == 'rare_variants':
            individuals_to_include = []
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                        individuals_to_include.append(indiv_id)
            rows = []
            for variant in rare_variants:
                worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][gene_id]
                worst_annotation = variant.annotation["vep_annotation"][worst_annotation_idx]

                rows.append(map(str, get_row(variant, worst_annotation)))

        header = ["gene", "chr", "pos", "ref", "alt", "rsID", "impact",
                  "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id", "clinvar_clinical_sig",
                  "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
                  "freq_exac_v3", "freq_exac_v3_popmax",
                  "freq_gnomad_exomes", "freq_gnomad_exomes_popmax",
                  "freq_gnomad_genomes", "freq_gnomad_genomes_popmax",
                  "all_genotypes"] + list(map(lambda i: i + " (from %s)" % indiv_id_to_project_id[i], individuals_to_include))

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response
    else:
        for individ_id_and_variants in individ_ids_and_variants:
            variants = individ_id_and_variants["variants"]
            individ_id_and_variants["variants"] = [v.toJSON() for v in variants]

        individ_ids = {i['indiv_id'] for i in individ_ids_and_variants}
        for var in rare_variants:
            individ_ids.update(var.genotypes.keys())
        individuals = Individual.objects.filter(
            indiv_id__in=individ_ids, project__project_id__in=project_ids
        ).select_related('project').select_related('family').only('project__project_id', 'family__family_id', *Individual.INDIVIDUAL_JSON_FIELDS_NO_IDS)

        return render(request, 'project/gene_quicklook.html', {
            'gene': gene,
            'gene_json': json.dumps(gene),
            'project': main_project,
            'rare_variants_json': json.dumps([v.toJSON() for v in rare_variants]),
            'individuals_json': json.dumps([i.get_json_obj(skip_has_variant_data=True) for i in individuals]),
            'knockouts_json': json.dumps(individ_ids_and_variants),
            'other_projects_json': other_projects_json,
            'new_page_url': new_page_url,
        })
예제 #8
0
파일: project_views.py 프로젝트: rpete/seqr
def gene_quicklook(request, project_id, gene_id):
    """
    Summary of a gene in a project
    """
    project = get_object_or_404(Project, project_id=project_id)
    if not project.can_view(request.user):
        return HttpResponse("Unauthorized")

    if project.project_status == Project.NEEDS_MORE_PHENOTYPES and not request.user.is_staff:
        return render(request, 'analysis_unavailable.html',
                      {'reason': 'Awaiting phenotype data.'})

    if gene_id is None:
        return render(
            request, 'project/gene_quicklook.html', {
                'project': project,
                'gene': None,
                'gene_json': None,
                'rare_variants_json': None,
                'individuals_json': None,
                'knockouts_json': None,
            })

    gene_id = get_gene_id_from_str(gene_id, get_reference())
    gene = get_reference().get_gene(gene_id)
    sys.stderr.write(project_id + " - staring gene search for: %s %s \n" %
                     (gene_id, gene))

    # all rare coding variants
    variant_filter = get_default_variant_filter(
        'all_coding',
        mall.get_annotator().reference_population_slugs)

    rare_variants = []
    for variant in project_analysis.get_variants_in_gene(
            project, gene_id, variant_filter=variant_filter):
        max_af = max(variant.annotation['freqs'].values())
        if not any([
                indiv_id for indiv_id, genotype in variant.genotypes.items()
                if genotype.num_alt > 0
        ]):
            continue
        if max_af < .01:
            rare_variants.append(variant)
    #sys.stderr.write("gene_id: %s, variant: %s\n" % (gene_id, variant.toJSON()['annotation']['vep_annotation']))
    add_extra_info_to_variants_project(get_reference(), project, rare_variants)

    # compute knockout individuals
    individ_ids_and_variants = []
    knockout_ids, variation = get_knockouts_in_gene(project, gene_id)
    for indiv_id in knockout_ids:
        variants = variation.get_relevant_variants_for_indiv_ids([indiv_id])
        add_extra_info_to_variants_project(get_reference(), project, variants)
        individ_ids_and_variants.append({
            'indiv_id': indiv_id,
            'variants': variants,
        })

    sys.stderr.write(
        "Project-wide gene search retrieved %s rare variants for gene: %s \n" %
        (len(rare_variants), gene_id))

    download_csv = request.GET.get('download', '')
    if download_csv:
        response = HttpResponse(content_type='text/csv')
        response[
            'Content-Disposition'] = 'attachment; filename="{}_{}.csv"'.format(
                download_csv, gene["transcript_name"])

        if download_csv == 'knockouts':

            individuals_to_include = [
                individ_id_and_variants["indiv_id"]
                for individ_id_and_variants in individ_ids_and_variants
            ]

            rows = []
            for individ_id_and_variants in individ_ids_and_variants:
                rare_variants = individ_id_and_variants["variants"]
                for variant in rare_variants:
                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][
                        worst_annotation_idx]
                    genotypes = []
                    all_genotypes_string = ""
                    for indiv_id in individuals_to_include:
                        genotype = variant.genotypes[indiv_id]
                        allele_string = ">".join(genotype.alleles)
                        all_genotypes_string += indiv_id + ":" + allele_string + "  "
                        if genotype.num_alt > 0:
                            genotypes.append(allele_string + "   (" +
                                             str(genotype.gq) + ")")
                        else:
                            genotypes.append("")

                    measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                        variant.unique_tuple(), ("", ""))

                    rows.append(
                        map(str, [
                            gene["symbol"],
                            variant.chr,
                            variant.pos,
                            variant.ref,
                            variant.alt,
                            variant.vcf_id or "",
                            variant.annotation.get("vep_consequence", ""),
                            worst_annotation.get("hgvsc", ""),
                            worst_annotation.get("hgvsp", "").replace(
                                "%3D", "="),
                            worst_annotation.get("sift", ""),
                            worst_annotation.get("polyphen", ""),
                            worst_annotation.get("mutationtaster_pred", ""),
                            ";".join(
                                set(
                                    worst_annotation.get("fathmm_pred",
                                                         "").split('%3B'))),
                            measureset_id,
                            clinvar_significance,
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3", ""),
                            variant.annotation["freqs"].get(
                                "1kg_wgs_phase3_popmax", ""),
                            variant.annotation["freqs"].get("exac_v3", ""),
                            variant.annotation["freqs"].get(
                                "exac_v3_popmax", ""),
                            all_genotypes_string,
                        ] + genotypes))
        elif download_csv == 'rare_variants':
            individuals_to_include = []
            for variant in rare_variants:
                for indiv_id, genotype in variant.genotypes.items():
                    if genotype.num_alt > 0 and indiv_id not in individuals_to_include:
                        individuals_to_include.append(indiv_id)
            rows = []
            for variant in rare_variants:
                worst_annotation_idx = variant.annotation[
                    "worst_vep_index_per_gene"][gene_id]
                worst_annotation = variant.annotation["vep_annotation"][
                    worst_annotation_idx]
                genotypes = []
                all_genotypes_string = ""
                for indiv_id in individuals_to_include:
                    genotype = variant.genotypes[indiv_id]
                    allele_string = ">".join(genotype.alleles)
                    all_genotypes_string += indiv_id + ":" + allele_string + "  "
                    if genotype.num_alt > 0:
                        genotypes.append(allele_string + "   (" +
                                         str(genotype.gq) + ")")
                    else:
                        genotypes.append("")

                measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(
                    variant.unique_tuple(), ("", ""))
                rows.append(
                    map(str, [
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        all_genotypes_string,
                    ] + genotypes))

        header = [
            "gene", "chr", "pos", "ref", "alt", "rsID", "impact", "HGVS.c",
            "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id",
            "clinvar_clinical_sig", "freq_1kg_wgs_phase3",
            "freq_1kg_wgs_phase3_popmax", "freq_exac_v3",
            "freq_exac_v3_popmax", "all_genotypes"
        ] + individuals_to_include

        writer = csv.writer(response)
        writer.writerow(header)
        for row in rows:
            writer.writerow(row)
        return response
    else:
        for individ_id_and_variants in individ_ids_and_variants:
            variants = individ_id_and_variants["variants"]
            individ_id_and_variants["variants"] = [
                v.toJSON() for v in variants
            ]

        return render(
            request, 'project/gene_quicklook.html', {
                'gene':
                gene,
                'gene_json':
                json.dumps(gene),
                'project':
                project,
                'rare_variants_json':
                json.dumps([v.toJSON() for v in rare_variants]),
                'individuals_json':
                json.dumps(
                    [i.get_json_obj() for i in project.get_individuals()]),
                'knockouts_json':
                json.dumps(individ_ids_and_variants),
            })
예제 #9
0
    def search_for_genes(self,
                         gene_or_variant_ids,
                         project_id_list,
                         output_filename,
                         max_af=0.01,
                         knockouts=False,
                         in_clinvar_only=False,
                         include_non_coding=False):
        """
        Search for a gene across project(s)

        Args:
            gene_or_variant_ids (list): 'ENSG..' gene id strings.
            project_id_list (list): (optional) project ids to narrow down the search
            output_filename (string): output file name
            max_af (float): AF filter
            in_clinvar_only (bool):
            include_non_coding (bool):
        """

        projects = [
            Project.objects.get(project_id=project_id)
            for project_id in project_id_list
        ]

        outfile = open(output_filename, 'w')

        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "gnomad-exomes",
            "gnomad-genomes", "families", "all_genotypes"
        ]

        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(header)

        # all rare coding variants
        if not knockouts:
            variant_filter = get_default_variant_filter(
                'all_coding',
                mall.get_annotator().reference_population_slugs)
            #variant_filter.set_max_AF(max_af)
            if include_non_coding:
                variant_filter.so_annotations = []
            print("All Filters: ")
            pprint(variant_filter.toJSON())

        #print("Max AF threshold: %s" % max_af)
        print("Starting search for:\n%s\nin projects:\n%s\n" %
              (", ".join(gene_or_variant_ids), ", ".join(
                  [p.project_id for p in projects])))

        for project in projects:
            project_id = project.project_id
            if get_project_datastore(project).project_collection_is_loaded(
                    project):
                print("=====================")
                print("Searching project %s" % project_id)
            else:
                print(
                    "Skipping project %s - gene search is not enabled for this project"
                    % project_id)
                continue

            indiv_cache = {}
            for gene_or_variant_id in gene_or_variant_ids:
                chrom_pos_match = re.match("([0-9XY]{1,2})-([0-9]{1,9})",
                                           gene_or_variant_id)
                chrom_pos_ref_alt_match = re.match(
                    "([0-9XY]{1,2})-([0-9]{1,9})-([ACTG]+)-([ACTG]+)",
                    gene_or_variant_id)

                if chrom_pos_match or chrom_pos_ref_alt_match:
                    chrom = chrom_pos_match.group(1)
                    pos = int(chrom_pos_match.group(2))
                    xpos = genomeloc.get_xpos(chrom, pos)
                    ref = alt = None
                    if chrom_pos_ref_alt_match:
                        ref = chrom_pos_ref_alt_match.group(3)
                        alt = chrom_pos_ref_alt_match.group(4)

                    variant = get_project_datastore(
                        project).get_single_variant(project.project_id, None,
                                                    xpos, ref, alt)
                    if variant is None:
                        continue
                    variants = [variant]
                    print("-- searching %s for variant %s-%s-%s: found %s" %
                          (project_id, xpos, ref, alt, variant))
                    worst_annotation_idx = variant.annotation[
                        'worst_vep_annotation_index']
                    print(variant.annotation["vep_annotation"]
                          [worst_annotation_idx])
                    gene_id = variant.annotation["vep_annotation"][
                        worst_annotation_idx]['gene_id']
                    gene = get_reference().get_gene(gene_id)
                else:
                    gene_id = get_gene_id_from_str(gene_or_variant_id,
                                                   get_reference())
                    gene = get_reference().get_gene(gene_id)
                    print("-- searching %s for gene %s (%s)" %
                          (project_id, gene["symbol"], gene_id))

                    if knockouts:
                        knockout_ids, variation = project_analysis.get_knockouts_in_gene(
                            project, gene_id)
                        variants = variation.get_relevant_variants_for_indiv_ids(
                            knockout_ids)
                    else:
                        variants = project_analysis.get_variants_in_gene(
                            project, gene_id, variant_filter=variant_filter)

                for variant in variants:
                    if not chrom_pos_match and not chrom_pos_ref_alt_match and max(
                            variant.annotation['freqs'].values()) >= max_af:
                        continue

                    add_extra_info_to_variants_project(get_reference(),
                                                       project, [variant])
                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"].get(gene_id)

                    if worst_annotation_idx is not None:
                        worst_annotation = variant.annotation[
                            "vep_annotation"][worst_annotation_idx]
                    else:
                        worst_annotation = None
                    all_genotypes_list = []
                    pass_filter = "N/A"
                    family_ids = set()
                    for indiv_id, genotype in variant.genotypes.items():
                        if indiv_id in indiv_cache:
                            individual = indiv_cache[indiv_id]
                            if individual == 'deleted':
                                continue
                        else:
                            try:
                                individual = Individual.objects.get(
                                    project=project, indiv_id=indiv_id)
                                indiv_cache[indiv_id] = individual
                            except ObjectDoesNotExist:
                                # this can happen when an individual is deleted from the project - from postgres, but not from mong
                                indiv_cache[indiv_id] = 'deleted'
                                continue
                            except MultipleObjectsReturned:
                                # when several families have an individual with the same id
                                individuals = Individual.objects.filter(
                                    project=project, indiv_id=indiv_id)
                                individual = individuals[0]
                                indiv_cache[indiv_id] = individual

                        pass_filter = genotype.filter  # filter value is stored in the genotypes even though it's the same for all individuals
                        if genotype.num_alt > 0:
                            family_ids.add(individual.family.family_id)
                            all_genotypes_list.append(
                                "%s/%s%s[gt:%s GQ:%s AB:%0.3f]" %
                                (individual.family.family_id, indiv_id,
                                 "[Affected]" if individual.affected == "A"
                                 else ("[-]" if individual.affected == "N" else
                                       "[?]"), ">".join(genotype.alleles),
                                 genotype.gq, genotype.ab
                                 if genotype.ab is not None else float('NaN')))

                    if len(all_genotypes_list) == 0:
                        continue

                    measureset_id, clinvar_significance = get_reference(
                    ).get_clinvar_info(*variant.unique_tuple())
                    if in_clinvar_only and (
                            not clinvar_significance
                            or "path" not in clinvar_significance.lower()):
                        continue

                    row = map(str, [
                        project_id,
                        gene,
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        pass_filter,
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", "")
                        if worst_annotation else "",
                        (worst_annotation.get("hgvsp", "") or "").replace(
                            "%3D", "=") if worst_annotation else "",
                        worst_annotation.get("sift", "")
                        if worst_annotation else "",
                        worst_annotation.get("polyphen", "")
                        if worst_annotation else "",
                        worst_annotation.get("mutationtaster_pred", "")
                        if worst_annotation else "",
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B')))
                        if worst_annotation else "",
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        variant.annotation["freqs"].get("gnomad-exomes2", ""),
                        variant.annotation["freqs"].get("gnomad-genomes2", ""),
                        ", ".join(sorted(list(family_ids))),
                        ", ".join(all_genotypes_list),
                    ])

                    writer.writerow(row)

        outfile.close()
        print("Wrote out %s" % output_filename)
예제 #10
0
    def search_for_gene(self, search_gene_id, project_id_list, max_af=0.01):
      '''
        Search for a gene across project(s)
        Args:
          1. search_gene_id: Gene ID to search for
          2. proj_list: An optional list of projects to narrow down search to
      '''
      gene_id = get_gene_id_from_str(search_gene_id, get_reference())
      gene = get_reference().get_gene(gene_id)
      
      print("Staring gene search for: %s %s in projects: %s\n" % (search_gene_id, gene['gene_id'], ", ".join(project_id_list)))
      print("Max AF threshold: %s" % max_af)

      # all rare coding variants
      variant_filter = get_default_variant_filter('all_coding', mall.get_annotator().reference_population_slugs)
      print("All Filters: ")
      pprint(variant_filter.toJSON())

      output_filename = 'results_'+search_gene_id + '.tsv'
      outfile = open(output_filename,'w')

      header = ["project_id","gene", "chr", "pos", "ref", "alt", "rsID", "filter", "impact",
                "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster", "fathmm", "clinvar_id", "clinvar_clinical_sig",
                "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
                "freq_exac_v3", "freq_exac_v3_popmax",
                "all_genotypes"]

      
      writer = csv.writer(outfile,delimiter='\t')
      writer.writerow(header)
      
      if project_id_list: 
          for project_id in project_id_list:
              project = Project.objects.filter(project_id=project_id)[0]  # TODO validate
      else:
          project_id_list = [p.project_id for p in Project.objects.all()]
      
      for project_id in project_id_list:
          project = Project.objects.filter(project_id=project_id)[0]
          if get_project_datastore(project_id).project_collection_is_loaded(project_id):
              print("Running on project %s" % project_id)
          else:
              print("Skipping project %s - gene search is not enabled for this project" % project_id)
              continue

          for variant in project_analysis.get_variants_in_gene(project, gene_id, variant_filter=variant_filter):
              if max(variant.annotation['freqs'].values()) >= max_af:
                  continue
              #pprint(variant.toJSON())
              add_extra_info_to_variants_project(get_reference(), project, [variant])

              worst_annotation_idx = variant.annotation["worst_vep_index_per_gene"][gene_id]
              worst_annotation = variant.annotation["vep_annotation"][worst_annotation_idx]
              all_genotypes_list = []
              pass_filter = "N/A"
              for indiv_id, genotype in variant.genotypes.items():
                  pass_filter = genotype.filter  # filter value is stored in the genotypes even though it's the same for all individuals
                  if genotype.num_alt > 0:
                    all_genotypes_list.append("%s[gt:%s GQ:%s AB:%0.3f]" % (indiv_id, ">".join(genotype.alleles), genotype.gq, genotype.ab if genotype.ab is not None else float('NaN')))

              measureset_id, clinvar_significance = settings.CLINVAR_VARIANTS.get(variant.unique_tuple(), ("", ""))
              row = map(str,
                  [project_id, 
                    gene["symbol"],
                    variant.chr,
                    variant.pos,
                    variant.ref,
                    variant.alt,
                    variant.vcf_id or "",
                    pass_filter,
                    variant.annotation.get("vep_consequence", ""),
                    worst_annotation.get("hgvsc", ""),
                    worst_annotation.get("hgvsp", "").replace("%3D", "="),
                    worst_annotation.get("sift", ""),
                    worst_annotation.get("polyphen", ""),
                    worst_annotation.get("mutationtaster_pred", ""),
                    ";".join(set(worst_annotation.get("fathmm_pred", "").split('%3B'))),
                    measureset_id,
                    clinvar_significance,
                    variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                    variant.annotation["freqs"].get("1kg_wgs_phase3_popmax", ""),
                    variant.annotation["freqs"].get("exac_v3", ""),
                    variant.annotation["freqs"].get("exac_v3_popmax", ""),
                    ", ".join(all_genotypes_list),
                  ])
              writer.writerow(row)
      
      outfile.close()        
      print("Wrote out %s" % output_filename)
예제 #11
0
    def search_for_genes(self,
                         gene_ids,
                         project_id_list,
                         output_filename,
                         max_af=0.01):
        """
        Search for a gene across project(s)

        Args:
            gene_ids (list): 'ENSG..' gene id strings.
            project_id_list (list): (optional) project ids to narrow down the search
            output_filename (string): output file name
            max_af (float): AF filter
        """

        outfile = open(output_filename, 'w')

        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "all_genotypes"
        ]

        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(header)

        # all rare coding variants
        variant_filter = get_default_variant_filter(
            'all_coding',
            mall.get_annotator().reference_population_slugs)
        print("All Filters: ")
        pprint(variant_filter.toJSON())

        if project_id_list:
            projects = [
                Project.objects.get(project_id=project_id)
                for project_id in project_id_list
            ]
        else:
            projects = Project.objects.all()

        print("Max AF threshold: %s" % max_af)
        print("Staring gene search for:\n%s\nin projects:\n%s\n" %
              (", ".join(gene_ids), ", ".join([p.project_id
                                               for p in projects])))

        indiv_id_cache = {}
        for project in projects:
            project_id = project.project_id
            if get_project_datastore(project_id).project_collection_is_loaded(
                    project_id):
                print("=====================")
                print("Searching project %s" % project_id)
            else:
                print(
                    "Skipping project %s - gene search is not enabled for this project"
                    % project_id)
                continue

            for gene_id in gene_ids:
                gene_id = get_gene_id_from_str(gene_id, get_reference())

                gene = get_reference().get_gene(gene_id)
                print("-- searching %s for gene %s (%s)" %
                      (project_id, gene["symbol"], gene_id))

                for variant in project_analysis.get_variants_in_gene(
                        project, gene_id, variant_filter=variant_filter):
                    if max(variant.annotation['freqs'].values()) >= max_af:
                        continue

                    add_extra_info_to_variants_project(get_reference(),
                                                       project, [variant])

                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"][gene_id]
                    worst_annotation = variant.annotation["vep_annotation"][
                        worst_annotation_idx]
                    all_genotypes_list = []
                    pass_filter = "N/A"
                    for indiv_id, genotype in variant.genotypes.items():
                        individual = indiv_id_cache.get('indiv_id')
                        if individual is None:
                            individual = Individual.objects.get(
                                project=project, indiv_id=indiv_id)
                            indiv_id_cache[indiv_id] = individual

                        pass_filter = genotype.filter  # filter value is stored in the genotypes even though it's the same for all individuals
                        if genotype.num_alt > 0:
                            all_genotypes_list.append(
                                "%s%s[gt:%s GQ:%s AB:%0.3f]" %
                                (indiv_id, "[Affected]"
                                 if individual.affected == "A" else
                                 ("[-]" if individual.affected == "N" else
                                  "[?]"), ">".join(genotype.alleles),
                                 genotype.gq, genotype.ab
                                 if genotype.ab is not None else float('NaN')))

                    measureset_id, clinvar_significance = get_clinvar_variants(
                    ).get(variant.unique_tuple(), ("", ""))
                    row = map(str, [
                        project_id,
                        gene["symbol"],
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        pass_filter,
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", ""),
                        worst_annotation.get("hgvsp", "").replace("%3D", "="),
                        worst_annotation.get("sift", ""),
                        worst_annotation.get("polyphen", ""),
                        worst_annotation.get("mutationtaster_pred", ""),
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B'))),
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        ", ".join(all_genotypes_list),
                    ])
                    writer.writerow(row)

        outfile.close()
        print("Wrote out %s" % output_filename)