Exemplo n.º 1
0
    def handle(self, *args, **options):
        """ Import the analysis template JSON files shipped with the app.

            Every ``*.json`` under ``<BASE_DIR>/analysis/data/analysis_templates`` is
            imported (as the admin_bot() user) and saved as a hidden TEMPLATE analysis.
            It is then cloned into a hidden SNAPSHOT analysis, which is registered as
            version 1 of a newly created AnalysisTemplate. """

        def hide_and_share(target, template_type):
            # Same steps are applied to both the template and its snapshot
            target.template_type = template_type
            target.visible = False
            target.save()
            add_public_group_read_permission(target)

        templates_dir = os.path.join(settings.BASE_DIR, "analysis", "data",
                                     "analysis_templates")
        bot = admin_bot()
        build = GenomeBuild.grch37()  # Genome build doesn't matter for templates

        for json_path in glob.glob(f"{templates_dir}/*.json"):
            print(json_path)
            imported = analysis_import(bot, build, json_path)
            hide_and_share(imported, AnalysisTemplateType.TEMPLATE)

            template = AnalysisTemplate.objects.create(name=imported.name,
                                                       user=bot,
                                                       analysis=imported)

            snapshot = imported.clone()
            hide_and_share(snapshot, AnalysisTemplateType.SNAPSHOT)

            AnalysisTemplateVersion.objects.create(
                template=template,
                version=1,
                analysis_name_template="%(template)s for %(input)s",
                analysis_snapshot=snapshot)

            print(f"Created template: {template}")
Exemplo n.º 2
0
def annotation_scheduler():
    """ This is run on scheduling_single_worker queue to avoid race conditions

        Uses cache.add() on a fixed key as a lock. While holding it, loops over
        GenomeBuild.builds_with_annotation() and keeps calling
        _handle_variant_annotation_version() on each build's latest variant
        annotation version until it returns None.

        Any Exception is logged via log_traceback() and not re-raised.
        KeyboardInterrupt / SystemExit are allowed to propagate. """
    LOCK_EXPIRE = 60 * 5  # 5 minutes
    lock_id = "annotation-scheduler-lock"

    try:
        # cache.add fails if the key already exists
        if not cache.add(lock_id, "true", LOCK_EXPIRE):
            logging.info("Someone else has %s", lock_id)
            return

        # NOTE(review): the lock expires after LOCK_EXPIRE seconds; if the loop below
        # runs longer, the delete in 'finally' could remove a lock taken by another
        # run - confirm this can't happen on the single worker queue.
        try:
            logging.info("Got the lock for annotation scheduler")
            for genome_build in GenomeBuild.builds_with_annotation():
                annotation_version = AnnotationVersion.latest(genome_build)
                variant_annotation_version = annotation_version.variant_annotation_version
                # Keep going until the handler reports nothing left (returns None)
                while True:
                    if _handle_variant_annotation_version(variant_annotation_version) is None:
                        break
        finally:
            logging.info("Releasing lock")
            cache.delete(lock_id)
    except Exception:
        log_traceback()
Exemplo n.º 3
0
    def get(self, request, *args, **kwargs):
        """ Return the serialized VariantAnnotation (latest version for the build)
            of the variant identified by the 'genome_build_name' and 'variant_string'
            URL kwargs.

            Raises Http404 when Variant.get_from_string() returns None. """
        build_name = self.kwargs['genome_build_name']
        variant_str = self.kwargs['variant_string']
        build = GenomeBuild.get_name_or_alias(build_name)

        variant = Variant.get_from_string(variant_str, build)
        if variant is None:
            raise Http404(variant_str)

        latest_version = VariantAnnotationVersion.latest(build)
        # NOTE(review): .get() raises if the variant has no annotation for this version
        annotation = variant.variantannotation_set.get(version=latest_version)
        return Response(VariantAnnotationSerializer(annotation).data)
Exemplo n.º 4
0
def create_analysis_from_template(request, genome_build_name):
    """ Create and populate an analysis from a template chosen in a POSTed form.

        The POST must contain 'tag_uuid' and '<tag_uuid>-analysis_template' (the
        template name); both are removed and the remaining POST fields are passed
        to the template run as its arguments. Returns the view_active_node
        response for the run's analysis. """
    form_data = request.POST.dict()
    tag_uuid = form_data.pop("tag_uuid")
    template_name = form_data.pop(f"{tag_uuid}-analysis_template")
    template = AnalysisTemplate.get_for_user(request.user, template_name)

    build = GenomeBuild.get_name_or_alias(genome_build_name)
    run = AnalysisTemplateRun.create(template, build, user=request.user)
    # Whatever is left in the form data are the template arguments
    run.populate_arguments(form_data)
    populate_analysis_from_template_run(run)

    return view_active_node(run.analysis, None)
Exemplo n.º 5
0
def annotation_versions(request):
    """ Render the annotation versions page.

        For each genome build with annotation, the template context holds
        (under the build name) a tuple of the VEP command as display text
        and the build's AnnotationVersions, newest annotation_date first. """
    anno_versions = {}
    for genome_build in GenomeBuild.builds_with_annotation():
        # Create VariantAnnotationVersion for build if not exists
        try:
            get_variant_annotation_version(genome_build)
        except Exception:
            # Best effort - log and still show the page for this build
            log_traceback()

        qs = AnnotationVersion.objects.filter(
            genome_build=genome_build).order_by("-annotation_date")
        vep_command = get_vep_command("in.vcf", "out.vcf", genome_build,
                                      genome_build.annotation_consortium)
        # One argument per line for display
        # NOTE(review): this replaces " -" (including the dash) with a newline,
        # so the leading dash of each option is dropped - confirm that is intended
        vep_command = " ".join(vep_command).replace(" -", "\n")
        anno_versions[genome_build.name] = (vep_command, qs)

    context = {"annotation_versions": anno_versions}
    return render(request, "annotation/annotation_versions.html", context)
Exemplo n.º 6
0
    def handle(self, *args, **options):
        """ Write a VCF of random variants to stdout.

            Emits the header from get_vcf_header_from_contigs() for GRCh37, then
            options["num_variants"] tab-separated records sorted by (chrom, pos).
            Only CHROM, POS, REF and ALT are populated, other columns are '.' """
        out = sys.stdout

        build = GenomeBuild.get_name_or_alias("GRCh37")
        for header_line in get_vcf_header_from_contigs(build, {}):
            out.write(header_line + '\n')

        records = []
        for _ in range(options["num_variants"]):
            ref = random_base()
            alt = random_base(not_base=ref)  # presumably never equal to ref
            chrom = random_contig()
            pos = randrange(MAX_SIZE)
            records.append([chrom, pos, '.', ref, alt, '.', '.', '.'])

        records.sort(key=lambda rec: (rec[0], int(rec[1])))
        for rec in records:
            out.write('\t'.join(str(column) for column in rec) + '\n')
    def handle(self, *args, **options):
        """ For each genome build with annotation: populate ClinGen alleles for
            variants that have a VariantTag, collect their VariantAlleles into a new
            VariantAlleleCollectionSource, then create liftover pipelines for it. """
        for build in GenomeBuild.builds_with_annotation():
            tagged_variants = Variant.objects.filter(Variant.get_contigs_q(build),
                                                     varianttag__isnull=False)
            # Will add VariantAlleles
            populate_clingen_alleles_for_variants(build, tagged_variants)

            collection = VariantAlleleCollectionSource.objects.create(genome_build=build)
            # VariantAlleles added above
            variant_alleles = VariantAllele.objects.filter(variant__in=tagged_variants)
            records = [
                VariantAlleleCollectionRecord(collection=collection, variant_allele=allele)
                for allele in variant_alleles
            ]
            if records:
                VariantAlleleCollectionRecord.objects.bulk_create(records, batch_size=2000)

            create_liftover_pipelines(admin_bot(), collection,
                                      ImportSource.COMMAND_LINE, build)
Exemplo n.º 8
0
def variant_annotation_runs(request):
    """ View handling bulk actions on AnnotationRuns, per genome build.

        On POST, for each genome build with annotation, one of two actions is
        performed depending on which key is present in request.POST:

        * "set-non-finished-to-error-<build name>": every run whose status is not
          FINISHED or ERROR has its celery task (if any) revoked and is saved with
          error_exception "Manually failed"
        * "retry-annotation-runs-<build name>": every run with status ERROR is
          passed to annotation_run_retry()

        A Django INFO message reporting the count is added for each action taken. """
    # Status code -> display label
    as_display = dict(AnnotationStatus.choices)

    genome_build_field_counts = {}
    genome_build_summary = {}

    if request.method == "POST":
        for genome_build in GenomeBuild.builds_with_annotation():
            annotation_runs = AnnotationRun.objects.filter(
                annotation_range_lock__version__genome_build=genome_build)
            message = None
            if f"set-non-finished-to-error-{genome_build.name}" in request.POST:
                num_errored = 0
                # NOTE(review): despite the name, these are the terminal statuses -
                # they are *excluded* below to leave only the non-finished runs
                non_finished_statuses = [
                    AnnotationStatus.FINISHED, AnnotationStatus.ERROR
                ]
                for annotation_run in annotation_runs.exclude(
                        status__in=non_finished_statuses):
                    # Stop the running celery task (if one was recorded) before failing the run
                    if celery_task := annotation_run.task_id:
                        logging.info("Terminating celery job '%s'",
                                     celery_task)
                        app.control.revoke(
                            celery_task, terminate=True)  # @UndefinedVariable
                    # NOTE(review): status isn't set here - presumably save() derives
                    # ERROR from error_exception; confirm against AnnotationRun.save
                    annotation_run.error_exception = "Manually failed"
                    annotation_run.save()
                    num_errored += 1
                message = f"{genome_build} - set {num_errored} annotation runs to Error"
            elif f"retry-annotation-runs-{genome_build.name}" in request.POST:
                num_retrying = 0
                for annotation_run in annotation_runs.filter(
                        status=AnnotationStatus.ERROR):
                    annotation_run_retry(annotation_run)
                    num_retrying += 1
                message = f"{genome_build} - retrying {num_retrying} annotation runs."

            # Only report when one of the two actions was requested for this build
            if message:
                messages.add_message(request, messages.INFO, message)
Exemplo n.º 9
0
    def setUpClass(cls):
        """ Create fixtures shared by the tests in this class: a GRCh37 fake
            annotation version, transcript version / gene symbol, a trio and its
            first sample, an analysis, a gene list containing the gene symbol, and
            a variant that has both a gene overlap and a tag. """
        super().setUpClass()

        user, _ = User.objects.get_or_create(username='******')

        grch37 = GenomeBuild.get_name_or_alias("GRCh37")
        cls.grch37 = grch37
        annotation_version = get_fake_annotation_version(grch37)
        cls.annotation_version_grch37 = annotation_version
        release = annotation_version.gene_annotation_version.gene_annotation_release
        transcript_version = create_fake_transcript_version(grch37, release=release)
        cls.transcript_version = transcript_version
        cls.gene_symbol = transcript_version.gene_version.gene_symbol
        cls.trio = create_fake_trio(user, grch37)

        analysis = Analysis(genome_build=grch37)
        cls.analysis = analysis
        analysis.set_defaults_and_save(user)

        cls.sample = cls.trio.get_samples()[0]

        # Gene list holding the fake transcript's gene symbol
        cls.gene_list, _ = GeneList.objects.get_or_create(
            name="fake list",
            user=analysis.user,
            import_status=ImportStatus.SUCCESS)
        GeneListGeneSymbol.objects.get_or_create(gene_list=cls.gene_list,
                                                 gene_symbol=cls.gene_symbol)

        # Gene list needs an overlapping variant to work
        create_fake_variants(grch37)
        # Any non-reference variant will do - it probably doesn't really overlap the gene
        variant = Variant.objects.filter(Variant.get_no_reference_q()).first()
        annotation_run = AnnotationRun.objects.create()
        VariantGeneOverlap.objects.create(
            version=annotation_version.variant_annotation_version,
            annotation_run=annotation_run,
            gene=transcript_version.gene,
            variant=variant)

        # Tag that same variant
        cls.tag, _ = Tag.objects.get_or_create(pk="foo")
        VariantTag.objects.create(genome_build=grch37,
                                  analysis=analysis,
                                  variant=variant,
                                  tag=cls.tag,
                                  user=user)
Exemplo n.º 10
0
    def _add_vep_field_handlers(self):
        """ Set up the per-field VEP handling for self.genome_build.

            Populates:
              * self.field_formatters - field name -> function used to format its value
              * self.source_field_to_columns - VEP info field -> set of variant grid column ids
              * self.ignored_vep_fields - copy of VEP_NOT_COPIED_FIELDS plus custom prefixes
              * self.prediction_pathogenic_values - prediction field -> damage-or-greater levels

            Custom VEP fields whose settings lookup fails are skipped with a warning. """
        # TOPMED and 1k genomes can return multiple values - take highest
        format_pick_highest_float = get_clean_and_pick_single_value_func(max, float)
        format_pick_highest_int = get_clean_and_pick_single_value_func(max, int)
        remove_empty_multiples = get_clean_and_pick_single_value_func(join_uniq)
        # COSMIC v90 (5/9/2019) switched to COSV (build independent identifiers)
        extract_cosmic = get_extract_existing_variation("COSV")
        extract_dbsnp = get_extract_existing_variation("rs")

        # Some variants return 2 rsIds, and 2 frequencies eg "0.6764&0.2433" - take max
        self.field_formatters = {
            "af_1kg": format_pick_highest_float,
            "af_uk10k": format_pick_highest_float,
            "cosmic_count": format_pick_highest_int,
            "cosmic_id": extract_cosmic,
            "cosmic_legacy_id": remove_empty_multiples,
            "dbsnp_rs_id": extract_dbsnp,
            'gnomad_popmax': str.upper,  # nfe -> NFE
            "hgnc_id": format_hgnc_id,
            "sift": format_vep_sift_to_choice,
            "variant_class": get_choice_formatter_func(VariantClass.choices),
            'fathmm_pred_most_damaging': get_most_damaging_func(FATHMMPrediction),
            'impact': get_choice_formatter_func(PathogenicityImpact.CHOICES),
            'interpro_domain': remove_empty_multiples,
            'mastermind_count_1_cdna': get_clean_and_pick_single_value_func(operator.itemgetter(0), int),
            'mastermind_count_2_cdna_prot': get_clean_and_pick_single_value_func(operator.itemgetter(1), int),
            'mastermind_count_3_aa_change': get_clean_and_pick_single_value_func(operator.itemgetter(2), int),
            'mutation_assessor_pred_most_damaging': get_most_damaging_func(MutationAssessorPrediction),
            'mutation_taster_pred_most_damaging': get_most_damaging_func(MutationTasterPrediction),
            'polyphen2_hvar_pred_most_damaging': get_most_damaging_func(Polyphen2Prediction),
            # conservation fields are from BigWig, which can return multiple entries
            # for deletions. Higher = more conserved, so for rare disease filtering taking max makes sense
            'phylop_30_way_mammalian': format_pick_highest_float,
            'phylop_46_way_mammalian': format_pick_highest_float,
            'phylop_100_way_vertebrate': format_pick_highest_float,
            'phastcons_30_way_mammalian': format_pick_highest_float,
            'phastcons_46_way_mammalian': format_pick_highest_float,
            'phastcons_100_way_vertebrate': format_pick_highest_float,
            'somatic': format_vep_somatic,
            'topmed_af': format_pick_highest_float,
        }
        # This formatter is only registered for GRCh38
        if self.genome_build == GenomeBuild.grch38():
            self.field_formatters["gnomad_filtered"] = gnomad_filtered_func

        self.source_field_to_columns = defaultdict(set)
        # Copy so appending custom prefixes below doesn't modify the class-level list
        self.ignored_vep_fields = self.VEP_NOT_COPIED_FIELDS.copy()

        vc = VEPConfig(self.genome_build)
        # Sort to have consistent VCF headers
        for cvf in ColumnVEPField.filter_for_build(self.genome_build).order_by("source_field"):
            try:
                if cvf.vep_custom:  # May not be configured
                    prefix = cvf.get_vep_custom_display()
                    setting_key = prefix.lower()
                    _ = vc[setting_key]  # May throw exception if not setup
                    if cvf.source_field_has_custom_prefix:
                        self.ignored_vep_fields.append(prefix)

                self.source_field_to_columns[cvf.vep_info_field].add(cvf.variant_grid_column_id)
                # logging.info("Handling column %s => %s", cvf.vep_info_field, cvf.variant_grid_column_id)
            except Exception:
                # Deliberately broad (the settings lookup error type isn't known here),
                # but no longer swallows KeyboardInterrupt / SystemExit
                logging.warning("Skipping custom %s due to missing settings", cvf.vep_info_field)

        self.prediction_pathogenic_values = {
            'sift': SIFTPrediction.get_damage_or_greater_levels(),
            'fathmm_pred_most_damaging': FATHMMPrediction.get_damage_or_greater_levels(),
            'mutation_assessor_pred_most_damaging': MutationAssessorPrediction.get_damage_or_greater_levels(),
            'mutation_taster_pred_most_damaging': MutationTasterPrediction.get_damage_or_greater_levels(),
            'polyphen2_hvar_pred_most_damaging': Polyphen2Prediction.get_damage_or_greater_levels(),
        }
Exemplo n.º 11
0
def create_manual_variant_entry_from_text(request, genome_build_name,
                                          variants_text):
    """ Create manual variants from 'variants_text' for the requesting user in the
        named genome build, then redirect to the 'manual_variant_entry' page. """
    build = GenomeBuild.get_name_or_alias(genome_build_name)
    requesting_user = request.user
    create_manual_variants(requesting_user, build, variants_text)
    return redirect('manual_variant_entry')
Exemplo n.º 12
0
def annotation(request):
    """ Gather installation / import status for the annotation components:
        per-build annotation details, gene symbol alias counts, ontology term,
        relationship and import counts, diagnostic gene lists, ClinVar citations,
        Human Protein Atlas and Somalier.

        NOTE(review): no context / return statement is visible in this excerpt, and
        several values computed here (eg builds_ok, all_ontologies_accounted_for)
        are not read within the lines shown - presumably used further down. """
    # Set Variables to None for uninstalled components, the template will show installation instructions
    ensembl_biomart_transcript_genes = None
    diagnostic_gene_list = None

    build_contigs = get_build_contigs()
    genome_build_annotations = {}

    # One "ok" flag per build (defaults to False when details have no "ok" key)
    builds_ok = []
    for genome_build in GenomeBuild.builds_with_annotation():
        annotation_details = _get_build_annotation_details(
            build_contigs, genome_build)
        genome_build_annotations[genome_build.name] = annotation_details

        builds_ok.append(annotation_details.get("ok", False))

    gene_symbol_alias_counts = get_field_counts(GeneSymbolAlias.objects.all(),
                                                "source")
    if gene_symbol_alias_counts:
        # Re-key by the source's display label
        gene_symbol_alias_counts = {
            GeneSymbolAliasSource(k).label: v
            for k, v in gene_symbol_alias_counts.items()
        }

    all_ontologies_accounted_for = True
    ontology_counts = list()
    for service in [
            OntologyService.MONDO, OntologyService.OMIM, OntologyService.HPO,
            OntologyService.HGNC
    ]:
        # NOTE(review): HGNC is described as just a stub for other items to relate to
        # and not meant to be reported, but it is in the list above so it IS counted
        count = OntologyTerm.objects.filter(ontology_service=service).count()
        ontology_counts.append({"service": service, "count": count})

    ontology_services = [
        OntologyService.MONDO, OntologyService.OMIM, OntologyService.HPO,
        OntologyService.HGNC
    ]
    # Relation counts for every unordered pair of services (including a service
    # with itself), stored under both "<first><second>" and "<second><first>" keys
    ontology_relationship_counts = dict()
    for first_index, first_service in enumerate(ontology_services):
        for second_service in ontology_services[first_index:]:
            join_count = OntologyTermRelation.objects.filter(
                source_term__ontology_service=first_service,
                dest_term__ontology_service=second_service).count()
            if first_service != second_service:
                # Relations may be stored in either direction - add the reverse
                reverse_count = OntologyTermRelation.objects.filter(
                    source_term__ontology_service=second_service,
                    dest_term__ontology_service=first_service).count()
                join_count += reverse_count
            ontology_relationship_counts[
                f"{first_service}{second_service}"] = join_count
            ontology_relationship_counts[
                f"{second_service}{first_service}"] = join_count

    # Most recent OntologyImport (by 'created') for each import context
    ontology_imports = list()
    for context in [
            "mondo_file", "gencc_file", "hpo_file", "omim_file",
            "biomart_omim_aliases", "phenotype_to_genes"
    ]:
        last_import = OntologyImport.objects.filter(
            context=context).order_by('-created').first()
        # Don't complain about omim_file not being imported, as it is not
        # available to environments without a license
        if not last_import and context != "omim_file":
            all_ontologies_accounted_for = False
        ontology_imports.append({
            "context": context,
            "last_import": last_import
        })

    # NOTE(review): .get() raises if there is no 'Diagnostic' GeneListCategory
    diagnostic = GeneListCategory.objects.get(name='Diagnostic')
    diagnostic_gene_list_count = diagnostic.genelist_set.all().count()
    if diagnostic_gene_list_count:
        diagnostic_gene_list = f"{diagnostic_gene_list_count} diagnostic gene lists"

    # Becomes a display string when there are citations, otherwise stays 0
    clinvar_citations = ClinVarCitation.objects.count()
    if clinvar_citations:
        num_cached_clinvar_citations = CachedCitation.objects.count()
        clinvar_citations = f"{clinvar_citations} ClinVar citations ({num_cached_clinvar_citations} cached)"

    # Count annotations for the most recent Human Protein Atlas version
    hpa_version = HumanProteinAtlasAnnotationVersion.objects.order_by(
        "-annotation_date").first()
    hpa_counts = HumanProteinAtlasAnnotation.objects.filter(
        version=hpa_version).count()

    # Only verify Somalier config when it is enabled in settings
    somalier = None
    if somalier_enabled := settings.SOMALIER.get("enabled"):
        somalier = _verify_somalier_config()
Exemplo n.º 13
0
                    annotation_run.error_exception = "Manually failed"
                    annotation_run.save()
                    num_errored += 1
                message = f"{genome_build} - set {num_errored} annotation runs to Error"
            elif f"retry-annotation-runs-{genome_build.name}" in request.POST:
                num_retrying = 0
                for annotation_run in annotation_runs.filter(
                        status=AnnotationStatus.ERROR):
                    annotation_run_retry(annotation_run)
                    num_retrying += 1
                message = f"{genome_build} - retrying {num_retrying} annotation runs."

            if message:
                messages.add_message(request, messages.INFO, message)

    for genome_build in GenomeBuild.builds_with_annotation():
        qs = AnnotationRun.objects.filter(
            annotation_range_lock__version__genome_build=genome_build)
        field_counts = get_field_counts(qs, "status")
        summary_data = Counter()
        for field, count in field_counts.items():
            summary = AnnotationStatus.get_summary_state(field)
            summary_data[summary] += count

        genome_build_summary[genome_build.pk] = summary_data
        genome_build_field_counts[genome_build.pk] = {
            as_display[k]: v
            for k, v in field_counts.items()
        }
    context = {
        "genome_build_summary": genome_build_summary,
Exemplo n.º 14
0
            variant_tag, created = VariantTag.objects.get_or_create(
                variant_id=variant_id,
                tag=tag,
                genome_build=genome_build,
                location=location,
                analysis=analysis,
                user=request.user)
            if node_id:
                variant_tag.node_id = node_id
                variant_tag.save()
        else:
            if genome_build_name is None:
                raise ValueError(
                    "Adding requires either 'analysis_id' or 'genome_build_name'"
                )
            genome_build = GenomeBuild.get_name_or_alias(genome_build_name)

            variant_tag, created = VariantTag.objects.get_or_create(
                variant_id=variant_id,
                tag=tag,
                analysis=None,
                location=location,
                user=request.user,
                defaults={"genome_build": genome_build})
        if created:  # Only return new if anything created
            ret = VariantTagSerializer(variant_tag,
                                       context={
                                           "request": request
                                       }).data
    elif op == 'del':
        # Deletion of tags is for analysis (all users)