def annotation_scheduler():
    """ This is run on scheduling_single_worker queue to avoid race conditions """
    LOCK_EXPIRE = 60 * 5  # 5 minutes
    lock_id = "annotation-scheduler-lock"
    # cache.add fails if the key already exists
    acquire_lock = lambda: cache.add(lock_id, "true", LOCK_EXPIRE)
    release_lock = lambda: cache.delete(lock_id)

    try:
        if acquire_lock():
            try:
                logging.info("Got the lock for annotation scheduler")
                for genome_build in GenomeBuild.builds_with_annotation():
                    annotation_version = AnnotationVersion.latest(genome_build)
                    variant_annotation_version = annotation_version.variant_annotation_version
                    while True:
                        range_lock = _handle_variant_annotation_version(variant_annotation_version)
                        if range_lock is None:
                            break
            finally:
                logging.info("Releasing lock")
                release_lock()
        else:
            logging.info("Someone else has %s", lock_id)
    except:
        log_traceback()
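
# A minimal sketch of the same cache.add() locking pattern factored into a
# reusable context manager, assuming Django's cache framework. "cache_lock" is
# a hypothetical helper, not part of this codebase.
from contextlib import contextmanager

from django.core.cache import cache


@contextmanager
def cache_lock(lock_id, expire_secs):
    # cache.add() is atomic: it only succeeds when the key doesn't exist yet,
    # so exactly one worker holds the lock until it's released or expires
    acquired = cache.add(lock_id, "true", expire_secs)
    try:
        yield acquired
    finally:
        if acquired:
            cache.delete(lock_id)

# Hypothetical usage:
#
#     with cache_lock("annotation-scheduler-lock", 60 * 5) as acquired:
#         if acquired:
#             ...  # do the scheduling work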
def wait_for_node(node_id):
    """ Used to build a dependency on a node that's already loading.
        The danger here is that we'll end up taking up a celery worker,
        waiting for the parent to become available... so we have to be careful """
    EVENT_NAME = "wait_for_node"
    # logging.info("wait_for_node: %s", node_id)
    try:
        TIME_BETWEEN_CHECKS = [5, 5, 10, 10, 30, 30, 60, MINUTE_SECS * 2]
        total_time = 0
        for sleep_time in TIME_BETWEEN_CHECKS:
            # Any exception will exit this task which is ok
            node = AnalysisNode.objects.get(pk=node_id)
            # logging.info("node status: %s", node.status)
            if NodeStatus.is_ready(node.status):
                # logging.info("Node was ready")
                return

            try:
                node_task = NodeTask.objects.get(node=node, version=node.version)
                if node_task.celery_task:
                    wait_for_task(node_task.celery_task)
                    return
            except NodeTask.DoesNotExist:
                pass  # QUEUED - there won't be a celery task yet

            if node.status == NodeStatus.QUEUED:
                details = f"Waiting on parent node {node_id} which is QUEUED"
                details += f" - waiting for {sleep_time} secs, {total_time} so far!"
                create_event(None, EVENT_NAME, details, severity=LogLevel.WARNING)
            else:
                details = f"Waiting on parent node {node_id} status {node.status} no celery task!"
                create_event(None, EVENT_NAME, details, severity=LogLevel.ERROR)
                return

            logging.info("Sleeping for %d seconds", sleep_time)
            sleep(sleep_time)
            total_time += sleep_time

        # Timeout
        details = f"Waited on parent node {node_id} for {total_time} seconds. "
        details += "Didn't become available, dying so we don't cause a deadlock!"
        create_event(None, EVENT_NAME, details, severity=LogLevel.ERROR)
    except:
        log_traceback()
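
# A minimal sketch of the escalating-poll pattern wait_for_node uses, factored
# into a generic helper. "poll_until" and its default delays are hypothetical,
# not part of this codebase.
from time import sleep


def poll_until(is_ready, delays=(5, 5, 10, 10, 30, 30, 60, 120)):
    """ Call is_ready() between increasingly long sleeps.
        Returns True as soon as it passes, False once all delays are exhausted """
    for sleep_time in delays:
        if is_ready():
            return True
        sleep(sleep_time)
    return is_ready()  # Final check after the last sleep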
def delete_annotation_run(annotation_run_id):
    try:
        annotation_run = AnnotationRun.objects.get(pk=annotation_run_id)
        annotation_run.status = AnnotationStatus.DELETING
        annotation_run.save()
        annotation_run.delete()
    except:
        log_traceback()
        raise
def create_and_launch_analysis_tasks(analysis_id, run_async=True):
    """ This is run in a single worker queue so that we avoid race conditions """
    try:
        tasks = _get_analysis_update_tasks(analysis_id)
    except:
        log_traceback()
        raise

    for t in tasks:
        execute_task(t, run_async=run_async)
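
# Assuming the returned tasks are Celery signatures, an execute_task-style
# helper might dispatch them like this (a sketch, not the project's actual
# implementation):
def _execute_task_sketch(task_signature, run_async=True):
    if run_async:
        task_signature.apply_async()  # Hand off to a celery worker
    else:
        task_signature.apply()  # Run inline, e.g. in tests or management commands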
def _verify_somalier_config() -> Optional[str]:
    somalier_cfg = SomalierConfig()
    somalier_bin = somalier_cfg.get_annotation("command")
    somalier = None
    try:
        somalier_output = check_output([somalier_bin], stderr=subprocess.STDOUT)
        # First line of the tool's output is its version banner
        somalier = somalier_output.decode().split("\n", 1)[0]
    except:
        log_traceback()
    return somalier
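
# A lighter-weight pre-check (a sketch, standard library only): confirm the
# binary is on PATH with shutil.which() before spawning it, which avoids a
# FileNotFoundError from check_output for a missing executable.
import shutil


def _binary_on_path(binary: str) -> bool:
    return shutil.which(binary) is not None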
class NodeJSONViewMixin(View, ABC):

    @abstractmethod
    def _get_node(self, request, **kwargs) -> AnalysisNode:
        pass

    @abstractmethod
    def _get_data(self, request, node: AnalysisNode, *args, **kwargs):
        pass

    def _get_redirect(self, request, node: AnalysisNode):
        return None

    def get_response(self, request, *args, **kwargs):
        try:
            node = self._get_node(request, **kwargs)
            if ret := self._get_redirect(request, node):
                return ret
            data = self._get_data(request, *args, node=node, **kwargs)
            status = HTTP_200_OK
        except NonFatalNodeError as e:
            log_traceback()
            data = {"message": str(e), "non_fatal": True}
            status = e.status
            if isinstance(e, NodeNotFoundException):
                # Is the node deleted, or perhaps it was just out of version?
                if e.node_id:
                    if not AnalysisNode.objects.filter(pk=e.node_id).exists():
                        data["deleted_nodes"] = [e.node_id]
                    else:
                        # We could potentially redirect to the same page with new version
                        # but client will probably just ditch this and re-request latest anyway
                        pass
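
# A hypothetical concrete view showing how the mixin's abstract hooks fit
# together; the URL kwarg and JSON payload here are illustrative only.
class NodeStatusView(NodeJSONViewMixin):

    def _get_node(self, request, **kwargs) -> AnalysisNode:
        return AnalysisNode.objects.get(pk=kwargs["node_id"])

    def _get_data(self, request, node: AnalysisNode, *args, **kwargs):
        return {"id": node.pk, "status": node.status}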
def annotation_versions(request):
    anno_versions = {}
    # Create VariantAnnotationVersion for each build if it doesn't already exist
    for genome_build in GenomeBuild.builds_with_annotation():
        try:
            get_variant_annotation_version(genome_build)
        except:
            log_traceback()

        qs = AnnotationVersion.objects.filter(genome_build=genome_build).order_by("-annotation_date")
        vep_command = get_vep_command("in.vcf", "out.vcf", genome_build, genome_build.annotation_consortium)
        vep_command = " ".join(vep_command).replace(" -", "\n")
        anno_versions[genome_build.name] = (vep_command, qs)

    context = {"annotation_versions": anno_versions}
    return render(request, "annotation/annotation_versions.html", context)
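
# Worked example of the vep_command formatting above: each " -flag" boundary is
# turned into a newline so the command displays one option per line, e.g.
#
#     " ".join(["vep", "-i", "in.vcf", "-o", "out.vcf"]).replace(" -", "\n")
#     => 'vep\n-i in.vcf\n-o out.vcf'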
def node_cache_task(node_id, version):
    logging.info("node_cache_task: %s/%d", node_id, version)
    try:
        node = AnalysisNode.objects.get_subclass(pk=node_id, version=version)
    except AnalysisNode.DoesNotExist:  # @UndefinedVariable
        # Node was deleted - this task is obsolete
        return

    node_cache = NodeCache.objects.get(node_version=node.node_version)
    variant_collection = node_cache.variant_collection
    if variant_collection.status != ProcessingStatus.CREATED:
        return

    if not (node.is_valid() and node.modifies_parents()):
        logging.debug("Not doing anything for node %s", node.pk)
        variant_collection.status = ProcessingStatus.SKIPPED
        variant_collection.save()
        return

    variant_collection.status = ProcessingStatus.PROCESSING
    variant_collection.save()

    # Node could be READY - we're just generating the cache afterwards
    # (the node doesn't need it, but its children do)
    if node.status != NodeStatus.READY:
        node.set_node_task_and_status(node_cache_task.request.id, NodeStatus.LOADING_CACHE)

    try:
        node.write_cache(variant_collection)
        processing_status = ProcessingStatus.SUCCESS
        status = NodeStatus.LOADING
    except:
        log_traceback()
        processing_status = ProcessingStatus.ERROR
        status = NodeStatus.ERROR

    variant_collection.status = processing_status
    variant_collection.save()
    if node.status != NodeStatus.READY:
        node.status = status
        node.save()
        if hl:
            func, arg = hl
            try:
                match_type, records = func(arg)
                if match_type == PhenotypeMatchTypes.HPO:
                    hpo_list.extend(records)
                elif match_type == PhenotypeMatchTypes.OMIM:
                    omim_alias_list.extend(records)
                elif match_type == PhenotypeMatchTypes.GENE:
                    gene_symbols.extend(records)
                # logging.info("Got exact: %s => %s", text, records)
            except Exception as e:
                msg = f"Error: {e}, func: {func}, arg={arg}"
                logging.error(msg)
                log_traceback()
                # raise ValueError(msg)

        return hpo_list, omim_alias_list, gene_symbols

    @classmethod
    def _skip_word(cls, lower_text):
        """ Return True to skip a word, throws SkipAllPhenotypeMatchException to skip all.
            Only need to skip >MIN_LENGTH words as we will do that later (after exact) """
        # For multi-words where you want to skip components
        SKIP_ALL = {"library prep", "to cgf", "set up", "ad pattern", "recurrent eps", "rest of"}
        if lower_text in SKIP_ALL:
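
# A hypothetical matcher in the (match_type, records) shape the dispatch above
# expects; the lookup table is illustrative only, not real project data.
_HPO_EXACT_EXAMPLE = {
    "microcephaly": ["HP:0000252"],
}


def _match_hpo_exact_sketch(text):
    return PhenotypeMatchTypes.HPO, _HPO_EXACT_EXAMPLE.get(text.lower(), [])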
def get_evidence_fields_from_preferred_transcript(
        genome_build: GenomeBuild, variant: Variant, clingen_allele: ClinGenAllele,
        hgvs_matcher: HGVSMatcher, transcript_version: TranscriptVersion,
        evidence_transcript_columns, ekey_formatters, annotation_version) -> AutopopulateData:
    data = transcript_autopopulate(transcript_version)
    data[SpecialEKeys.GENE_SYMBOL] = transcript_version.gene_version.gene_symbol_id

    # Populate from TranscriptVersion data 1st (so it can be overwritten later)
    if variant:
        try:
            vts = VariantTranscriptSelections(variant, genome_build, annotation_version)
            transcript_data = vts.get_transcript_annotation(transcript_version)
            for evidence_key, transcript_config in evidence_transcript_columns.items():
                variant_column = transcript_config['col']
                immutable = transcript_config['immutable']
                # Getting out of dict directly, not joining via queryset
                transcript_column = variant_column.replace("variantannotation__", "")
                value = transcript_data.get(transcript_column)
                if value is not None:
                    set_evidence(data, evidence_key, value, immutable, ekey_formatters)

            gene_symbol = transcript_version.gene_version.gene_symbol
            data[SpecialEKeys.INTERNAL_SAMPLES_20X_COVERAGE] = get_20x_gene_coverage(gene_symbol)

            phastcons_dict = {
                "phastcons_30_way_mammalian": "30 way mammalian",
                "phastcons_46_way_mammalian": "46 way mammalian",
                "phastcons_100_way_vertebrate": "100 way vertebrate",
            }
            data[SpecialEKeys.PHASTCONS] = get_set_fields_summary(transcript_data, phastcons_dict, phastcons_dict)

            phylop_dict = {
                "phylop_30_way_mammalian": "30 way mammalian",
                "phylop_46_way_mammalian": "46 way mammalian",
                "phylop_100_way_vertebrate": "100 way vertebrate",
            }
            data[SpecialEKeys.PHYLOP] = get_set_fields_summary(transcript_data, phylop_dict, phylop_dict)

            if variant_annotation := vts.variant_annotation:
                data[SpecialEKeys.SEARCH_TERMS] = variant_annotation.get_search_terms()
                if settings.ANNOTATION_PUBMED_SEARCH_TERMS_ENABLED:
                    data[SpecialEKeys.PUBMED_SEARCH_TERMS] = variant_annotation.get_pubmed_search_terms()
        except:
            log_traceback()

        try:
            c_hgvs = hgvs_matcher.variant_to_c_hgvs_parts(variant, transcript_version.accession)
            if c_hgvs:
                data[SpecialEKeys.C_HGVS] = c_hgvs.full_c_hgvs
        except Exception as e:
            value_obj = {}
            data.message = f"Could not parse HGVS value {e}"
            data[SpecialEKeys.C_HGVS] = value_obj

    if clingen_allele:
        p_hgvs = clingen_allele.get_p_hgvs(transcript_version.accession, match_version=False)
    else:
        p_hgvs = None
    data[SpecialEKeys.P_HGVS] = p_hgvs

    gene_symbol_id = transcript_version.gene_version.gene_symbol_id
    gnomad_oe_lof_summary = get_gnomad_oe_lof_summary(transcript_version)
    if gnomad_oe_lof_summary:
        data[SpecialEKeys.GNOMAD_OE_LOF] = gnomad_oe_lof_summary

    gs_count = GeneSymbolPubMedCount.get_for_gene_symbol(gene_symbol_id)
    data[SpecialEKeys.PUBMED_GENE_SEARCH_COUNT] = {"value": gs_count.count,
                                                   "note": f"Retrieved {gs_count.modified.date()}"}
    return data
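
# A minimal sketch of the "populate first, overwrite later" idea behind
# set_evidence; the dict-shaped store, formatter lookup and immutable flag
# handling here are assumptions, not the project's actual implementation.
def _set_evidence_sketch(data, evidence_key, value, immutable, ekey_formatters):
    if formatter := ekey_formatters.get(evidence_key):
        value = formatter(value)
    # Later callers simply assign again to overwrite an earlier value
    data[evidence_key] = {"value": value, "immutable": immutable}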