Example #1
    def process(self):
        # Simple state machine: CREATED -> IN_PROGRESS -> WAITING_FOR_CHILDREN,
        # with FAILED as the terminal state once retries are exhausted
        if self.job.status == JobStatus.CREATED:
            self.job.status = JobStatus.IN_PROGRESS

        elif self.job.status == JobStatus.IN_PROGRESS:
            self.job.retry_count += 1
            try:
                self._process()
                self.job.status = JobStatus.WAITING_FOR_CHILDREN
            except Exception as e:
                if isinstance(e, ETLExceptions):
                    message = {"message": str(e), "code": e.code}
                else:
                    message = {"message": str(e)}
                # >= guards against a retry_count that overshoots max_retry
                if self.job.retry_count >= self.job.max_retry:
                    self.job.status = JobStatus.FAILED
                    self.job.message = message
                    self._job_failed()

        elif self.job.status == JobStatus.WAITING_FOR_CHILDREN:
            self._check_children()

        logger.info("Job %s in status: %s" % (str(self.job.id), JobStatus(self.job.status).name))
        self._unlock()
        self._save()
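
A minimal, runnable sketch of the same state machine in isolation; the JobStatus values and the bare-bones Job/Processor classes below are stand-ins for illustration, not the project's real models:

from enum import IntEnum


class JobStatus(IntEnum):
    CREATED = 0
    IN_PROGRESS = 1
    WAITING_FOR_CHILDREN = 2
    COMPLETED = 3
    FAILED = 4


class Job:
    def __init__(self, max_retry=3):
        self.status = JobStatus.CREATED
        self.retry_count = 0
        self.max_retry = max_retry
        self.message = None


class Processor:
    def __init__(self, job):
        self.job = job

    def _process(self):
        pass  # the real ETL work would happen here

    def process(self):
        if self.job.status == JobStatus.CREATED:
            self.job.status = JobStatus.IN_PROGRESS
        elif self.job.status == JobStatus.IN_PROGRESS:
            self.job.retry_count += 1
            try:
                self._process()
                self.job.status = JobStatus.WAITING_FOR_CHILDREN
            except Exception as e:
                if self.job.retry_count >= self.job.max_retry:
                    self.job.status = JobStatus.FAILED
                    self.job.message = {"message": str(e)}


job = Job()
processor = Processor(job)
processor.process()  # CREATED -> IN_PROGRESS
processor.process()  # IN_PROGRESS -> WAITING_FOR_CHILDREN
print(JobStatus(job.status).name)  # WAITING_FOR_CHILDREN
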
Example #2
    def test_fetch_samples1(self):
        """
        Test fetching samples for a request from IGO LIMS
        Should import Pooled Normal samples automatically
        TODO: Mock LIMS API for this test and then remove skip
        """
        # sanity check that starting db is empty
        files = File.objects.all()
        files_metadata = FileMetadata.objects.all()
        jobs = Job.objects.all()
        self.assertEqual(len(files), 0)
        self.assertEqual(len(files_metadata), 0)
        self.assertEqual(len(jobs), 0)

        request_id = "10075_D"
        child_jobs = fetch_samples(request_id=request_id)

        # check that jobs were created successfully
        jobs = Job.objects.all()
        job_ids = [job.id for job in jobs]
        self.assertEqual(len(jobs), len(child_jobs))
        self.assertEqual(len(jobs), 17)
        for child_job in child_jobs:
            self.assertIn(UUID(child_job), job_ids)

        # need to run the job scheduler at least twice to completely process all jobs
        # TODO: split the IGO LIMS query apart from the sample import logic, so a
        # mock JSON blob representing the expected IGO LIMS API response can be
        # passed in instead of querying the real API during testing
        print(">>> running job scheduler")
        scheduler()
        scheduler()
        scheduler()
        print(">>> job scheduler complete")

        # check that all jobs completed successfully
        jobs = Job.objects.filter(
            run='beagle_etl.jobs.lims_etl_jobs.create_pooled_normal').all()
        for job in jobs:
            print("%s %s" % (job.run, JobStatus(job.status).name))
            self.assertEqual(job.status, JobStatus.COMPLETED)

        # check for updated files in the database
        files = File.objects.all()
        files_metadata = FileMetadata.objects.all()
        self.assertEqual(len(files), 22)
        self.assertEqual(len(files_metadata), 22)

        import_files = File.objects.filter(
            file_group=settings.IMPORT_FILE_GROUP)
        import_files_metadata = FileMetadata.objects.filter(
            file__in=[i.id for i in import_files])
        pooled_normal_files = File.objects.filter(
            file_group=settings.POOLED_NORMAL_FILE_GROUP)
        pooled_normal_files_metadata = FileMetadata.objects.filter(
            file__in=[i.id for i in pooled_normal_files])
        self.assertEqual(len(import_files), 10)
        self.assertEqual(len(import_files_metadata), 10)
        self.assertEqual(len(pooled_normal_files), 12)
        self.assertEqual(len(pooled_normal_files_metadata), 12)
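
A sketch of the mocking the docstring's TODO asks for; the patch target path and the response shape below are assumptions for illustration, not the project's verified API:

from unittest import mock

# hypothetical response shape; a real IGO LIMS payload has more fields
FAKE_LIMS_RESPONSE = {
    "requestId": "10075_D",
    "recipe": "IMPACT468",
    "samples": [],
}

# the patch target is an assumption about where LIMSClient is looked up
with mock.patch(
        "beagle_etl.jobs.lims_etl_jobs.LIMSClient.get_request_samples",
        return_value=FAKE_LIMS_RESPONSE):
    child_jobs = fetch_samples(request_id="10075_D")
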
Example #3
def request_callback(request_id, job_group=None, job_group_notifier=None):
    jgn = None
    try:
        jgn = JobGroupNotifier.objects.get(id=job_group_notifier)
        logger.debug("[RequestCallback] JobGroup id: %s", job_group)
    except JobGroupNotifier.DoesNotExist:
        logger.debug("[RequestCallback] JobGroupNotifier not set")
    job_group_notifier_id = str(jgn.id) if jgn else None
    assays = ETLConfiguration.objects.first()

    recipe = LIMSClient.get_request_samples(request_id).get("recipe", None)

    # flag WES requests whose igoComplete sample jobs did not all complete
    sample_statuses = Job.objects.filter(job_group=job_group,
                                         run=TYPES["SAMPLE"],
                                         args__igocomplete=True).values("status")
    all_samples_completed = all(
        JobStatus(job["status"]) == JobStatus.COMPLETED
        for job in sample_statuses)
    if not all_samples_completed and recipe in settings.WES_ASSAYS:
        wes_job_failed = WESJobFailedEvent(job_group_notifier_id, recipe)
        send_notification.delay(wes_job_failed.to_dict())

    if not recipe:
        raise FailedToSubmitToOperatorException(
            "Not enough metadata to choose the operator for requestId:%s" %
            request_id)

    if recipe not in assays.all_recipes:
        ci_review_e = SetCIReviewEvent(job_group_notifier_id).to_dict()
        send_notification.delay(ci_review_e)
        set_unknown_assay_label = SetLabelEvent(
            job_group_notifier_id, "unrecognized_assay").to_dict()
        send_notification.delay(set_unknown_assay_label)
        unknown_assay_event = UnknownAssayEvent(job_group_notifier_id,
                                                recipe).to_dict()
        send_notification.delay(unknown_assay_event)
        return []

    if recipe in assays.hold_recipes:
        admin_hold_event = AdminHoldEvent(job_group_notifier_id).to_dict()
        send_notification.delay(admin_hold_event)
        custom_capture_event = CustomCaptureCCEvent(job_group_notifier_id,
                                                    recipe).to_dict()
        send_notification.delay(custom_capture_event)
        return []

    if recipe in assays.disabled_recipes:
        not_for_ci = NotForCIReviewEvent(job_group_notifier_id).to_dict()
        send_notification.delay(not_for_ci)
        disabled_assay_event = DisabledAssayEvent(job_group_notifier_id,
                                                  recipe).to_dict()
        send_notification.delay(disabled_assay_event)
        return []

    # no files imported for this request: put it on admin hold
    if not FileRepository.filter(metadata={
            "requestId": request_id
    }, values_metadata="recipe").all():
        no_samples_event = AdminHoldEvent(job_group_notifier_id).to_dict()
        send_notification.delay(no_samples_event)
        return []

    if not all([
            JobStatus(job["status"]) == JobStatus.COMPLETED
            for job in Job.objects.filter(job_group=job_group).values("status")
    ]):
        ci_review_e = SetCIReviewEvent(job_group_notifier_id).to_dict()
        send_notification.delay(ci_review_e)

    lab_head_email = FileRepository.filter(
        metadata={
            "requestId": request_id
        }, values_metadata="labHeadEmail").first()
    try:
        # flag requests whose lab head is outside mskcc.org
        if lab_head_email.split("@")[1] != "mskcc.org":
            event = ExternalEmailEvent(job_group_notifier_id,
                                       request_id).to_dict()
            send_notification.delay(event)
    except Exception:
        # lab_head_email may be None or missing the "@domain" part
        logger.error("Failed to check labHeadEmail for request %s", request_id)

    if not FileRepository.filter(metadata={
            "requestId": request_id,
            "tumorOrNormal": "Tumor"
    }):
        only_normal_samples_event = OnlyNormalSamplesEvent(
            job_group_notifier_id, request_id).to_dict()
        send_notification.delay(only_normal_samples_event)
        if recipe in settings.ASSAYS_ADMIN_HOLD_ONLY_NORMALS:
            admin_hold_event = AdminHoldEvent(job_group_notifier_id).to_dict()
            send_notification.delay(admin_hold_event)
            return []

    operators = Operator.objects.filter(recipes__overlap=[recipe])

    if not operators:
        # TODO: Import ticket will have CIReviewNeeded
        msg = "No operator defined for requestId %s with recipe %s" % (
            request_id, recipe)
        logger.error(msg)
        e = OperatorRequestEvent(job_group_notifier_id,
                                 "[CIReviewEvent] %s" % msg).to_dict()
        send_notification.delay(e)
        ci_review_e = SetCIReviewEvent(job_group_notifier_id).to_dict()
        send_notification.delay(ci_review_e)
        raise FailedToSubmitToOperatorException(msg)
    for operator in operators:
        if not operator.active:
            msg = "Operator not active: %s" % operator.class_name
            logger.info(msg)
            e = OperatorRequestEvent(job_group_notifier_id,
                                     "[CIReviewEvent] %s" % msg).to_dict()
            send_notification.delay(e)
            error_label = SetLabelEvent(job_group_notifier_id,
                                        "operator_inactive").to_dict()
            send_notification.delay(error_label)
            ci_review_e = SetCIReviewEvent(job_group_notifier_id).to_dict()
            send_notification.delay(ci_review_e)
        else:
            logger.info("Submitting request_id %s to %s operator" %
                        (request_id, operator.class_name))
            if Job.objects.filter(job_group=job_group,
                                  args__request_id=request_id,
                                  run=TYPES["SAMPLE"],
                                  status=JobStatus.FAILED).exists():
                partially_complete_event = ETLImportPartiallyCompleteEvent(
                    job_notifier=job_group_notifier_id).to_dict()
                send_notification.delay(partially_complete_event)
            else:
                complete_event = ETLImportCompleteEvent(
                    job_notifier=job_group_notifier_id).to_dict()
                send_notification.delay(complete_event)

            create_jobs_from_request.delay(request_id, operator.id, job_group)
    return []
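
The send_notification.delay(...) calls above are Celery's asynchronous enqueue API. A minimal sketch of how such a task could be declared, assuming a standard Celery setup; the broker URL and the task body are placeholders, not the project's actual configuration:

from celery import Celery

app = Celery("beagle_etl", broker="redis://localhost:6379/0")


@app.task
def send_notification(event):
    # a real implementation would route `event` (the dict produced by
    # SomeEvent(...).to_dict()) to JIRA, email, or another channel
    print("notification:", event)


# enqueued without blocking the caller:
send_notification.delay({"class": "SetCIReviewEvent"})
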
Example #4
    def _generate_ticket_decription(self):
        samples_completed = set()
        samples_failed = set()
        all_jobs = []
        request_jobs = []
        sample_jobs = []
        pooled_normal_jobs = []

        jobs = Job.objects.filter(job_group=self.job.job_group.id).all()

        for job in jobs:
            if job.run == TYPES['SAMPLE']:
                if job.status == JobStatus.COMPLETED:
                    samples_completed.add(job.args['sample_id'])
                elif job.status == JobStatus.FAILED:
                    samples_failed.add(job.args['sample_id'])
                sample_jobs.append((str(job.id), JobStatus(job.status).name, self.get_key(job.run), job.message or "",
                                    job.args.get('sample_id', '')))
            elif job.run == TYPES['REQUEST']:
                request_jobs.append(
                    (str(job.id), '', self.get_key(job.run), job.message or "", ''))
            elif job.run == TYPES['POOLED_NORMAL']:
                pooled_normal_jobs.append(
                    (str(job.id), JobStatus(job.status).name, self.get_key(job.run), job.message or "",
                     job.args.get('sample_id', '')))

        all_jobs.extend(request_jobs)
        all_jobs.extend(sample_jobs)
        all_jobs.extend(pooled_normal_jobs)

        request_metadata = Job.objects.filter(args__request_id=self.job.args['request_id'],
                                              run=TYPES['SAMPLE']).order_by('-created_date').first()

        number_of_tumors = FileRepository.filter(
            metadata={'requestId': self.job.args['request_id'], 'tumorOrNormal': 'Tumor'}, values_metadata='sampleId').count()
        number_of_normals = FileRepository.filter(
            metadata={'requestId': self.job.args['request_id'], 'tumorOrNormal': 'Normal'}, values_metadata='sampleId').count()

        data_analyst_email = ""
        data_analyst_name = ""
        investigator_email = ""
        investigator_name = ""
        lab_head_email = ""
        lab_head_name = ""
        pi_email = ""
        project_manager_name = ""
        recipe = ""
        qc_access_emails = ""

        if request_metadata:
            metadata = request_metadata.args.get('request_metadata', {})
            # .get(..., "") keeps the defaults above if a key is missing
            recipe = metadata.get('recipe', "")
            data_analyst_email = metadata.get('dataAnalystEmail', "")
            data_analyst_name = metadata.get('dataAnalystName', "")
            investigator_email = metadata.get('investigatorEmail', "")
            investigator_name = metadata.get('investigatorName', "")
            lab_head_email = metadata.get('labHeadEmail', "")
            lab_head_name = metadata.get('labHeadName', "")
            pi_email = metadata.get('piEmail', "")
            project_manager_name = metadata.get('projectManagerName', "")
            qc_access_emails = metadata.get('qcAccessEmails', "")

        event = ETLImportEvent(str(self.job.job_group_notifier.id),
                               str(self.job.job_group.id),
                               self.job.args['request_id'],
                               list(samples_completed),
                               list(samples_failed),
                               recipe,
                               data_analyst_email,
                               data_analyst_name,
                               investigator_email,
                               investigator_name,
                               lab_head_email,
                               lab_head_name,
                               pi_email,
                               project_manager_name,
                               qc_access_emails,
                               number_of_tumors,
                               number_of_normals,
                               len(pooled_normal_jobs)
                               )
        e = event.to_dict()
        send_notification.delay(e)

        etl_event = ETLJobsLinksEvent(str(self.job.job_group_notifier.id),
                                      self.job.args['request_id'],
                                      all_jobs)
        etl_e = etl_event.to_dict()
        send_notification.delay(etl_e)
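
Each entry appended to all_jobs above is a positional 5-tuple consumed by ETLJobsLinksEvent; a hypothetical row, with made-up values, to show the shape:

row = (
    "8a5b2c1d-...",   # str(job.id)
    "COMPLETED",      # JobStatus(job.status).name; "" for REQUEST rows
    "sample",         # self.get_key(job.run), assumed to be a display key
    "",               # job.message or ""
    "10075_D_1",      # job.args.get('sample_id', '')
)
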
Example #5
    def _generate_ticket_decription(self):
        samples_completed = set()
        samples_failed = set()
        all_jobs = []
        request_jobs = []
        sample_jobs = []
        pooled_normal_jobs = []

        jobs = Job.objects.filter(job_group=self.job.job_group.id).all()

        for job in jobs:
            if job.run == TYPES["SAMPLE"]:
                if job.status == JobStatus.COMPLETED:
                    samples_completed.add(job.args["sample_id"])
                elif job.status == JobStatus.FAILED:
                    samples_failed.add(job.args["sample_id"])
                sample_jobs.append((
                    str(job.id),
                    JobStatus(job.status).name,
                    self.get_key(job.run),
                    job.message or "",
                    job.args.get("sample_id", ""),
                ))
            elif job.run == TYPES["REQUEST"]:
                request_jobs.append(
                    (str(job.id), "", self.get_key(job.run), job.message or "", ""))
            elif job.run == TYPES["POOLED_NORMAL"]:
                pooled_normal_jobs.append((
                    str(job.id),
                    JobStatus(job.status).name,
                    self.get_key(job.run),
                    job.message or "",
                    job.args.get("sample_id", ""),
                ))

        all_jobs.extend(request_jobs)
        all_jobs.extend(sample_jobs)
        all_jobs.extend(pooled_normal_jobs)

        request_metadata = (Job.objects.filter(
            args__request_id=self.job.args["request_id"],
            run=TYPES["SAMPLE"]).order_by("-created_date").first())

        number_of_tumors = FileRepository.filter(
            metadata={
                "requestId": self.job.args["request_id"],
                "tumorOrNormal": "Tumor"
            },
            values_metadata="sampleId").count()
        number_of_normals = FileRepository.filter(
            metadata={
                "requestId": self.job.args["request_id"],
                "tumorOrNormal": "Normal"
            },
            values_metadata="sampleId").count()

        data_analyst_email = ""
        data_analyst_name = ""
        investigator_email = ""
        investigator_name = ""
        lab_head_email = ""
        lab_head_name = ""
        pi_email = ""
        project_manager_name = ""
        recipe = ""
        qc_access_emails = ""
        data_access_emails = ""
        other_contact_emails = ""

        if request_metadata:
            metadata = request_metadata.args.get("request_metadata", {})
            # .get(..., "") keeps the defaults above if a key is missing
            recipe = metadata.get("recipe", "")
            data_analyst_email = metadata.get("dataAnalystEmail", "")
            data_analyst_name = metadata.get("dataAnalystName", "")
            investigator_email = metadata.get("investigatorEmail", "")
            investigator_name = metadata.get("investigatorName", "")
            lab_head_email = metadata.get("labHeadEmail", "")
            lab_head_name = metadata.get("labHeadName", "")
            pi_email = metadata.get("piEmail", "")
            project_manager_name = metadata.get("projectManagerName", "")
            qc_access_emails = metadata.get("qcAccessEmails", "")
            data_access_emails = metadata.get("dataAccessEmails", "")
            other_contact_emails = metadata.get("otherContactEmails", "")

        event = ETLImportEvent(
            str(self.job.job_group_notifier.id),
            str(self.job.job_group.id),
            self.job.args["request_id"],
            list(samples_completed),
            list(samples_failed),
            recipe,
            data_analyst_email,
            data_analyst_name,
            investigator_email,
            investigator_name,
            lab_head_email,
            lab_head_name,
            pi_email,
            project_manager_name,
            qc_access_emails,
            number_of_tumors,
            number_of_normals,
            len(pooled_normal_jobs),
            data_access_emails,
            other_contact_emails,
        )
        e = event.to_dict()
        send_notification.delay(e)

        etl_event = ETLJobsLinksEvent(str(self.job.job_group_notifier.id),
                                      self.job.args["request_id"], all_jobs)
        etl_e = etl_event.to_dict()
        send_notification.delay(etl_e)
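
The two FileRepository counts above differ only in the tumorOrNormal value; a small refactoring sketch, assuming FileRepository.filter keeps the call shape used in the snippet:

def count_samples(request_id, tumor_or_normal):
    # counts sampleId metadata values matching the request and sample type
    return FileRepository.filter(
        metadata={"requestId": request_id, "tumorOrNormal": tumor_or_normal},
        values_metadata="sampleId").count()


number_of_tumors = count_samples("10075_D", "Tumor")
number_of_normals = count_samples("10075_D", "Normal")
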