示例#1
0
def fail_job(self,
             run_id,
             error_message,
             lsf_log_location=None,
             input_json_location=None):
    """Record a run failure, or resubmit the run when it can be restarted.

    The work is done under the ``run_lock_<run_id>`` lock; when the lock
    cannot be acquired only a warning is logged.

    Args:
        self: object exposing ``app.oid``, used as the lock owner id.
        run_id: identifier of the run to fail.
        error_message: failure details handed to ``run.fail``.
        lsf_log_location: forwarded to ``_job_finished_notify``.
        input_json_location: forwarded to ``_job_finished_notify``.
    """
    with memcache_task_lock("run_lock_%s" % run_id, self.app.oid) as acquired:
        if not acquired:
            logger.warning("Run %s is processing by another worker" % run_id)
            return

        run = RunObjectFactory.from_db(run_id)
        if run.run_obj.is_failed:
            # The failure was already handled; nothing more to do.
            logger.info(
                format_log("Run Fail already processed", obj=run.run_obj))
            return

        restart_args = run.run_obj.set_for_restart()
        if restart_args:
            # A truthy result is unpacked as the arguments for a new submission.
            restart_id, output_directory, execution_id = restart_args
            submit_job.delay(restart_id, output_directory, execution_id)
            return

        # No restart: persist the failure and notify.
        run.fail(error_message)
        run.to_db()

        notifier = run.job_group_notifier
        notifier_id = str(notifier.id) if notifier else None

        send_notification.delay(SetCIReviewEvent(notifier_id).to_dict())

        _upload_qc_report(run.run_obj)
        _job_finished_notify(run, lsf_log_location, input_json_location)
示例#2
0
def create_run_task(run_id, inputs, output_directory=None):
    """Build a run from its definition, save it and queue it for submission.

    Args:
        run_id: identifier of the run to build.
        inputs: input values passed to ``RunObjectFactory.from_definition``.
        output_directory: optional directory forwarded to ``submit_job``.
    """
    start_message = format_log("Creating and validating run", obj_id=run_id)
    logger.info(start_message)

    run = RunObjectFactory.from_definition(run_id, inputs)
    run.ready()
    run.to_db()

    # Submission is queued only after the run has been saved.
    submit_job.delay(run_id, output_directory)

    ready_message = format_log("Run is ready", obj=run)
    logger.info(ready_message)
示例#3
0
def submit_job(run_id, output_directory=None, execution_id=None):
    """Submit a run to the service at ``settings.RIDGEBACK_URL``.

    A new job is posted to ``/v0/jobs/``. When the run can be resumed, the
    request goes to ``/v0/jobs/<id>/resume/`` instead and carries only the
    output directory (plus optional limits).

    The execution to resume is chosen as follows:
      * if ``run.resume`` is set and that run ``equal``s this one, its
        ``execution_id`` is used;
      * an explicit ``execution_id`` argument overrides that choice.

    Args:
        run_id: identifier of the run to submit.
        output_directory: job root directory; defaults to
            ``<run.app.output_directory>/<run_id>``.
        execution_id: optional execution id to resume.

    Raises:
        Exception: if the run does not exist, or the service does not
            answer with HTTP 201.
    """
    try:
        run = Run.objects.get(id=run_id)
    except Run.DoesNotExist:
        raise Exception("Failed to submit a run")

    resume = None
    run1 = RunObjectFactory.from_db(run_id)
    if run.resume:
        run2 = RunObjectFactory.from_db(run.resume)

        if run1.equal(run2):
            logger.info(
                format_log("Resuming run with execution id %s" %
                           run2.run_obj.execution_id,
                           obj=run))
            resume = str(run2.run_obj.execution_id)
        else:
            logger.info(
                format_log(
                    "Failed to resume runs as run is not equal to the following run: %s"
                    % str(run2),
                    obj=run))
    if execution_id:
        # An explicitly requested execution wins over the one derived above.
        resume = execution_id
    if not output_directory:
        output_directory = os.path.join(run.app.output_directory, str(run_id))
    job = run1.dump_job(output_directory=output_directory)
    logger.info(format_log("Job ready for submitting", obj=run))
    if resume:
        url = urljoin(settings.RIDGEBACK_URL,
                      "/v0/jobs/{id}/resume/".format(id=resume))
        # The resume request replaces the dumped job with just the root dir.
        job = {"root_dir": output_directory}
    else:
        # Built with urljoin, like the resume URL, so a trailing slash in
        # RIDGEBACK_URL does not produce a "//v0/jobs/" path.
        url = urljoin(settings.RIDGEBACK_URL, "/v0/jobs/")
    if run.app.walltime:
        job["walltime"] = run.app.walltime
    if run.app.memlimit:
        job["memlimit"] = run.app.memlimit
    response = requests.post(url, json=job)
    if response.status_code != 201:
        raise Exception("Failed to submit job %s" % run_id)

    # Remember the id assigned by the service.
    run.execution_id = response.json()["id"]
    logger.info(format_log("Job successfully submitted", obj=run))
    run.save()
示例#4
0
    def test_run_fail_job(self, mock_get_pipeline, memcache_task_lock, send_notification, set_for_restart):
        """Failing a run bumps the operator run's failed count and stores the message.

        The mock arguments are presumably injected by patch decorators that
        sit outside this view.
        """
        expected_message = {"details": "Error has happened"}

        with open("runner/tests/run/pair-workflow.cwl", "r") as cwl_file:
            app = json.load(cwl_file)
        with open("runner/tests/run/inputs.json", "r") as inputs_file:
            inputs = json.load(inputs_file)

        mock_get_pipeline.return_value = app
        memcache_task_lock.return_value = True
        send_notification.return_value = False
        # No restart information, so fail_job takes the failure branch.
        set_for_restart.return_value = None

        run = RunObjectFactory.from_definition(str(self.run.id), inputs)
        run.to_db()

        operator_run = OperatorRun.objects.first()
        operator_run.runs.add(run.run_obj)
        failed_before = operator_run.num_failed_runs

        # Pass a copy so the expected value stays independent of the call.
        fail_job(run.run_id, dict(expected_message))

        operator_run.refresh_from_db()
        self.assertEqual(operator_run.num_failed_runs, failed_before + 1)

        reloaded_run = RunObjectFactory.from_db(run.run_id)
        self.assertEqual(reloaded_run.message, expected_message)
示例#5
0
 def test_run_to_db(self, mock_get_pipeline):
     """A run built from a definition and saved with ``to_db`` has a Run row."""
     with open("runner/tests/run/pair-workflow.cwl", "r") as f:
         app = json.load(f)
     with open("runner/tests/run/inputs.json", "r") as f:
         inputs = json.load(f)
     mock_get_pipeline.return_value = app
     run = RunObjectFactory.from_definition(str(self.run.id), inputs)
     run.to_db()
     try:
         run_obj = Run.objects.get(id=run.run_id)
     except Run.DoesNotExist:
         # Fail with a clear message; swallowing the error would only lead
         # to a NameError on ``run_obj`` in the assertion below.
         self.fail("Run %s was not saved to the database" % run.run_id)
     self.assertEqual(str(run_obj.id), run.run_id)
示例#6
0
 def test_run_complete_job(
     self, mock_populate_job_group_notifier, mock_get_pipeline, memcache_task_lock, send_notification
 ):
     """Completing a run bumps the completed count and registers its outputs.

     Checks the "maf" output values and the four files attached to the
     "bams" port. The mock arguments are presumably injected by patch
     decorators that sit outside this view.
     """
     with open("runner/tests/run/pair-workflow.cwl", "r") as f:
         app = json.load(f)
     with open("runner/tests/run/inputs.json", "r") as f:
         inputs = json.load(f)
     mock_populate_job_group_notifier.return_value = None
     mock_get_pipeline.return_value = app
     memcache_task_lock.return_value = True
     send_notification.return_value = False
     run = RunObjectFactory.from_definition(str(self.run.id), inputs)
     run.to_db()
     operator_run = OperatorRun.objects.first()
     operator_run.runs.add(run.run_obj)
     num_completed_runs = operator_run.num_completed_runs
     complete_job(run.run_id, self.outputs)
     operator_run.refresh_from_db()
     self.assertEqual(operator_run.num_completed_runs, num_completed_runs + 1)
     run_obj = RunObjectFactory.from_db(run.run_id)
     file_obj = File.objects.filter(path=self.outputs["maf"]["location"].replace("file://", "")).first()
     run_obj.to_db()
     for out in run_obj.outputs:
         if out.name == "maf":
             self.assertEqual(out.value["location"], self.outputs["maf"]["location"])
             self.assertEqual(FileProcessor.get_bid_from_file(file_obj), out.db_value["location"])
     port = Port.objects.filter(run_id=run_obj.run_id, name="bams").first()
     # Evaluate the queryset once: repeated ``all()[i]`` lookups each hit the
     # database and, with no ordering visible here, need not return distinct rows.
     port_files = list(port.files.all())
     self.assertEqual(len(port_files), 4)
     expected_result = (
         "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bam",
         "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bai",
         "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bam",
         "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bai",
     )
     for port_file in port_files:
         # assertIn reports the offending path when the check fails.
         self.assertIn(port_file.path, expected_result)
示例#7
0
    def test_restart_run(self, submit_job_task):
        """Restarting an operator run submits a new run that resumes the failed one."""
        file_group = FileGroup.objects.create(name="test", slug="test")
        app = Pipeline.objects.create(
            name="pipeline",
            output_directory="/tmp",
            output_file_group=file_group,
        )
        operator_run = OperatorRun.objects.create(
            num_total_runs=1,
            num_completed_runs=0,
            num_failed_runs=1,
        )
        failed_run = Run.objects.create(
            name="failed_run",
            operator_run=operator_run,
            output_directory="/test",
            status=RunStatus.FAILED,
            notify_for_outputs=[],
            app=app,
        )
        # Give the failed run one input port and one output port.
        for port_type in (PortType.INPUT, PortType.OUTPUT):
            Port.objects.create(run=failed_run, port_type=port_type)

        payload = {"operator_run_id": operator_run.id}
        response = self.client.post("/v0/run/restart/", payload, format="json")
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)

        # Restarted run should have a new ID
        new_run_id = submit_job_task.call_args[0][0]
        self.assertNotEqual(failed_run.id, new_run_id)

        # Restarted run should have resume directory set to original run
        new_run = Run.objects.get(id=new_run_id)
        self.assertEqual(str(failed_run.id), str(new_run.resume))

        # Both runs should have same input ports
        new_run_object = RunObjectFactory.from_db(new_run_id)
        failed_run_object = RunObjectFactory.from_db(failed_run.id)
        self.assertTrue(failed_run_object.equal(new_run_object))
示例#8
0
 def test_run_creation_from_cwl(self, mock_get_pipeline):
     """The "pair" input of a run built from the CWL app maps to the fixture files.

     For every pair/read slot, ``db_value`` must hold the ``bid://<file id>``
     location and ``value`` must hold the file path.
     """
     with open("runner/tests/run/pair-workflow.cwl", "r") as cwl_file:
         app = json.load(cwl_file)
     with open("runner/tests/run/inputs.json", "r") as inputs_file:
         inputs = json.load(inputs_file)
     mock_get_pipeline.return_value = app
     run = RunObjectFactory.from_definition(str(self.run.id), inputs)
     run.ready()
     for inp in run.inputs:
         if inp.name != "pair":
             continue
         # (pair index, read key, expected file), in assertion order.
         expected_files = (
             (0, "R1", self.file1),
             (0, "R2", self.file2),
             (1, "R1", self.file3),
             (1, "R2", self.file4),
         )
         for pair_index, read, expected_file in expected_files:
             self.assertEqual(
                 inp.db_value[pair_index][read][0]["location"], "bid://%s" % str(expected_file.id)
             )
             self.assertEqual(inp.value[pair_index][read][0]["path"], expected_file.path)
示例#9
0
def complete_job(self,
                 run_id,
                 outputs,
                 lsf_log_location=None,
                 inputs_json_location=None):
    """Mark a run as completed, notify, and queue chained operators.

    The work is done under the ``run_lock_<run_id>`` lock; when the lock
    cannot be acquired only a warning is logged. If ``run.complete`` raises,
    the run is handed to ``fail_job`` with the error text instead.

    Args:
        self: object exposing ``app.oid``, used as the lock owner id.
        run_id: identifier of the run to complete.
        outputs: outputs passed to ``run.complete``.
        lsf_log_location: forwarded to ``_job_finished_notify``.
        inputs_json_location: forwarded to ``_job_finished_notify``.
    """
    lock_id = "run_lock_%s" % run_id
    with memcache_task_lock(lock_id, self.app.oid) as acquired:
        if not acquired:
            logger.warning("Run %s is processing by another worker" % run_id)
            return

        run = RunObjectFactory.from_db(run_id)
        if run.run_obj.is_completed:
            logger.info(
                format_log("Run Complete already processed",
                           obj=run.run_obj))
            return

        logger.info(format_log("Completing Run", obj=run.run_obj))

        try:
            run.complete(outputs)
        except Exception as e:
            # NOTE(review): fail_job asks for the same "run_lock_<id>" lock
            # that is held here; confirm memcache_task_lock lets this nested
            # call acquire it, otherwise the failure is never recorded.
            fail_job(run_id, str(e))
            return

        run.to_db()
        job_group = run.job_group
        job_group_id = str(job_group.id) if job_group else None

        _job_finished_notify(run, lsf_log_location, inputs_json_location)

        operator_run = run.run_obj.operator_run
        if not operator_run:
            # Without an operator run there are no triggers to follow;
            # dereferencing it below would raise AttributeError after the
            # run has already been saved and notified.
            return

        parent_id = str(operator_run.id)
        triggers = operator_run.operator.from_triggers.filter(
            run_type=TriggerRunType.INDIVIDUAL)
        for trigger in triggers:
            create_jobs_from_chaining.delay(
                trigger.to_operator_id,
                trigger.from_operator_id,
                [run_id],
                job_group_id=job_group_id,
                parent=parent_id,
            )