Example #1
def training():
    form = ExperimentForm()
    if form.validate_on_submit():
        experiment = models.Experiment()
        generated_tuples = experiment.generate(
            num_trials=int(form.num_trials.data),
            data_type=int(form.data_type.data),
            matrix_size=int(form.matrix_size.data))
        experiment.user_id = g.user.id
        experiment.training = True

        db.session.add(experiment)
        for (trial, matrix) in generated_tuples:
            db.session.add(trial)
            db.session.add(matrix)

        db.session.commit()

        return redirect(
            url_for('instructions', experiment_id=str(experiment.id)))

    return render_template('training.html',
                           title='Training',
                           form=form,
                           user=g.user)
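The view above assumes an ExperimentForm exposing num_trials, data_type, and matrix_size fields. A minimal sketch of such a form, assuming Flask-WTF/WTForms (field types, labels, and choices are hypothetical; only the field names come from the view):

from flask_wtf import FlaskForm
from wtforms import IntegerField, SelectField
from wtforms.validators import DataRequired

class ExperimentForm(FlaskForm):
    # Hypothetical definitions; the view casts each .data value with int(),
    # so every field must yield an int-coercible value.
    num_trials = IntegerField('Number of trials', validators=[DataRequired()])
    data_type = SelectField('Data type', choices=[('0', 'alpha'), ('1', 'numeric')])
    matrix_size = IntegerField('Matrix size', validators=[DataRequired()])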
Example #2
    def test_default_generate(self):
        e = models.Experiment()
        self.assertEqual(len(e.generate()), 60)
Example #3
    def test_generate_3(self):
        e = models.Experiment()
        self.assertEqual(
            len(e.generate(num_trials=5, data_type='alpha', matrix_size=4)),
            20)
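Taken together, the two tests pin down the size of generate()'s output: 60 tuples by default, and 20 when num_trials=5 and matrix_size=4, which is consistent with len(result) == num_trials * matrix_size (defaults of num_trials=15, matrix_size=4 would then give 15 * 4 == 60). A self-contained sketch of that assumed contract, not the project's actual model code:

def generate(num_trials=15, data_type='alpha', matrix_size=4):
    # Hypothetical stand-in: one (trial, matrix) tuple per
    # trial/matrix combination.
    return [(('trial', t), ('matrix', m))
            for t in range(num_trials)
            for m in range(matrix_size)]

assert len(generate()) == 60                # matches test_default_generate
assert len(generate(num_trials=5, data_type='alpha', matrix_size=4)) == 20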
Example #4
    def post(self):
        """Queues a new experiment."""
        # TODO: possibly use marshal() on the post_data to make sure the
        #       default values etc. are filled in:
        #       https://flask-restplus.readthedocs.io/en/stable/api.html#flask_restplus.marshal
        post_data = request.get_json()

        # TODO: maybe we can expect a datetime (in the schema) so we
        #       do not have to parse it here.
        #       https://flask-restplus.readthedocs.io/en/stable/api.html#flask_restplus.fields.DateTime
        scheduled_start = post_data["scheduled_start"]
        scheduled_start = datetime.fromisoformat(scheduled_start)
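        # fromisoformat (Python 3.7+) parses strings produced by
        # datetime.isoformat(), e.g. "2021-03-01T12:00:00".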

        pipeline_runs = []
        pipeline_run_spec = post_data["pipeline_run_spec"]
        for pipeline_description, id_ in zip(
                post_data["pipeline_descriptions"],
                post_data["pipeline_run_ids"]):
            pipeline_run_spec["pipeline_description"] = pipeline_description
            pipeline = construct_pipeline(**pipeline_run_spec)

            # TODO: This can be made more efficient, since the pipeline
            #       is the same for all pipeline runs. The only
            #       difference is the parameters. So all the jobs could
            #       be created in batch.
            # Create Celery object with the Flask context and construct the
            # kwargs for the job.
            celery = make_celery(current_app)
            celery_job_kwargs = {
                "experiment_uuid": post_data["experiment_uuid"],
                "project_uuid": post_data["project_uuid"],
                "pipeline_description": pipeline.to_dict(),
                "run_config": pipeline_run_spec["run_config"],
            }

            # Start the run as a background task on Celery. Due to circular
            # imports we send the task by name instead of importing the
            # function directly.
            res = celery.send_task(
                "app.core.tasks.start_non_interactive_pipeline_run",
                eta=scheduled_start,
                kwargs=celery_job_kwargs,
            )
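            # The eta argument tells Celery not to execute the task before
            # the given datetime, which implements the scheduled start.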

            # NOTE: this is only relevant if a result backend is configured.
            # The task does not return anything, so we can forget its result
            # and make sure that the Celery backend releases the resources
            # (for storing and transmitting results) associated with the
            # task.
            res.forget()

            non_interactive_run = {
                "experiment_uuid": post_data["experiment_uuid"],
                "run_uuid": res.id,
                "pipeline_run_id": id_,
                "pipeline_uuid": pipeline.properties["uuid"],
                "project_uuid": post_data["project_uuid"],
                "status": "PENDING",
            }
            db.session.add(models.NonInteractiveRun(**non_interactive_run))

            # TODO: this code is also in `namespace_runs`. Could
            #       potentially be put in a function for modularity.
            # Set an initial value for the status of the pipeline steps that
            # will be run.
            step_uuids = [s.properties["uuid"] for s in pipeline.steps]
            pipeline_steps = []
            for step_uuid in step_uuids:
                pipeline_steps.append(
                    models.NonInteractiveRunPipelineStep(
                        **{
                            "experiment_uuid": post_data["experiment_uuid"],
                            "run_uuid": res.id,
                            "step_uuid": step_uuid,
                            "status": "PENDING",
                        }))
            db.session.bulk_save_objects(pipeline_steps)
            db.session.commit()

            non_interactive_run["pipeline_steps"] = pipeline_steps
            pipeline_runs.append(non_interactive_run)

        experiment = {
            "experiment_uuid": post_data["experiment_uuid"],
            "project_uuid": post_data["project_uuid"],
            "pipeline_uuid": post_data["pipeline_uuid"],
            "scheduled_start": scheduled_start,
            "total_number_of_pipeline_runs": len(pipeline_runs),
        }
        db.session.add(models.Experiment(**experiment))
        db.session.commit()

        experiment["pipeline_runs"] = pipeline_runs
        return experiment, 201
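For reference, the keys this handler reads from post_data imply a request body shaped roughly as follows (all values hypothetical):

example_post_data = {
    "experiment_uuid": "exp-uuid",
    "project_uuid": "proj-uuid",
    "pipeline_uuid": "pipe-uuid",
    "scheduled_start": "2021-03-01T12:00:00",  # ISO 8601, parsed above
    "pipeline_descriptions": [{"name": "run-0"}, {"name": "run-1"}],
    "pipeline_run_ids": [0, 1],
    # construct_pipeline(**pipeline_run_spec) consumes this dict after a
    # pipeline_description is injected per run; run_config is forwarded
    # to the Celery job.
    "pipeline_run_spec": {"run_config": {}},
}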
Example #5
    def post(self):
        """Queues a new experiment."""
        # TODO: possibly use marshal() on the post_data to make sure the
        #       default values etc. are filled in:
        #       https://flask-restplus.readthedocs.io/en/stable/api.html#flask_restplus.marshal
        post_data = request.get_json()

        # TODO: maybe we can expect a datetime (in the schema) so we
        #       do not have to parse it here.
        #       https://flask-restplus.readthedocs.io/en/stable/api.html#flask_restplus.fields.DateTime
        scheduled_start = post_data['scheduled_start']
        scheduled_start = datetime.fromisoformat(scheduled_start)

        pipeline_runs = []
        pipeline_run_spec = post_data['pipeline_run_spec']
        for pipeline_description, id_ in zip(
                post_data['pipeline_descriptions'],
                post_data['pipeline_run_ids']):
            pipeline_run_spec['pipeline_description'] = pipeline_description
            pipeline = construct_pipeline(**pipeline_run_spec)

            # TODO: This can be made more efficient, since the pipeline
            #       is the same for all pipeline runs. The only
            #       difference is the parameters. So all the jobs could
            #       be created in batch.
            # Create Celery object with the Flask context and construct the
            # kwargs for the job.
            celery = make_celery(current_app)
            celery_job_kwargs = {
                'experiment_uuid': post_data['experiment_uuid'],
                'pipeline_description': pipeline.to_dict(),
                'run_config': pipeline_run_spec['run_config'],
            }

            # Start the run as a background task on Celery. Due to circular
            # imports we send the task by name instead of importing the
            # function directly.
            res = celery.send_task(
                'app.core.tasks.start_non_interactive_pipeline_run',
                eta=scheduled_start,
                kwargs=celery_job_kwargs)

            non_interactive_run = {
                'experiment_uuid': post_data['experiment_uuid'],
                'run_uuid': res.id,
                'pipeline_run_id': id_,
                'pipeline_uuid': pipeline.properties['uuid'],
                'status': 'PENDING',
            }
            db.session.add(models.NonInteractiveRun(**non_interactive_run))

            # TODO: this code is also in `namespace_runs`. Could
            #       potentially be put in a function for modularity.
            # Set an initial value for the status of the pipeline steps that
            # will be run.
            step_uuids = [s.properties['uuid'] for s in pipeline.steps]
            pipeline_steps = []
            for step_uuid in step_uuids:
                pipeline_steps.append(
                    models.NonInteractiveRunPipelineStep(
                        **{
                            'experiment_uuid': post_data['experiment_uuid'],
                            'run_uuid': res.id,
                            'step_uuid': step_uuid,
                            'status': 'PENDING'
                        }))
            db.session.bulk_save_objects(pipeline_steps)
            db.session.commit()

            non_interactive_run['pipeline_steps'] = pipeline_steps
            pipeline_runs.append(non_interactive_run)

        experiment = {
            'experiment_uuid': post_data['experiment_uuid'],
            'pipeline_uuid': post_data['pipeline_uuid'],
            'scheduled_start': scheduled_start,
        }
        db.session.add(models.Experiment(**experiment))
        db.session.commit()

        experiment['pipeline_runs'] = pipeline_runs
        return experiment, 201
Example #6
    def post(self):
        """Queues a new experiment."""
        # TODO: possibly use marshal() on the post_data to make sure the
        #       default values etc. are filled in. Note that we have moved
        #       over to using flask_restx:
        #       https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.marshal
        post_data = request.get_json()

        # TODO: maybe we can expect a datetime (in the schema) so we
        #       do not have to parse it here. Again note that we are now
        #       using flask_restx:
        #       https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.fields.DateTime
        scheduled_start = post_data["scheduled_start"]
        scheduled_start = datetime.fromisoformat(scheduled_start)

        experiment = {
            "experiment_uuid": post_data["experiment_uuid"],
            "project_uuid": post_data["project_uuid"],
            "pipeline_uuid": post_data["pipeline_uuid"],
            "scheduled_start": scheduled_start,
            "total_number_of_pipeline_runs":
                len(post_data["pipeline_definitions"]),
        }
        db.session.add(models.Experiment(**experiment))
        db.session.commit()

        pipeline_runs = []
        pipeline_run_spec = post_data["pipeline_run_spec"]
        env_uuid_docker_id_mappings = None
        # This way we write the entire experiment to the db, but avoid
        # launching any run (celery task) if we detect a problem.
        experiment_creation_error_messages = []
        tasks_to_launch = []

        # TODO: This can be made more efficient, since the pipeline
        #       is the same for all pipeline runs. The only
        #       difference is the parameters. So all the jobs could
        #       be created in batch.
        for pipeline_definition, id_ in zip(post_data["pipeline_definitions"],
                                            post_data["pipeline_run_ids"]):
            pipeline_run_spec["pipeline_definition"] = pipeline_definition
            pipeline = construct_pipeline(**pipeline_run_spec)

            # specify the task_id beforehand to avoid race conditions
            # between the task and its presence in the db
            task_id = str(uuid.uuid4())

            non_interactive_run = {
                "experiment_uuid": post_data["experiment_uuid"],
                "run_uuid": task_id,
                "pipeline_run_id": id_,
                "pipeline_uuid": pipeline.properties["uuid"],
                "project_uuid": post_data["project_uuid"],
                "status": "PENDING",
            }
            db.session.add(
                models.NonInteractivePipelineRun(**non_interactive_run))
            # need to flush because otherwise the bulk insertion of
            # pipeline steps will lead to foreign key errors
            # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
            db.session.flush()

            # TODO: this code is also in `namespace_runs`. Could
            #       potentially be put in a function for modularity.
            # Set an initial value for the status of the pipeline
            # steps that will be run.
            step_uuids = [s.properties["uuid"] for s in pipeline.steps]
            pipeline_steps = []
            for step_uuid in step_uuids:
                pipeline_steps.append(
                    models.PipelineRunStep(
                        **{
                            "run_uuid": task_id,
                            "step_uuid": step_uuid,
                            "status": "PENDING",
                        }))
            db.session.bulk_save_objects(pipeline_steps)
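            # bulk_save_objects skips most ORM mechanics (no cascades or
            # relationship handling), which is why the flush above is
            # needed to satisfy the foreign key constraint.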
            db.session.commit()

            non_interactive_run["pipeline_steps"] = pipeline_steps
            pipeline_runs.append(non_interactive_run)

            # Get the docker ids of the images to use, and make sure those
            # images will not be deleted in case they become outdated by an
            # environment rebuild. Compute the mappings only once: this
            # guarantees they are the same for all runs, so an environment
            # build completing while the runs are being submitted won't
            # affect the experiment.
            if env_uuid_docker_id_mappings is None:
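                # Contract as used here: returns a mapping of environment
                # UUID -> docker image id, raising errors.ImageNotFound
                # when a referenced image is missing.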
                try:
                    env_uuid_docker_id_mappings = lock_environment_images_for_run(
                        task_id,
                        post_data["project_uuid"],
                        pipeline.get_environments(),
                    )
                except errors.ImageNotFound as e:
                    experiment_creation_error_messages.append(
                        f"Pipeline was referencing environments for "
                        f"which an image does not exist, {e}")
            else:
                image_mappings = [
                    models.PipelineRunImageMapping(
                        **{
                            "run_uuid": task_id,
                            "orchest_environment_uuid": env_uuid,
                            "docker_img_id": docker_id,
                        })
                    for env_uuid, docker_id in
                    env_uuid_docker_id_mappings.items()
                ]
                db.session.bulk_save_objects(image_mappings)
                db.session.commit()

            if len(experiment_creation_error_messages) == 0:
                # prepare the args for the task
                run_config = pipeline_run_spec["run_config"]
                run_config["env_uuid_docker_id_mappings"] = (
                    env_uuid_docker_id_mappings)
                celery_job_kwargs = {
                    "experiment_uuid": post_data["experiment_uuid"],
                    "project_uuid": post_data["project_uuid"],
                    "pipeline_definition": pipeline.to_dict(),
                    "run_config": run_config,
                }

                # Due to circular imports we use the task name instead
                # of importing the function directly.
                tasks_to_launch.append({
                    "name":
                        "app.core.tasks.start_non_interactive_pipeline_run",
                    "eta": scheduled_start,
                    "kwargs": celery_job_kwargs,
                    "task_id": task_id,
                })

        experiment["pipeline_runs"] = pipeline_runs

        if len(experiment_creation_error_messages) == 0:
            # Create Celery object with the Flask context
            celery = make_celery(current_app)
            for task in tasks_to_launch:
                res = celery.send_task(**task)
                # NOTE: this is only relevant if a result backend is
                # configured. The task does not return anything, so we can
                # forget its result and make sure that the Celery backend
                # releases the resources (for storing and transmitting
                # results) associated with the task.
                res.forget()

            return experiment, 201
        else:
            current_app.logger.error(
                "\n".join(experiment_creation_error_messages))

            # Simple way to update both the in-memory objects and the db
            # while avoiding multiple update statements (one per object).
            for pipeline_run in experiment["pipeline_runs"]:
                pipeline_run["status"] = "FAILURE"
                for step in pipeline_run["pipeline_steps"]:
                    step.status = "FAILURE"

                models.PipelineRunStep.query.filter_by(
                    run_uuid=pipeline_run["run_uuid"]).update(
                        {"status": "FAILURE"})

            models.NonInteractivePipelineRun.query.filter_by(
                experiment_uuid=post_data["experiment_uuid"]).update(
                    {"status": "FAILURE"})
            db.session.commit()

            return {
                "message": ("Failed to create experiment because not all "
                            "referenced environments are available.")
            }, 500
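Relative to the hypothetical payload sketched after Example #4, this revision reads pipeline_definitions in place of pipeline_descriptions; the remaining top-level keys are unchanged.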