def create_task(**kwargs):
    """Creates a new task in our database/object storage.

    Args:
        **kwargs: arbitrary keyword arguments.

    Returns:
        The task info.
    """
    name = kwargs.get('name', None)
    description = kwargs.get('description', None)
    tags = kwargs.get('tags', None)
    image = kwargs.get('image', None)
    commands = kwargs.get('commands', None)
    arguments = kwargs.get('arguments', None)
    experiment_notebook = kwargs.get('experiment_notebook', None)
    deployment_notebook = kwargs.get('deployment_notebook', None)
    is_default = kwargs.get('is_default', None)
    copy_from = kwargs.get('copy_from', None)

    if not isinstance(name, str):
        raise BadRequest("name is required")

    if copy_from and (experiment_notebook or deployment_notebook):
        raise BadRequest("Either provide notebooks or a task to copy from")

    if tags is None or len(tags) == 0:
        tags = ["DEFAULT"]

    if any(tag not in VALID_TAGS for tag in tags):
        valid_str = ",".join(VALID_TAGS)
        raise BadRequest(f"Invalid tag. Choose any of {valid_str}")

    # checks whether the image is a valid docker image name
    if image:
        pattern = re.compile('[a-z0-9.-]+([/]{1}[a-z0-9.-]+)+([:]{1}[a-z0-9.-]+){0,1}$')
        if pattern.match(image) is None:
            raise BadRequest("invalid docker image name")

    check_comp_name = db_session.query(Task).filter_by(name=name).first()
    if check_comp_name:
        raise BadRequest("a task with that name already exists")

    # creates a task with the specified name,
    # but copies notebooks from a source task
    if copy_from:
        return copy_task(name, description, tags, copy_from)

    task_id = str(uuid_alpha())

    # loads sample notebooks if none were sent
    if experiment_notebook is None and "DATASETS" not in tags:
        experiment_notebook = EXPERIMENT_NOTEBOOK

    if deployment_notebook is None and "DATASETS" not in tags:
        deployment_notebook = DEPLOYMENT_NOTEBOOK

    # The new task must have its own task_id, experiment_id and operator_id.
    # Notice these values are ignored when a notebook is run in a pipeline.
    # They are only used by the JupyterLab interface.
    init_notebook_metadata(task_id, deployment_notebook, experiment_notebook)

    # saves new notebooks to object storage
    if "DATASETS" not in tags:
        obj_name = f"{PREFIX}/{task_id}/Experiment.ipynb"
        experiment_notebook_path = f"minio://{BUCKET_NAME}/{obj_name}"
        put_object(obj_name, dumps(experiment_notebook).encode())

        obj_name = f"{PREFIX}/{task_id}/Deployment.ipynb"
        deployment_notebook_path = f"minio://{BUCKET_NAME}/{obj_name}"
        put_object(obj_name, dumps(deployment_notebook).encode())

        # creates deployment notebook and experiment notebook on jupyter
        create_jupyter_files(task_name=name,
                             deployment_notebook=dumps(deployment_notebook).encode(),
                             experiment_notebook=dumps(experiment_notebook).encode())
    else:
        experiment_notebook_path = None
        deployment_notebook_path = None

    if commands is None or len(commands) == 0:
        commands = DEFAULT_COMMANDS

    if arguments is None or len(arguments) == 0:
        arguments = DEFAULT_ARGUMENTS

    # saves task info to the database
    task = Task(uuid=task_id,
                name=name,
                description=description,
                tags=tags,
                image=image,
                commands=commands,
                arguments=arguments,
                experiment_notebook_path=experiment_notebook_path,
                deployment_notebook_path=deployment_notebook_path,
                is_default=is_default)
    db_session.add(task)
    db_session.commit()

    return task.as_dict()
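
# --- Illustrative sketch (hypothetical, not part of the original module) ---
# Shows the docker image name check from create_task in isolation. The pattern
# accepts lowercase repository paths with at least one "/" and an optional
# ":tag" suffix, so bare names such as "python" are rejected. Assumes `re` is
# imported at module level, as create_task above already requires.
def _docker_image_name_is_valid(image):
    """Hypothetical helper: applies the same pattern create_task uses inline."""
    pattern = re.compile('[a-z0-9.-]+([/]{1}[a-z0-9.-]+)+([:]{1}[a-z0-9.-]+){0,1}$')
    return pattern.match(image) is not None

# Example calls (image names are placeholders):
#   _docker_image_name_is_valid('platiagro/notebook-image:0.2.0')  # True
#   _docker_image_name_is_valid('registry.example.com/team/image') # True
#   _docker_image_name_is_valid('python')                          # False (no "/")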
def copy_task(name, description, tags, copy_from):
    """Makes a copy of a task in our database/object storage.

    Args:
        name (str): the task name.
        description (str): the task description.
        tags (list): the task tags list.
        copy_from (str): the task_id from which the notebooks are copied.

    Returns:
        The task info.
    """
    task = Task.query.get(copy_from)

    if task is None:
        raise BadRequest("Source task does not exist")

    task_id = uuid_alpha()
    image = task.image
    commands = task.commands
    arguments = task.arguments

    # reads source notebooks from object storage
    source_name = f"{PREFIX}/{copy_from}/Deployment.ipynb"
    deployment_notebook = loads(get_object(source_name))

    source_name = f"{PREFIX}/{copy_from}/Experiment.ipynb"
    experiment_notebook = loads(get_object(source_name))

    # Even though we are creating 'copies', the new task must have
    # its own task_id, experiment_id and operator_id.
    # We don't want to mix models and metrics of different tasks.
    # Notice these values are ignored when a notebook is run in a pipeline.
    # They are only used by the JupyterLab interface.
    init_notebook_metadata(task_id, deployment_notebook, experiment_notebook)

    # saves new notebooks to object storage
    destination_name = f"{PREFIX}/{task_id}/Deployment.ipynb"
    deployment_notebook_path = f"minio://{BUCKET_NAME}/{destination_name}"
    put_object(destination_name, dumps(deployment_notebook).encode())

    destination_name = f"{PREFIX}/{task_id}/Experiment.ipynb"
    experiment_notebook_path = f"minio://{BUCKET_NAME}/{destination_name}"
    put_object(destination_name, dumps(experiment_notebook).encode())

    # creates deployment notebook and experiment notebook on jupyter
    create_jupyter_files(task_name=name,
                         deployment_notebook=dumps(deployment_notebook).encode(),
                         experiment_notebook=dumps(experiment_notebook).encode())

    # saves task info to the database
    task = Task(uuid=task_id,
                name=name,
                description=description,
                tags=tags,
                image=image,
                commands=commands,
                arguments=arguments,
                deployment_notebook_path=deployment_notebook_path,
                experiment_notebook_path=experiment_notebook_path,
                is_default=False)
    db_session.add(task)
    db_session.commit()

    return task.as_dict()
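
# --- Illustrative usage sketch (hypothetical, not part of the original module) ---
# Copying is reached through create_task: passing copy_from delegates to
# copy_task above, which duplicates the source notebooks under a new task_id.
# The example is commented out to avoid side effects at import time; the task
# names are placeholders, and reading "uuid" from the returned dict assumes
# Task.as_dict() exposes that column.
#
#   source = create_task(name="base-task", tags=["DEFAULT"])
#   clone = create_task(name="base-task-copy",
#                       description="copy of base-task",
#                       copy_from=source["uuid"])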