def create_experiment_directory(experiment_uuid, pipeline_uuid, project_uuid):
    """Copy a project's directory into the ``snapshot`` of an experiment.

    The destination is ``<experiment directory>/snapshot``, where the
    experiment directory is the one returned by
    ``get_experiment_directory``. That parent directory is created when
    it does not exist yet.
    """

    def skip_orchest_pipelines(path, fnames):
        """Tell ``shutil.copytree`` which entries of ``path`` to skip.

        Example:
            path, fnames =
                'docker/catching-error/testing', ['hello.txt', 'some-dir']
        """
        # Inside a ".orchest" directory the "pipelines" entry (holding
        # the logs and data directories) is left out; everywhere else
        # nothing is ignored.
        return ["pipelines"] if path.endswith(".orchest") else []

    experiment_dir = get_experiment_directory(
        pipeline_uuid, project_uuid, experiment_uuid
    )
    snapshot_path = os.path.join(experiment_dir, "snapshot")

    # Only the parent has to exist: copytree creates the snapshot
    # directory itself.
    os.makedirs(os.path.dirname(snapshot_path), exist_ok=True)

    relative_project_path = project_uuid_to_path(project_uuid)
    source_dir = os.path.join(
        app.config["USER_DIR"], "projects", relative_project_path
    )
    shutil.copytree(source_dir, snapshot_path, ignore=skip_orchest_pipelines)
def _collateral(self, project_uuid: str):
    """Delete a project from the filesystem, the orchest-api and the db.

    Steps, in order: remove the project directory, remove the jobs
    directories of the project, schedule a DELETE request to the
    orchest-api and finally delete the project row and commit.
    """
    try:
        relative_path = project_uuid_to_path(project_uuid)
        projects_root = current_app.config["PROJECTS_DIR"]
        shutil.rmtree(os.path.join(projects_root, relative_path))
    except FileNotFoundError:
        # A missing directory means the user already deleted it by hand.
        # That must not abort the rest of the cleanup, otherwise the DB
        # deletion below would never happen.
        pass

    # Jobs directories that belong to the project.
    remove_project_jobs_directories(project_uuid)

    # Let the scheduler issue the project deletion to the orchest-api.
    api_address = current_app.config["ORCHEST_API_ADDRESS"]
    url = f"http://{api_address}/api/projects/{project_uuid}"
    current_app.config["SCHEDULER"].add_job(requests.delete, args=[url])

    # Deleting the project cascades to its pipelines and pipeline runs.
    Project.query.filter_by(uuid=project_uuid).delete()
    db.session.commit()
def process_log_manager(data):
    """Dispatch a log-manager message onto the ``/pty`` namespace.

    ``pty-broadcast`` and ``pty-reset`` actions are emitted as
    ``pty-output`` / ``pty-reset`` events. Any other action is relayed
    unchanged as ``pty-log-manager-receiver``, except that ``fetch-logs``
    messages first get a ``project_path`` entry added.
    """
    action = data["action"]

    if action == "pty-broadcast":
        payload = {
            "output": data["output"],
            "session_uuid": data["session_uuid"],
        }
        socketio.emit("pty-output", payload, namespace="/pty")
        return

    if action == "pty-reset":
        socketio.emit(
            "pty-reset",
            {"session_uuid": data["session_uuid"]},
            namespace="/pty",
        )
        return

    # Everything else is relayed to pty-log-manager-receiver (the
    # log_streamer client). The relay is the place where the message is
    # augmented server side with data the client does not model, such as
    # the project path.
    if action == "fetch-logs":
        data["project_path"] = project_uuid_to_path(data["project_uuid"])

    socketio.emit("pty-log-manager-receiver", data, namespace="/pty")
def get_project_directory(project_uuid, host_path=False):
    """Return the directory of a project.

    Args:
        project_uuid: UUID of the project; it is translated to the
            project's relative path with ``project_uuid_to_path``.
        host_path: When truthy the path is rooted at the configured
            ``HOST_USER_DIR`` instead of ``USER_DIR``.

    Returns:
        ``<user dir>/projects/<project path>``.
    """
    # Plain truthiness instead of ``== True``; only the config entry that
    # is actually needed gets read.
    config_key = "HOST_USER_DIR" if host_path else "USER_DIR"
    return os.path.join(
        app.config[config_key], "projects", project_uuid_to_path(project_uuid)
    )
def catch_api_proxy_environment_image_builds():
    """Proxy environment image build requests to the orchest-api.

    Every request in the JSON body gets a ``project_path`` entry before
    the whole list is forwarded. After the proxy call one
    ``ENVIRONMENT_BUILD_START`` analytics event is sent per request.

    Returns:
        The content, status code and header items of the proxied
        response.
    """
    build_requests = request.json["environment_image_build_requests"]

    # The project path is added server side, based on the project uuid.
    for build_request in build_requests:
        build_request["project_path"] = project_uuid_to_path(
            build_request["project_uuid"]
        )

    resp = api_proxy_environment_image_builds(
        build_requests, app.config["ORCHEST_API_ADDRESS"]
    )

    for build_request in build_requests:
        environment_uuid = build_request["environment_uuid"]
        project_uuid = build_request["project_uuid"]
        env = get_environment(environment_uuid, project_uuid)

        event_properties = {
            "environment_uuid": environment_uuid,
            "project_uuid": project_uuid,
            "language": env.language,
            "gpu_support": env.gpu_support,
            "base_image": env.base_image,
        }
        analytics.send_event(
            app, analytics.Event.ENVIRONMENT_BUILD_START, event_properties
        )

    return resp.content, resp.status_code, resp.headers.items()
def build_environments(environment_uuids, project_uuid):
    """Request a build for each given environment of one project.

    Args:
        environment_uuids: UUIDs of the environments to build.
        project_uuid: UUID of the project the environments belong to.

    Returns:
        Whatever ``api_proxy_environment_builds`` returns for the
        generated build requests.
    """
    # Entries shared by every build request; the path is resolved once.
    shared_fields = {
        "project_uuid": project_uuid,
        "project_path": project_uuid_to_path(project_uuid),
    }
    environment_build_requests = [
        {**shared_fields, "environment_uuid": environment_uuid}
        for environment_uuid in environment_uuids
    ]

    orchest_api_address = current_app.config["ORCHEST_API_ADDRESS"]
    return api_proxy_environment_builds(
        environment_build_requests, orchest_api_address
    )
def catch_api_proxy_environment_builds():
    """Proxy environment build requests to the orchest-api.

    Each request in the JSON body is augmented with the ``project_path``
    that belongs to its ``project_uuid`` before being forwarded.

    Returns:
        The content, status code and header items of the proxied
        response.
    """
    build_requests = request.json["environment_build_requests"]

    for build_request in build_requests:
        project_path = project_uuid_to_path(build_request["project_uuid"])
        build_request["project_path"] = project_path

    resp = api_proxy_environment_builds(
        build_requests, app.config["ORCHEST_API_ADDRESS"]
    )
    return resp.content, resp.status_code, resp.headers.items()
def _transaction(self, project_uuid):
    """Bring the db in line with the pipelines found on the filesystem.

    Pipelines of the project that are registered in the db but no longer
    present in the project directory are deleted. Pipeline files that
    are present but not registered (for example after a project import)
    are added.

    Args:
        project_uuid: UUID of the project to synchronize.

    Raises:
        FileNotFoundError: If the project directory is not found.
    """
    relative_path = project_uuid_to_path(project_uuid)
    project_dir = os.path.join(
        current_app.config["USER_DIR"], "projects", relative_path
    )
    if not os.path.isdir(project_dir):
        raise FileNotFoundError("Project directory not found")

    # Every pipeline currently present in the project directory.
    pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

    # Registered pipelines whose file was removed by hand get cleaned up.
    stale_pipelines = (
        Pipeline.query.filter(Pipeline.path.notin_(pipeline_paths))
        .filter(Pipeline.project_uuid == project_uuid)
        .all()
    )
    for stale in stale_pipelines:
        DeletePipeline(self.tpe).transaction(stale.project_uuid, stale.uuid)

    # Paths that are on disk but not registered yet are pipelines that
    # were added through the filesystem.
    registered = (
        Pipeline.query.filter(Pipeline.path.in_(pipeline_paths))
        .filter(Pipeline.project_uuid == project_uuid)
        .all()
    )
    # TODO: handle existing pipeline assignments.
    unregistered_paths = set(pipeline_paths).difference(
        pipeline.path for pipeline in registered
    )
    for path in unregistered_paths:
        AddPipelineFromFS(self.tpe).transaction(project_uuid, path)
def create_experiment_directory(experiment_uuid, pipeline_uuid, project_uuid):
    """Create an experiment directory holding a snapshot of the project.

    The experiment directory is
    ``<USER_DIR>/experiments/<project_uuid>/<pipeline_uuid>/<experiment_uuid>``
    and the project directory is copied to its ``snapshot`` subdirectory.

    Args:
        experiment_uuid: UUID of the experiment.
        pipeline_uuid: UUID of the pipeline the experiment belongs to.
        project_uuid: UUID of the project that gets snapshotted.

    Raises:
        FileExistsError: If the experiment directory already exists.
        OSError: If the project directory cannot be copied.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    experiment_path = os.path.join(
        app.config["USER_DIR"],
        "experiments",
        project_uuid,
        pipeline_uuid,
        experiment_uuid,
    )
    os.makedirs(experiment_path)

    snapshot_path = os.path.join(experiment_path, "snapshot")
    project_dir = os.path.join(
        app.config["USER_DIR"], "projects", project_uuid_to_path(project_uuid)
    )

    # Copy without going through a shell: the former "cp -R %s %s"
    # command broke on paths containing spaces or shell metacharacters
    # and silently ignored a failed copy. ``symlinks=True`` keeps
    # symbolic links as links instead of copying their targets.
    shutil.copytree(project_dir, snapshot_path, symlinks=True)
def get_pipeline_path(
    pipeline_uuid,
    project_uuid,
    experiment_uuid=None,
    pipeline_run_uuid=None,
    host_path=False,
    pipeline_path=None,
):
    """Return the path of a pipeline definition file.

    Args:
        pipeline_uuid: UUID of the pipeline.
        project_uuid: UUID of the project the pipeline belongs to.
        experiment_uuid: UUID of an experiment, when the file of an
            experiment (snapshot or run) is wanted.
        pipeline_run_uuid: UUID of a pipeline run of the experiment.
        host_path: When truthy the path is rooted at ``HOST_USER_DIR``
            instead of ``USER_DIR``.
        pipeline_path: Path of the pipeline file relative to the
            project; looked up with ``pipeline_uuid_to_path`` when
            omitted.

    Returns:
        * neither experiment nor run given: the file inside the project
          directory;
        * experiment and run given: the file inside the directory of
          that pipeline run;
        * only the experiment given: the file inside the experiment's
          ``snapshot`` directory;
        * only the run given: ``None``, since a run cannot be located
          without its experiment.
    """
    user_dir = app.config["HOST_USER_DIR" if host_path else "USER_DIR"]

    if pipeline_path is None:
        pipeline_path = pipeline_uuid_to_path(pipeline_uuid, project_uuid)

    project_path = project_uuid_to_path(project_uuid)

    if experiment_uuid is None:
        if pipeline_run_uuid is None:
            return os.path.join(
                user_dir, "projects", project_path, pipeline_path
            )
        return None

    experiment_dir = os.path.join(
        user_dir, "experiments", project_uuid, pipeline_uuid, experiment_uuid
    )
    if pipeline_run_uuid is not None:
        return os.path.join(experiment_dir, pipeline_run_uuid, pipeline_path)

    # Previously unreachable: the project branch used to trigger on
    # ``pipeline_run_uuid is None`` alone, so passing only an experiment
    # returned the project path instead of the snapshot path.
    return os.path.join(experiment_dir, "snapshot", pipeline_path)
def _collateral(self, project_uuid: str):
    """Delete a project from the filesystem, the orchest-api and the db.

    Nothing is done when the project's path can no longer be resolved.
    Otherwise the project directory and the jobs directories are
    removed, a DELETE request to the orchest-api is scheduled and the
    project row is deleted and committed.
    """
    relative_path = project_uuid_to_path(project_uuid)
    if relative_path is None:
        # A concurrent deletion request already removed the project.
        return

    # Project directory.
    projects_root = current_app.config["PROJECTS_DIR"]
    rmtree(safe_join(projects_root, relative_path))

    # Jobs directories that belong to the project.
    remove_project_jobs_directories(project_uuid)

    # Let the scheduler issue the project deletion to the orchest-api.
    api_address = current_app.config["ORCHEST_API_ADDRESS"]
    url = f"http://{api_address}/api/projects/{project_uuid}"
    current_app.config["SCHEDULER"].add_job(requests.delete, args=[url])

    # Deleting the project cascades to its pipelines and pipeline runs.
    Project.query.filter_by(uuid=project_uuid).delete()
    db.session.commit()
def projects():
    """Handle GET, DELETE and POST requests on the projects collection.

    Before the request itself is handled, every directory inside
    ``<USER_DIR>/projects`` that has no db entry yet is registered as a
    project and its environments are built.

    * GET: returns all projects with their pipeline, experiment and
      environment counts.
    * DELETE: removes the project directory and the db entry of the
      project named by ``project_uuid`` in the JSON body; 404 when the
      project is unknown.
    * POST: creates a project named by ``name`` in the JSON body; 409
      when the name or the directory already exists.
    """
    project_dir = os.path.join(app.config["USER_DIR"], "projects")
    project_paths = [
        name
        for name in os.listdir(project_dir)
        if os.path.isdir(os.path.join(project_dir, name))
    ]

    # Create a UUID entry for all projects that do not yet exist.
    existing_project_paths = [
        project.path
        for project in Project.query.filter(
            Project.path.in_(project_paths)
        ).all()
    ]
    new_project_paths = set(project_paths) - set(existing_project_paths)

    for new_project_path in new_project_paths:
        new_project = Project(uuid=str(uuid.uuid4()), path=new_project_path)
        db.session.add(new_project)
        db.session.commit()

        # Build environments on project detection.
        build_environments_for_project(new_project.uuid)

    if request.method == "GET":
        project_dumps = projects_schema.dump(Project.query.all())

        # Get counts for: pipelines, experiments and environments.
        for project in project_dumps:
            project["pipeline_count"] = Pipeline.query.filter(
                Pipeline.project_uuid == project["uuid"]
            ).count()
            project["experiment_count"] = Experiment.query.filter(
                Experiment.project_uuid == project["uuid"]
            ).count()
            project["environment_count"] = len(
                get_environments(project["uuid"])
            )

        return jsonify(project_dumps)

    elif request.method == "DELETE":
        project_uuid = request.json["project_uuid"]
        project = Project.query.filter(Project.uuid == project_uuid).first()

        if project is None:
            return (
                jsonify(
                    {"message": "Project not found for UUID %s." % project_uuid}
                ),
                404,
            )

        project_path = project_uuid_to_path(project_uuid)
        full_project_path = os.path.join(project_dir, project_path)
        try:
            shutil.rmtree(full_project_path)
        except FileNotFoundError:
            # The directory was already removed outside of this
            # endpoint. Without this the db entry could never be
            # deleted, because every DELETE request would fail here.
            pass

        db.session.delete(project)
        db.session.commit()

        # Refresh kernels after change in environments.
        populate_kernels(app, db)

        return jsonify({"message": "Project deleted."})

    elif request.method == "POST":
        project_path = request.json["name"]

        if project_path in project_paths:
            return jsonify({"message": "Project name already exists."}), 409

        full_project_path = os.path.join(project_dir, project_path)
        if os.path.isdir(full_project_path):
            return (
                jsonify({"message": "Project directory already exists."}),
                409,
            )

        new_project = Project(uuid=str(uuid.uuid4()), path=project_path)
        db.session.add(new_project)
        db.session.commit()

        os.makedirs(full_project_path)

        # Initialize with default environments.
        populate_default_environments(new_project.uuid)

        # Refresh kernels after change in environments.
        populate_kernels(app, db)

        # Build environments on project creation.
        build_environments_for_project(new_project.uuid)

        return jsonify({"message": "Project created."})
def projects():
    """Handle GET, DELETE and POST requests on the projects collection.

    Before the request itself is handled, every directory inside
    ``<USER_DIR>/projects`` that has no db entry yet is registered as a
    project.

    * GET: returns all projects.
    * DELETE: removes the project directory and the db entry of the
      project named by ``project_uuid`` in the JSON body; 404 when the
      project is unknown.
    * POST: creates a project named by ``name`` in the JSON body; 409
      when the name or the directory already exists.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    project_dir = os.path.join(app.config["USER_DIR"], "projects")
    project_paths = [
        name
        for name in os.listdir(project_dir)
        if os.path.isdir(os.path.join(project_dir, name))
    ]

    # Create a UUID entry for all projects that do not yet exist.
    existing_project_paths = [
        project.path
        for project in Project.query.filter(
            Project.path.in_(project_paths)
        ).all()
    ]
    new_project_paths = set(project_paths) - set(existing_project_paths)

    for new_project_path in new_project_paths:
        new_project = Project(uuid=str(uuid.uuid4()), path=new_project_path)
        db.session.add(new_project)
        db.session.commit()

    if request.method == "GET":
        return jsonify(projects_schema.dump(Project.query.all()))

    elif request.method == "DELETE":
        project_uuid = request.json["project_uuid"]
        project = Project.query.filter(Project.uuid == project_uuid).first()

        if project is None:
            return (
                jsonify(
                    {"message": "Project not found for UUID %s." % project_uuid}
                ),
                404,
            )

        project_path = project_uuid_to_path(project_uuid)
        full_project_path = os.path.join(project_dir, project_path)

        # Delete without a shell: the former "rm -r %s" command broke on
        # names with spaces and let shell metacharacters in a project
        # name run as commands.
        try:
            shutil.rmtree(full_project_path)
        except FileNotFoundError:
            # As before, a directory that is already gone does not stop
            # the db entry from being removed.
            pass

        db.session.delete(project)
        db.session.commit()

        return jsonify({"message": "Project deleted."})

    elif request.method == "POST":
        project_path = request.json["name"]

        if project_path in project_paths:
            return (
                jsonify(
                    {"message": "Project with the same name already exists."}
                ),
                409,
            )

        full_project_path = os.path.join(project_dir, project_path)
        if os.path.isdir(full_project_path):
            return jsonify({"message": "Project directory exists."}), 409

        new_project = Project(uuid=str(uuid.uuid4()), path=project_path)
        db.session.add(new_project)
        db.session.commit()

        os.makedirs(full_project_path)

        return jsonify({"message": "Project created."})
def pipelines_get(project_uuid):
    """Return the pipelines of a project, registering new ones first.

    Pipeline files found in the project directory that have no db entry
    yet are registered. A file whose ``uuid`` is missing, empty or
    already used within the project gets a fresh uuid written back to
    it before registration.

    Args:
        project_uuid: UUID of the project.

    Returns:
        A 404 JSON response when the project directory does not exist,
        otherwise a ``(json_string, 200, headers)`` tuple whose
        ``result`` lists the uuid, path and name of every pipeline.
    """
    project_path = project_uuid_to_path(project_uuid)
    project_dir = os.path.join(app.config["USER_DIR"], "projects", project_path)

    if not os.path.isdir(project_dir):
        return jsonify({"message": "Project directory not found."}), 404

    # Find all pipelines in the project dir.
    pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

    # Identify all pipeline paths that are not yet a pipeline.
    existing_pipeline_paths = [
        pipeline.path
        for pipeline in Pipeline.query.filter(Pipeline.path.in_(pipeline_paths))
        .filter(Pipeline.project_uuid == project_uuid)
        .all()
    ]

    # TODO: handle existing pipeline assignments
    new_pipeline_paths = set(pipeline_paths) - set(existing_pipeline_paths)

    for new_pipeline_path in new_pipeline_paths:
        pipeline_json_path = get_pipeline_path(
            None, project_uuid, pipeline_path=new_pipeline_path
        )

        try:
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            file_pipeline_uuid = pipeline_json.get("uuid")
            new_pipeline_uuid = file_pipeline_uuid

            # A missing uuid (``None``) used to raise a TypeError in
            # ``len()``; the handler below swallowed it, so the pipeline
            # was never registered. Missing and empty are now treated
            # alike, and the db is only queried for a real uuid.
            needs_new_uuid = not file_pipeline_uuid or (
                Pipeline.query.filter(Pipeline.uuid == file_pipeline_uuid)
                .filter(Pipeline.project_uuid == project_uuid)
                .count()
                > 0
            )

            if needs_new_uuid:
                new_pipeline_uuid = str(uuid.uuid4())

                with open(pipeline_json_path, "w") as json_file:
                    pipeline_json["uuid"] = new_pipeline_uuid
                    json_file.write(json.dumps(pipeline_json, indent=2))

            # Only commit if writing succeeds.
            new_pipeline = Pipeline(
                uuid=new_pipeline_uuid,
                path=new_pipeline_path,
                project_uuid=project_uuid,
            )
            db.session.add(new_pipeline)
            db.session.commit()

        except Exception as e:
            # Best effort per file: a pipeline that cannot be read or
            # registered must not break the listing of the others.
            logging.info(e)

    pipelines = Pipeline.query.filter(Pipeline.project_uuid == project_uuid).all()
    pipelines_augmented = []

    for pipeline in pipelines:
        pipeline_json_path = get_pipeline_path(pipeline.uuid, pipeline.project_uuid)

        pipeline_augmented = {
            "uuid": pipeline.uuid,
            "path": pipeline.path,
        }
        if os.path.isfile(pipeline_json_path):
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)
                pipeline_augmented["name"] = pipeline_json["name"]
        else:
            pipeline_augmented["name"] = "Warning: pipeline file was not found."

        pipelines_augmented.append(pipeline_augmented)

    json_string = json.dumps({"success": True, "result": pipelines_augmented})

    return json_string, 200, {"content-type": "application/json"}
def projects():
    """Handle GET, DELETE and POST requests on the projects collection.

    Before the request itself is handled the db is reconciled with the
    ``<USER_DIR>/projects`` directory: projects whose directory is gone
    are cleaned up, and directories without a db entry are initialized
    as projects.

    * GET: synchronizes the pipelines of every project and returns all
      projects with their pipeline, experiment and environment counts.
    * DELETE: removes the directory of the project named by
      ``project_uuid`` in the JSON body and cleans the project up; 404
      when the project is unknown.
    * POST: creates and initializes a project named by ``name`` in the
      JSON body; 409 when the name or directory already exists, 500
      when initialization fails.
    """
    projects_dir = os.path.join(app.config["USER_DIR"], "projects")
    project_paths = [
        name
        for name in os.listdir(projects_dir)
        if os.path.isdir(os.path.join(projects_dir, name))
    ]

    # Look for projects that have been removed through the filesystem by
    # the user, and clean up their dangling resources.
    fs_removed_projects = Project.query.filter(
        Project.path.notin_(project_paths)
    ).all()
    for fs_removed_project in fs_removed_projects:
        cleanup_project_from_orchest(fs_removed_project)
    if fs_removed_projects:
        # Refresh kernels after change in environments.
        populate_kernels(app, db)

    # Detect new projects by detecting directories that were not
    # registered in the db as projects.
    existing_project_paths = [
        project.path
        for project in Project.query.filter(
            Project.path.in_(project_paths)
        ).all()
    ]
    new_project_paths = set(project_paths) - set(existing_project_paths)

    for new_project_path in new_project_paths:
        try:
            init_project(new_project_path)
        except Exception as e:
            # One broken directory must not block the whole endpoint.
            logging.error(
                f"Error during project initialization of {new_project_path}: {e}"
            )

    if request.method == "GET":
        project_dumps = projects_schema.dump(Project.query.all())

        # Get counts for: pipelines, experiments and environments.
        for project in project_dumps:
            # Catch both pipelines of newly initialized projects and
            # manually initialized pipelines of existing projects.
            sync_project_pipelines_db_state(project["uuid"])

            project["pipeline_count"] = Pipeline.query.filter(
                Pipeline.project_uuid == project["uuid"]
            ).count()
            project["experiment_count"] = Experiment.query.filter(
                Experiment.project_uuid == project["uuid"]
            ).count()
            project["environment_count"] = len(
                get_environments(project["uuid"])
            )

        return jsonify(project_dumps)

    elif request.method == "DELETE":
        project_uuid = request.json["project_uuid"]
        project = Project.query.filter(Project.uuid == project_uuid).first()

        if project is None:
            return (
                jsonify(
                    {"message": "Project not found for UUID %s." % project_uuid}
                ),
                404,
            )

        project_path = project_uuid_to_path(project_uuid)
        full_project_path = os.path.join(projects_dir, project_path)
        shutil.rmtree(full_project_path)
        cleanup_project_from_orchest(project)

        # Refresh kernels after change in environments.
        populate_kernels(app, db)

        return jsonify({"message": "Project deleted."})

    elif request.method == "POST":
        project_path = request.json["name"]

        if project_path in project_paths:
            return jsonify({"message": "Project name already exists."}), 409

        full_project_path = os.path.join(projects_dir, project_path)
        if os.path.isdir(full_project_path):
            return (
                jsonify({"message": "Project directory already exists."}),
                409,
            )

        os.makedirs(full_project_path)

        # Given the current GUI pattern of POST followed by GET, this
        # call is not strictly needed: the new directory would be picked
        # up and initialized on the GET request. Doing it here is more
        # explicit and does not rely on that pattern.
        try:
            init_project(project_path)
        except Exception as e:
            return (
                jsonify(
                    {"message": "Failed to create the project. Error: %s" % e}
                ),
                500,
            )

        return jsonify({"message": "Project created."})
def sync_project_pipelines_db_state(project_uuid):
    """Synchronizes the state of the pipelines of a project (fs/db).

    Synchronizes the state of the filesystem with the db when it comes
    to the pipelines of a project. Pipelines removed from the filesystem
    are removed from the db; new pipelines (or pipelines that were
    already there, for example after a project import) are registered
    in the db. A pipeline file whose ``uuid`` is missing, empty or
    already used within the project gets a fresh uuid written back to it
    before registration.

    Args:
        project_uuid: UUID of the project to synchronize.

    Raises:
        FileNotFoundError: If the project directory is not found.
    """
    project_path = project_uuid_to_path(project_uuid)
    project_dir = os.path.join(app.config["USER_DIR"], "projects", project_path)

    if not os.path.isdir(project_dir):
        raise FileNotFoundError("Project directory not found")

    # Find all pipelines in the project dir.
    pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

    # Clean up pipelines that have been manually removed.
    fs_removed_pipelines = (
        Pipeline.query.filter(Pipeline.path.notin_(pipeline_paths))
        .filter(Pipeline.project_uuid == project_uuid)
        .all()
    )
    for fs_removed_pipeline in fs_removed_pipelines:
        cleanup_pipeline_from_orchest(fs_removed_pipeline)

    # Identify all pipeline paths that are not yet a pipeline.
    existing_pipeline_paths = [
        pipeline.path
        for pipeline in Pipeline.query.filter(Pipeline.path.in_(pipeline_paths))
        .filter(Pipeline.project_uuid == project_uuid)
        .all()
    ]

    # TODO: handle existing pipeline assignments
    new_pipeline_paths = set(pipeline_paths) - set(existing_pipeline_paths)

    for new_pipeline_path in new_pipeline_paths:
        pipeline_json_path = get_pipeline_path(
            None, project_uuid, pipeline_path=new_pipeline_path
        )

        try:
            with open(pipeline_json_path, "r") as json_file:
                pipeline_json = json.load(json_file)

            file_pipeline_uuid = pipeline_json.get("uuid")
            new_pipeline_uuid = file_pipeline_uuid

            # A missing uuid (``None``) used to raise a TypeError in
            # ``len()``; the handler below swallowed it, so the pipeline
            # was never registered. Missing and empty are now treated
            # alike, and the db is only queried for a real uuid.
            needs_new_uuid = not file_pipeline_uuid or (
                Pipeline.query.filter(Pipeline.uuid == file_pipeline_uuid)
                .filter(Pipeline.project_uuid == project_uuid)
                .count()
                > 0
            )

            if needs_new_uuid:
                new_pipeline_uuid = str(uuid.uuid4())

                with open(pipeline_json_path, "w") as json_file:
                    pipeline_json["uuid"] = new_pipeline_uuid
                    json_file.write(json.dumps(pipeline_json, indent=4))

            # Only commit if writing succeeds.
            new_pipeline = Pipeline(
                uuid=new_pipeline_uuid,
                path=new_pipeline_path,
                project_uuid=project_uuid,
            )
            db.session.add(new_pipeline)
            db.session.commit()

        except Exception as e:
            # Best effort per file: a pipeline that cannot be read or
            # registered must not stop the others from being synced.
            logging.info(e)
def _transaction(self, project_uuid):
    """Bring the db in line with the pipelines found on the filesystem.

    Pipelines of the project that are registered with status ``READY``
    but no longer present in the project directory are deleted from the
    db. Pipeline files that are present but not registered (for example
    after a project import) are added, unless a pipeline with the same
    uuid is currently being moved.

    Args:
        project_uuid: UUID of the project to synchronize.

    Raises:
        FileNotFoundError: If the project directory is not found.
    """
    relative_path = project_uuid_to_path(project_uuid)
    project_dir = safe_join(
        current_app.config["USER_DIR"], "projects", relative_path
    )

    # Hold a lock on the project row so concurrent pipeline creations or
    # deletions cannot race with this synchronization.
    Project.query.with_for_update().filter_by(uuid=project_uuid).one()

    if not os.path.isdir(project_dir):
        raise FileNotFoundError("Project directory not found")

    # Every pipeline currently present in the project directory.
    pipeline_paths = find_pipelines_in_dir(project_dir, project_dir)

    # Registered pipelines whose file was removed by hand get cleaned up.
    stale_pipelines = (
        Pipeline.query.filter(Pipeline.path.notin_(pipeline_paths))
        .filter(
            Pipeline.project_uuid == project_uuid,
            Pipeline.status == "READY",
        )
        .all()
    )
    for stale in stale_pipelines:
        DeletePipeline(self.tpe).transaction(
            stale.project_uuid, stale.uuid, remove_file=False
        )

    # Paths that are on disk but not registered yet are pipelines that
    # were added through the filesystem.
    registered = (
        Pipeline.query.filter(Pipeline.path.in_(pipeline_paths))
        .filter(Pipeline.project_uuid == project_uuid)
        .all()
    )
    # TODO: handle existing pipeline assignments.
    unregistered_paths = set(pipeline_paths).difference(
        pipeline.path for pipeline in registered
    )

    for path in unregistered_paths:
        pipeline_json_path = get_pipeline_path(
            None, project_uuid, pipeline_path=path
        )
        with open(pipeline_json_path, "r") as json_file:
            pipeline_uuid = json.load(json_file)["uuid"]

        moving_count = Pipeline.query.filter_by(
            project_uuid=project_uuid, uuid=pipeline_uuid, status="MOVING"
        ).count()
        if moving_count > 0:
            # Not a new pipeline: it is in the middle of being moved.
            continue

        AddPipelineFromFS(self.tpe).transaction(project_uuid, path)