def delete(snapshot_id=None):
    """Delete a snapshot within a project

    The project must be created before this is implemented. You can do that by using
    the following command::

        $ datmo init

    Parameters
    ----------
    snapshot_id : str
        snapshot id to be deleted

    Returns
    -------
    result of the controller delete call
        previously this function dropped the controller's result and always
        returned None despite documenting a return value

    Examples
    --------
    You can use this function within a project repository to delete a snapshot.

    >>> import datmo
    >>> datmo.snapshot.delete(snapshot_id="4L24adFfsa")
    """
    snapshot_controller = SnapshotController()
    # Propagate the controller's result to the caller; the original
    # implementation silently discarded it.
    return snapshot_controller.delete(snapshot_id=snapshot_id)
def create_from_task(message, task_id, home=None):
    """Create a snapshot within a project from a completed task

    Parameters
    ----------
    message : str
        a description of the snapshot for later reference
    task_id : str
        task object id to use to create snapshot
    home : str, optional
        project home directory (defaults to the current working directory)

    Returns
    -------
    Snapshot
        returns a Snapshot entity as defined above

    Examples
    --------
    You can use this function within a project repository to save snapshots
    for later use. Once you have created this, you will be able to view the
    snapshot with the `datmo snapshot ls` cli command

    >>> import datmo
    >>> datmo.snapshot.create_from_task(message="my first snapshot from task", task_id="1jfkshg049")
    """
    # Fall back to the current working directory when no home is given
    home = home or os.getcwd()
    controller = SnapshotController(home=home)
    # Build the core snapshot from the finished task, then wrap it in the
    # public-facing Snapshot entity
    core_obj = controller.create_from_task(message, task_id)
    return Snapshot(core_obj, home=home)
def __init__(self, home):
    """Build a TaskController rooted at the project directory `home`.

    Parameters
    ----------
    home : str
        path to the project root shared by all child controllers

    Raises
    ------
    ProjectNotInitializedException
        if no datmo project has been initialized at `home`
    """
    super(TaskController, self).__init__(home)
    # Child controllers operate on the same project home
    self.environment = EnvironmentController(home)
    self.snapshot = SnapshotController(home)
    # Fail fast if the project has not been set up via `datmo init`
    if not self.is_initialized:
        raise ProjectNotInitializedException(
            __("error", "controller.task.__init__"))
def inspect(self, **kwargs):
    """Echo and return the string rendering of the snapshot named by `id`."""
    self.snapshot_controller = SnapshotController()
    target_id = kwargs.get("id", None)
    # Render the snapshot entity through its __str__ and show it on the CLI
    rendered = str(self.snapshot_controller.get(target_id))
    self.cli_helper.echo(rendered)
    return rendered
def setup_method(self):
    """Create an isolated, initialized project plus env/config/stats/file fixtures."""
    # provide mountable tmp directory for docker
    tempfile.tempdir = "/tmp" if not platform.system(
    ) == "Windows" else None
    test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                    tempfile.gettempdir())
    self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
    self.project = ProjectController(self.temp_dir)
    self.project.init("test", "test description")
    self.snapshot = SnapshotController(self.temp_dir)
    # Create environment_driver definition
    self.env_def_path = os.path.join(self.temp_dir, "Dockerfile")
    with open(self.env_def_path, "w") as f:
        f.write(str("FROM datmo/xgboost:cpu"))
    # Create config
    self.config_filepath = os.path.join(self.snapshot.home, "config.json")
    with open(self.config_filepath, "w") as f:
        f.write(str('{"foo":1}'))
    # Create stats
    self.stats_filepath = os.path.join(self.snapshot.home, "stats.json")
    with open(self.stats_filepath, "w") as f:
        f.write(str('{"bar":1}'))
    # Create test file
    self.filepath = os.path.join(self.snapshot.home, "file.txt")
    with open(self.filepath, "w") as f:
        f.write(str("test"))
def diff(self, **kwargs):
    """Echo and return a side-by-side attribute table for two snapshots."""
    self.snapshot_controller = SnapshotController()
    first = self.snapshot_controller.get(kwargs.get("id_1", None))
    second = self.snapshot_controller.get(kwargs.get("id_2", None))
    attributes = [
        "id", "created_at", "message", "label", "code_id",
        "environment_id", "file_collection_id"
    ]
    rows = [["Attributes", "Snapshot 1", "", "Snapshot 2"],
            ["", "", "", ""]]
    for name in attributes:
        # Falsy attribute values render as "N/A"
        left = getattr(first, name) or "N/A"
        right = getattr(second, name) or "N/A"
        if isinstance(left, datetime):
            left = prettify_datetime(left)
        if isinstance(right, datetime):
            right = prettify_datetime(right)
        rows.append([name, left, "->", right])
    output = format_table(rows)
    self.cli_helper.echo(output)
    return output
def delete(self, **kwargs):
    """Delete the snapshot named by `id`, reporting progress on the CLI."""
    self.snapshot_controller = SnapshotController()
    self.cli_helper.echo(__("info", "cli.snapshot.delete"))
    target_id = kwargs.get('id')
    outcome = self.snapshot_controller.delete(target_id)
    self.cli_helper.echo(
        __("info", "cli.snapshot.delete.success", target_id))
    return outcome
def checkout(self, **kwargs):
    """Check the project state out to the snapshot named by `id`.

    Echoes a success message when the checkout succeeds and returns the
    controller's checkout result.
    """
    self.snapshot_controller = SnapshotController()
    snapshot_id = kwargs.get('id')
    # Run the checkout exactly once. The original issued a second,
    # redundant checkout in the return statement, repeating the side
    # effect of switching project state.
    checkout_success = self.snapshot_controller.checkout(snapshot_id)
    if checkout_success:
        self.cli_helper.echo(
            __("info", "cli.snapshot.checkout.success", snapshot_id))
    return checkout_success
def __init__(self):
    """Build a TaskController for the currently configured project home.

    Raises
    ------
    ProjectNotInitialized
        if no datmo project has been initialized
    """
    super(TaskController, self).__init__()
    # Child controllers resolve the project home from global config
    self.environment = EnvironmentController()
    self.snapshot = SnapshotController()
    # Spinner provides CLI feedback during long-running task operations
    self.spinner = Spinner()
    # Fail fast if the project has not been set up via `datmo init`
    if not self.is_initialized:
        raise ProjectNotInitialized(
            __("error", "controller.task.__init__"))
def ls(filter=None):
    """List snapshots within a project

    The project must be created before this is implemented. You can do that by using
    the following command::

        $ datmo init

    Parameters
    ----------
    filter : str, optional
        a string to use to filter from message and label
        (default is to give all snapshots, unless provided a specific string. eg: best)

    Returns
    -------
    list
        returns a list of Snapshot entities (as defined above)

    Examples
    --------
    You can use this function within a project repository to list snapshots.

    >>> import datmo
    >>> snapshots = datmo.snapshot.ls()
    """
    snapshot_controller = SnapshotController()
    # add arguments if they are not None
    core_snapshot_objs = snapshot_controller.list(
        visible=True, sort_key='created_at', sort_order='descending')
    # Filtering Snapshots
    # TODO: move to list function in SnapshotController
    # Single pass (originally two passes over the same list): keep every
    # visible snapshot when no filter is given, otherwise keep visible
    # snapshots whose message or label contains the filter string.
    # Also replaces the `label != None` comparison with `is not None`.
    filtered_core_snapshot_objs = []
    for core_snapshot_obj in core_snapshot_objs:
        if not core_snapshot_obj.visible:
            continue
        if not filter:
            filtered_core_snapshot_objs.append(core_snapshot_obj)
        elif (filter in core_snapshot_obj.message
              or (core_snapshot_obj.label is not None
                  and filter in core_snapshot_obj.label)):
            filtered_core_snapshot_objs.append(core_snapshot_obj)
    # Return Snapshot entities
    return [
        Snapshot(filtered_core_snapshot_obj)
        for filtered_core_snapshot_obj in filtered_core_snapshot_objs
    ]
def __get_core_snapshot(self):
    """Fetch the core snapshot entity backing this wrapper by its id.

    Returns
    -------
    datmo.core.entity.snapshot.Snapshot
        core snapshot object for the snapshot
    """
    return SnapshotController().get(self.id)
def setup_method(self):
    """Initialize a throwaway project with task and snapshot controllers."""
    # provide mountable tmp directory for docker
    tempfile.tempdir = "/tmp" if not platform.system(
    ) == "Windows" else None
    test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                    tempfile.gettempdir())
    self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
    self.project = ProjectController(self.temp_dir)
    self.project.init("test", "test description")
    self.task = TaskController(self.temp_dir)
    self.snapshot = SnapshotController(self.temp_dir)
def __get_core_snapshot(self):
    """Fetch the core snapshot entity tied to this task.

    Prefers the after-task snapshot id; falls back to the before-task one.

    Returns
    -------
    datmo.core.entity.snapshot.Snapshot
        core snapshot object for the Snapshot
    """
    controller = SnapshotController()
    chosen_id = self.after_snapshot_id or self.before_snapshot_id
    return controller.get(chosen_id)
def update(snapshot_id=None,
           config=None,
           stats=None,
           message=None,
           label=None):
    """Update a snapshot within a project

    The project must be created before this is implemented. You can do that by
    using the following command::

        $ datmo init

    Parameters
    ----------
    snapshot_id : str
        snapshot id to be updated
    config : dict, optional
        dictionary of configurations to update
        (default is None, meaning no update)
    stats : dict, optional
        dictionary of relevant statistics or metrics to update
        (default is None, meaning no update)
    message : str, optional
        new message for the snapshot
        (default keeps the snapshot's existing message)
    label : str, optional
        new label for the snapshot
        (default keeps the snapshot's existing label)

    Returns
    -------
    snapshot entity
        returns a Snapshot entity

    Examples
    --------
    You can use this function within a project repository to update a snapshot.

    >>> import datmo
    >>> snapshots = datmo.snapshot.update(snapshot_id="4L24adFfsa", config={"depth": "10", "learning_rate": "0.91"},
    ... stats={"acc": "91.34", "f1_score": "0.91"}, message="new message", label="best")
    """
    # Thin delegation: all argument semantics live in the controller
    controller = SnapshotController()
    return controller.update(
        snapshot_id=snapshot_id,
        config=config,
        stats=stats,
        message=message,
        label=label)
def rerun(self, **kwargs): self.task_controller = TaskController() # Get task id task_id = kwargs.get("id", None) self.cli_helper.echo(__("info", "cli.run.rerun", task_id)) # Create the task_obj task_obj = self.task_controller.get(task_id) # Create the run obj run_obj = Run(task_obj) # Select the initial snapshot if it's a script else the final snapshot initial = True if run_obj.type == 'script' else False environment_id = run_obj.environment_id command = task_obj.command_list snapshot_id = run_obj.core_snapshot_id if not initial else run_obj.before_snapshot_id # Checkout to the core snapshot id before rerunning the task self.snapshot_controller = SnapshotController() try: checkout_success = self.snapshot_controller.checkout(snapshot_id) except Exception: self.cli_helper.echo(__("error", "cli.snapshot.checkout.failure")) sys.exit(1) if checkout_success: self.cli_helper.echo( __("info", "cli.snapshot.checkout.success", snapshot_id)) # Rerunning the task # Create input dictionary for the new task snapshot_dict = {} snapshot_dict["environment_id"] = environment_id task_dict = { "ports": task_obj.ports, "interactive": task_obj.interactive, "mem_limit": task_obj.mem_limit, "command_list": command, "data_file_path_map": task_obj.data_file_path_map, "data_directory_path_map": task_obj.data_directory_path_map, "workspace": task_obj.workspace } # Run task and return Task object result new_task_obj = self.task_run_helper(task_dict, snapshot_dict, "cli.run.run") if not new_task_obj: return False # Creating the run object new_run_obj = Run(new_task_obj) return new_run_obj
def get_files(self, mode="r"):
    """Open and return the files belonging to this snapshot.

    Parameters
    ----------
    mode : str
        mode used to open each file (default is "r", read mode)

    Returns
    -------
    list
        file objects associated with the snapshot
    """
    return SnapshotController().get_files(self.id, mode=mode)
def test_init_fail_project_not_init(self):
    """Constructing a SnapshotController on an uninitialized project must raise."""
    raised = False
    try:
        SnapshotController(self.temp_dir)
    except ProjectNotInitializedException:
        raised = True
    assert raised
def test_init_fail_invalid_path(self):
    """Constructing a SnapshotController on a nonexistent path must raise."""
    test_home = "some_random_dir"
    raised = False
    try:
        SnapshotController(test_home)
    except InvalidProjectPathException:
        raised = True
    assert raised
def test_init_fail_project_not_init(self):
    """SnapshotController() must raise when home is an uninitialized project."""
    Config().set_home(self.temp_dir)
    raised = False
    try:
        SnapshotController()
    except ProjectNotInitialized:
        raised = True
    assert raised
def get_files(self, mode="r"):
    """Open and return the files belonging to this task's snapshot.

    Parameters
    ----------
    mode : str
        mode used to open each file (default is "r", read mode)

    Returns
    -------
    list or None
        file objects for the task's core snapshot, or None when the task
        has no core snapshot
    """
    controller = SnapshotController()
    # Refresh the cached core snapshot before resolving its files
    self._core_snapshot = self.__get_core_snapshot()
    if not self._core_snapshot:
        return None
    return controller.get_files(self._core_snapshot.id, mode=mode)
def test_init_fail_invalid_path(self):
    """SnapshotController() must raise when home points at a nonexistent path."""
    test_home = "some_random_dir"
    Config().set_home(test_home)
    raised = False
    try:
        SnapshotController()
    except InvalidProjectPath:
        raised = True
    assert raised
def ls(self, **kwargs):
    """List runs for a session, optionally downloading the table to a file.

    NOTE(review): the download branch returns `task_objs` while the normal
    path returns `run_obj_list` — the two paths return different entity
    types; looks unintentional, confirm against callers before changing.
    """
    # Create controllers
    self.task_controller = TaskController()
    self.snapshot_controller = SnapshotController()
    # Default to the controller's current session when none is given
    session_id = kwargs.get('session_id',
                            self.task_controller.current_session.id)
    print_format = kwargs.get('format', "table")
    download = kwargs.get('download', None)
    download_path = kwargs.get('download_path', None)
    # Get all task meta information
    task_objs = self.task_controller.list(
        session_id, sort_key="created_at", sort_order="descending")
    header_list = [
        "id", "command", "status", "config", "results", "created at"
    ]
    item_dict_list = []
    run_obj_list = []
    for task_obj in task_objs:
        # Create a new Run Object from Task Object
        run_obj = RunObject(task_obj)
        task_results_printable = printable_object(str(run_obj.results))
        snapshot_config_printable = printable_object(str(run_obj.config))
        item_dict_list.append({
            "id": run_obj.id,
            "command": run_obj.command,
            "status": run_obj.status,
            "config": snapshot_config_printable,
            "results": task_results_printable,
            "created at": prettify_datetime(run_obj.created_at)
        })
        run_obj_list.append(run_obj)
    if download:
        if not download_path:
            # download to current working directory with timestamp
            current_time = datetime.utcnow()
            epoch_time = datetime.utcfromtimestamp(0)
            current_time_unix_time_ms = (
                current_time - epoch_time).total_seconds() * 1000.0
            download_path = os.path.join(
                os.getcwd(), "run_ls_" + str(current_time_unix_time_ms))
        self.cli_helper.print_items(
            header_list,
            item_dict_list,
            print_format=print_format,
            output_path=download_path)
        return task_objs
    self.cli_helper.print_items(
        header_list, item_dict_list, print_format=print_format)
    return run_obj_list
def update(self, **kwargs):
    """Merge CLI-supplied config/stats/message/label into an existing snapshot."""
    self.snapshot_controller = SnapshotController()
    self.cli_helper.echo(__("info", "cli.snapshot.update"))
    snapshot_id = kwargs.get('id')
    # Start from the snapshot's previously saved config and stats
    snapshot_obj = self.snapshot_controller.get(snapshot_id)
    config = snapshot_obj.config
    stats = snapshot_obj.stats

    def merge_cli_items(target, items, category):
        # Fold `key=value` CLI tokens into the existing dict in order
        for item in items:
            target.update(parse_cli_key_value(item, category))

    config_items = kwargs.get('config', None)
    if config_items:
        merge_cli_items(config, config_items, 'config')
    stats_items = kwargs.get('stats', None)
    if stats_items:
        merge_cli_items(stats, stats_items, 'stats')

    result = self.snapshot_controller.update(
        snapshot_id,
        config=config,
        stats=stats,
        message=kwargs.get('message', None),
        label=kwargs.get('label', None))
    self.cli_helper.echo(
        __("info", "cli.snapshot.update.success", snapshot_id))
    return result
def diff(self, **kwargs):
    """Echo and return a table diffing two snapshots, expanding config/stats dicts per key."""
    self.snapshot_controller = SnapshotController()
    snapshot_id_1 = kwargs.get("id_1", None)
    snapshot_id_2 = kwargs.get("id_2", None)
    snapshot_obj_1 = self.snapshot_controller.get(snapshot_id_1)
    snapshot_obj_2 = self.snapshot_controller.get(snapshot_id_2)
    comparison_attributes = [
        "id", "created_at", "message", "label", "code_id",
        "environment_id", "file_collection_id", "config", "stats"
    ]
    table_data = [["Attributes", "Snapshot 1", "", "Snapshot 2"],
                  ["", "", "", ""]]
    for attribute in comparison_attributes:
        # Falsy attribute values are shown as "N/A"
        value_1 = getattr(snapshot_obj_1, attribute) if getattr(
            snapshot_obj_1, attribute) else "N/A"
        value_2 = getattr(snapshot_obj_2, attribute) if getattr(
            snapshot_obj_2, attribute) else "N/A"
        if isinstance(value_1, datetime):
            value_1 = prettify_datetime(value_1)
        if isinstance(value_2, datetime):
            value_2 = prettify_datetime(value_2)
        if attribute in ["config", "stats"]:
            # Expand dict-valued attributes: one row per key present in
            # either snapshot's dict
            alldict = []
            if isinstance(value_1, dict):
                alldict.append(value_1)
            if isinstance(value_2, dict):
                alldict.append(value_2)
            allkey = set().union(*alldict)
            for key in allkey:
                # A key missing (or falsy) on one side renders as "N/A"
                key_value_1 = "%s: %s" % (key, value_1[key]) if value_1 != "N/A" and value_1.get(key, None) \
                    else "N/A"
                key_value_2 = "%s: %s" % (key, value_2[key]) if value_2 != "N/A" and value_2.get(key, None) \
                    else "N/A"
                table_data.append(
                    [attribute, key_value_1, "->", key_value_2])
        else:
            table_data.append([attribute, value_1, "->", value_2])
    output = format_table(table_data)
    self.cli_helper.echo(output)
    return output
def run(self, **kwargs): self.cli_helper.echo(__("info", "cli.task.run")) # Create controllers self.task_controller = TaskController() self.snapshot_controller = SnapshotController() # Create input dictionaries snapshot_dict = {} # Environment if kwargs.get("environment_id", None) or kwargs.get( "environment_paths", None): mutually_exclusive_args = ["environment_id", "environment_paths"] mutually_exclusive(mutually_exclusive_args, kwargs, snapshot_dict) task_dict = { "ports": kwargs['ports'], "interactive": kwargs['interactive'], "mem_limit": kwargs['mem_limit'] } if not isinstance(kwargs['cmd'], list): if platform.system() == "Windows": task_dict['command'] = kwargs['cmd'] elif isinstance(kwargs['cmd'], basestring): task_dict['command_list'] = shlex.split(kwargs['cmd']) else: task_dict['command_list'] = kwargs['cmd'] # Create the task object task_obj = self.task_controller.create() try: # Pass in the task to run updated_task_obj = self.task_controller.run( task_obj.id, snapshot_dict=snapshot_dict, task_dict=task_dict) except Exception as e: self.logger.error("%s %s" % (e, task_dict)) self.cli_helper.echo("%s" % e) self.cli_helper.echo(__("error", "cli.task.run", task_obj.id)) return False self.cli_helper.echo( __("info", "cli.task.run.complete", updated_task_obj.id)) return updated_task_obj
def status(self):
    """Return the project status information if initialized

    Returns
    -------
    status_dict : dict
        dictionary with project metadata and config
    current_snapshot : datmo.core.entity.snapshot.Snapshot
        snapshot object of the current state of the repo if present else None
    latest_snapshot_user_generated : datmo.core.entity.snapshot.Snapshot
        snapshot object of the latest snapshot generated by the user if present else None
    latest_snapshot_auto_generated : datmo.core.entity.snapshot.Snapshot
        snapshot object of the latest snapshot generated automatically by datmo if present else None
    unstaged_code : bool
        True if code has unstaged changes
    unstaged_environment : bool
        True if environment has unstaged changes
    unstaged_files : bool
        True if files have unstaged changes
    """
    if not self.is_initialized:
        raise ProjectNotInitialized(
            __("error", "controller.project.status"))
    # TODO: Add in note when environment is not setup or intialized
    # Add in project metadata
    status_dict = self.model.to_dictionary().copy()
    # Find all project settings
    status_dict["config"] = self.config_store.to_dict()
    # Find the latest snapshot generated by the user
    descending_snapshots = self.dal.snapshot.query(
        {
            "visible": True
        }, sort_key="created_at", sort_order="descending")
    latest_snapshot_user_generated = descending_snapshots[
        0] if descending_snapshots else None
    # Show the latest snapshot generated automatically by datmo
    descending_snapshots = self.dal.snapshot.query(
        {
            "visible": False
        }, sort_key="created_at", sort_order="descending")
    latest_snapshot_auto_generated = descending_snapshots[
        0] if descending_snapshots else None
    # TODO: add in latest run
    # Each check_unstaged_changes() raises UnstagedChanges when dirty;
    # the except arms translate that into a boolean flag
    self.code_controller = CodeController()
    try:
        unstaged_code = self.code_controller.check_unstaged_changes()
    except UnstagedChanges:
        unstaged_code = True
    self.environment_controller = EnvironmentController()
    try:
        unstaged_environment = self.environment_controller.check_unstaged_changes(
        )
    except UnstagedChanges:
        unstaged_environment = True
    self.file_collection_controller = FileCollectionController()
    try:
        unstaged_files = self.file_collection_controller.check_unstaged_changes(
        )
    except UnstagedChanges:
        unstaged_files = True
    # If exists, obtain the current snapshot, if unstaged changes, will be None
    self.snapshot_controller = SnapshotController()
    try:
        current_snapshot = self.snapshot_controller.current_snapshot()
    except UnstagedChanges:
        current_snapshot = None
    return status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, \
        unstaged_code, unstaged_environment, unstaged_files
class ProjectController(BaseController):
    """ProjectController inherits from BaseController and manages business logic related to the
    project. One model is associated with each project currently.

    Methods
    -------
    init(name, description)
        Initialize the project repository as a new model or update the existing project
    cleanup()
        Remove all datmo references from the current repository.
        NOTE: THIS WILL DELETE ALL DATMO WORK
    status()
        Give the user a picture of the status of the project, snapshots, and tasks
    """

    def __init__(self):
        super(ProjectController, self).__init__()

    def init(self, name, description):
        """Initialize the project, or reinitialize it if it is already initialized.

        Parameters
        ----------
        name : str
        description : str

        Returns
        -------
        bool
            True on success

        Raises
        ------
        Exception
            re-raises any failure after rolling back (cleanup for a new
            model, restore of the previous name/description otherwise)
        """
        is_new_model = False
        old_model = self.model
        if not self.model:
            is_new_model = True
        try:
            # Always validate inputs to the init function
            validate("create_project", {
                "name": name,
                "description": description
            })
            # Initialize File Driver if needed
            if not self.file_driver.is_initialized:
                self.file_driver.init()
            # Initialize the dal
            if not self.dal.is_initialized:
                self.dal.init()
            # Initialize Code Driver if needed
            if not self.code_driver.is_initialized:
                self.code_driver.init()
            # Initialize Environment Driver if needed
            if not self.environment_driver.is_initialized:
                self.environment_driver.init()
            # Initialize the config JSON store
            self.config_store = JSONStore(
                os.path.join(self.home, Config().datmo_directory_name,
                             ".config"))
            # Create model if new else update
            if is_new_model:
                _ = self.dal.model.create(
                    Model({
                        "name": name,
                        "description": description
                    }))
            else:
                self._model = self.dal.model.update({
                    "id": self.model.id,
                    "name": name,
                    "description": description
                })
            # Connect Environment Driver if needed
            # (not required but will warn if not present)
            try:
                if not self.environment_driver.is_connected:
                    self.environment_driver.connect()
            except EnvironmentConnectFailed:
                self.logger.warning(
                    __("warn", "controller.general.environment.failed"))
            # Build the initial default Environment (NOT NECESSARY)
            # self.environment_driver.build_image(tag="datmo-" + \
            #                                     self.model.name)
            return True
        except Exception:
            # if any error occurred with new model, ensure no initialize occurs and raise previous error
            # if any error occurred with existing model, ensure no updates were made, raise previous error
            if is_new_model:
                self.cleanup()
            else:
                self._model = self.dal.model.update({
                    "id": old_model.id,
                    "name": old_model.name,
                    "description": old_model.description
                })
            raise

    def cleanup(self):
        """Cleans the project structure completely

        Notes
        -----
        This function will not error out but will gracefully exit, since
        it is used in cases where init fails as a check against
        mid-initialized projects

        Returns
        -------
        bool
        """
        if not self.is_initialized:
            self.logger.warning(
                __("warn", "controller.project.cleanup.not_init"))
        # FIX: image_id must be defined even when the lookup below fails.
        # Previously a failure in the first try-block left it unbound, and
        # the later `if image_id:` raised a NameError that was swallowed
        # by the surrounding except as a spurious environment warning.
        image_id = None
        # Remove Datmo environment_driver references, give warning if error
        try:
            # Obtain image id before cleaning up if exists
            images = self.environment_driver.list_images(name="datmo-" + \
                                                         self.model.name)
            image_id = images[0].id if images else None
        except Exception:
            self.logger.warning(
                __("warn", "controller.project.cleanup.environment"))
        # Remove Datmo code_driver references, give warning if error
        try:
            if self.code_driver.is_initialized:
                for ref in self.code_driver.list_refs():
                    self.code_driver.delete_ref(ref)
        except Exception:
            self.logger.warning(__("warn", "controller.project.cleanup.code"))
        try:
            # Remove Hidden Datmo file structure, give warning if error
            self.file_driver.delete_hidden_datmo_file_structure()
        except (FileIOError, PathDoesNotExist):
            self.logger.warning(__("warn", "controller.project.cleanup.files"))
        try:
            if image_id:
                # Remove image created during init
                self.environment_driver.remove_image(
                    image_id_or_name=image_id, force=True)
                # Remove any dangling images (optional)
                # Stop and remove all running environments with image_id
                self.environment_driver.stop_remove_containers_by_term(
                    image_id, force=True)
        except Exception:
            self.logger.warning(
                __("warn", "controller.project.cleanup.environment"))
        return True

    def status(self):
        """Return the project status information if initialized

        Returns
        -------
        status_dict : dict
            dictionary with project metadata and config
        current_snapshot : datmo.core.entity.snapshot.Snapshot
            snapshot object of the current state of the repo if present else None
        latest_snapshot_user_generated : datmo.core.entity.snapshot.Snapshot
            snapshot object of the latest snapshot generated by the user if present else None
        latest_snapshot_auto_generated : datmo.core.entity.snapshot.Snapshot
            snapshot object of the latest snapshot generated automatically by datmo if present else None
        unstaged_code : bool
            True if code has unstaged changes
        unstaged_environment : bool
            True if environment has unstaged changes
        unstaged_files : bool
            True if files have unstaged changes
        """
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.project.status"))
        # TODO: Add in note when environment is not setup or intialized
        # Add in project metadata
        status_dict = self.model.to_dictionary().copy()
        # Find all project settings
        status_dict["config"] = self.config_store.to_dict()
        # Find the latest snapshot generated by the user
        descending_snapshots = self.dal.snapshot.query(
            {
                "visible": True
            }, sort_key="created_at", sort_order="descending")
        latest_snapshot_user_generated = descending_snapshots[
            0] if descending_snapshots else None
        # Show the latest snapshot generated automatically by datmo
        descending_snapshots = self.dal.snapshot.query(
            {
                "visible": False
            }, sort_key="created_at", sort_order="descending")
        latest_snapshot_auto_generated = descending_snapshots[
            0] if descending_snapshots else None
        # TODO: add in latest run
        # Each check_unstaged_changes() raises UnstagedChanges when dirty;
        # the except arms translate that into a boolean flag
        self.code_controller = CodeController()
        try:
            unstaged_code = self.code_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_code = True
        self.environment_controller = EnvironmentController()
        try:
            unstaged_environment = self.environment_controller.check_unstaged_changes(
            )
        except UnstagedChanges:
            unstaged_environment = True
        self.file_collection_controller = FileCollectionController()
        try:
            unstaged_files = self.file_collection_controller.check_unstaged_changes(
            )
        except UnstagedChanges:
            unstaged_files = True
        # If exists, obtain the current snapshot, if unstaged changes, will be None
        self.snapshot_controller = SnapshotController()
        try:
            current_snapshot = self.snapshot_controller.current_snapshot()
        except UnstagedChanges:
            current_snapshot = None
        return status_dict, current_snapshot, latest_snapshot_user_generated, \
            latest_snapshot_auto_generated, unstaged_code, \
            unstaged_environment, unstaged_files
class TestProjectController():
    """Unit tests for ProjectController: init, re-init, cleanup and status."""

    def setup_method(self):
        # Each test runs against a fresh temporary datmo home directory
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        Config().set_home(self.temp_dir)
        self.project_controller = ProjectController()
        self.environment_ids = []

    def teardown_method(self):
        # Delete any environments the test registered (requires docker)
        if not check_docker_inactive(test_datmo_dir):
            self.project_controller = ProjectController()
            if self.project_controller.is_initialized:
                self.environment_controller = EnvironmentController()
                for env_id in list(set(self.environment_ids)):
                    if not self.environment_controller.delete(env_id):
                        raise Exception

    def test_init_failure_none(self):
        # init(None, None) must be rejected by validation
        failed = False
        try:
            self.project_controller.init(None, None)
        except ValidationFailed:
            failed = True
        assert failed

    def test_init_failure_empty_str(self):
        # init("", "") must be rejected and leave nothing initialized
        failed = False
        try:
            self.project_controller.init("", "")
        except ValidationFailed:
            failed = True
        assert failed
        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized

    def test_init_failure_git_code_driver(self):
        # Create a HEAD.lock file in .git to make GitCodeDriver.init() fail
        if self.project_controller.code_driver.type == "git":
            git_dir = os.path.join(
                self.project_controller.code_driver.filepath, ".git")
            os.makedirs(git_dir)
            # NOTE(review): file is opened in text mode ("a+") but written
            # with to_bytes(...) — confirm this behaves on Python 3, where
            # writing bytes to a text-mode file raises TypeError
            with open(os.path.join(git_dir, "HEAD.lock"), "a+") as f:
                f.write(to_bytes("test"))
            failed = False
            try:
                self.project_controller.init("test1", "test description")
            except Exception:
                failed = True
            assert failed
            assert not self.project_controller.code_driver.is_initialized
            assert not self.project_controller.file_driver.is_initialized

    def test_init_success(self):
        result = self.project_controller.init("test1", "test description")

        # Tested with is_initialized
        assert self.project_controller.model.name == "test1"
        assert self.project_controller.model.description == "test description"
        assert result and self.project_controller.is_initialized

        # Changeable by user, not tested in is_initialized
        assert self.project_controller.current_session.name == "default"

    # TODO: Test lower level functions (DAL, JSONStore, etc for interruptions)
    # def test_init_with_interruption(self):
    #     # Reinitializing after timed interruption during init
    #     @timeout_decorator.timeout(0.001, use_signals=False)
    #     def timed_init_with_interruption():
    #         result = self.project_controller.init("test1", "test description")
    #         return result
    #
    #     failed = False
    #     try:
    #         timed_init_with_interruption()
    #     except timeout_decorator.timeout_decorator.TimeoutError:
    #         failed = True
    #     # Tested with is_initialized
    #     assert failed
    #
    #     # Reperforming init after a wait of 2 seconds
    #     time.sleep(2)
    #     result = self.project_controller.init("test2", "test description")
    #     # Tested with is_initialized
    #     assert self.project_controller.model.name == "test2"
    #     assert self.project_controller.model.description == "test description"
    #     assert result and self.project_controller.is_initialized
    #
    #     # Changeable by user, not tested in is_initialized
    #     assert self.project_controller.current_session.name == "default"

    def test_init_reinit_failure_empty_str(self):
        _ = self.project_controller.init("test1", "test description")
        # BUGFIX: previously initialized to True, which made the
        # `assert failed` below pass even when init("", "") did not raise
        failed = False
        try:
            self.project_controller.init("", "")
        except Exception:
            failed = True
        assert failed
        # Original project metadata must be untouched by the failed re-init
        assert self.project_controller.model.name == "test1"
        assert self.project_controller.model.description == "test description"
        assert self.project_controller.code_driver.is_initialized
        assert self.project_controller.file_driver.is_initialized

    def test_init_reinit_success(self):
        _ = self.project_controller.init("test1", "test description")
        # Test out functionality for re-initialize project
        result = self.project_controller.init("anything", "else")

        assert self.project_controller.model.name == "anything"
        assert self.project_controller.model.description == "else"
        assert result == True

    def test_cleanup_no_environment(self):
        self.project_controller.init("test2", "test description")
        result = self.project_controller.cleanup()

        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized
        # Ensure that containers built with this image do not exist
        # assert not self.project_controller.environment_driver.list_containers(filters={
        #     "ancestor": image_id
        # })
        assert result == True

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_cleanup_with_environment(self):
        self.project_controller.init("test2", "test description")
        result = self.project_controller.cleanup()

        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized
        assert not self.project_controller.environment_driver.list_images(
            "datmo-test2")
        # Ensure that containers built with this image do not exist
        # assert not self.project_controller.environment_driver.list_containers(filters={
        #     "ancestor": image_id
        # })
        assert result == True

    def test_status_basic(self):
        self.project_controller.init("test3", "test description")
        status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, unstaged_code, unstaged_environment, unstaged_files = \
            self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test3"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert not current_snapshot
        assert not latest_snapshot_user_generated
        assert not latest_snapshot_auto_generated
        # no files, but unstaged because blank commit id has not yet been created (no initial snapshot)
        assert unstaged_code
        assert not unstaged_environment
        assert not unstaged_files

    # BUGFIX: a second, undecorated and otherwise identical copy of this
    # method used to follow it in the class body; it shadowed this definition
    # and silently dropped the docker-guard decorator. The duplicate has been
    # removed and only this decorated version is kept.
    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_status_snapshot_task(self):
        self.project_controller.init("test4", "test description")
        self.snapshot_controller = SnapshotController()
        self.task_controller = TaskController()

        # Create files to add
        self.snapshot_controller.file_driver.create("dirpath1", directory=True)
        self.snapshot_controller.file_driver.create("dirpath2", directory=True)
        self.snapshot_controller.file_driver.create("filepath1")

        # Create environment definition
        env_def_path = os.path.join(self.snapshot_controller.home,
                                    "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        environment_paths = [env_def_path]

        # Create config
        config_filepath = os.path.join(self.snapshot_controller.home,
                                       "config.json")
        with open(config_filepath, "wb") as f:
            f.write(to_bytes(str("{}")))

        # Create stats
        stats_filepath = os.path.join(self.snapshot_controller.home,
                                      "stats.json")
        with open(stats_filepath, "wb") as f:
            f.write(to_bytes(str("{}")))

        input_dict = {
            "message": "my test snapshot",
            "paths": [
                os.path.join(self.snapshot_controller.home, "dirpath1"),
                os.path.join(self.snapshot_controller.home, "dirpath2"),
                os.path.join(self.snapshot_controller.home, "filepath1")
            ],
            "environment_paths": environment_paths,
            "config_filename": config_filepath,
            "stats_filename": stats_filepath,
        }

        # Create snapshot in the project, then wait, and try status
        first_snapshot = self.snapshot_controller.create(input_dict)

        status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, unstaged_code, unstaged_environment, unstaged_files = \
            self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test4"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        # snapshot was created from other environments and files (so user is not on any current snapshot)
        assert not current_snapshot
        assert isinstance(latest_snapshot_user_generated, Snapshot)
        assert latest_snapshot_user_generated == first_snapshot
        assert not latest_snapshot_auto_generated
        assert not unstaged_code
        assert not unstaged_environment
        assert not unstaged_files

        # Create and run a task and test if task is shown
        first_task = self.task_controller.create()

        # Create task_dict
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        task_dict = {"command_list": task_command}

        updated_first_task = self.task_controller.run(
            first_task.id, task_dict=task_dict)
        before_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_first_task.before_snapshot_id)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_first_task.after_snapshot_id)
        before_environment_obj = self.task_controller.dal.environment.get_by_id(
            before_snapshot_obj.environment_id)
        after_environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        assert before_environment_obj == after_environment_obj
        # Register the environment so teardown_method can delete it
        self.environment_ids.append(after_environment_obj.id)

        status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, unstaged_code, unstaged_environment, unstaged_files = \
            self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test4"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert isinstance(current_snapshot, Snapshot)
        assert isinstance(latest_snapshot_user_generated, Snapshot)
        assert latest_snapshot_user_generated == first_snapshot
        assert isinstance(latest_snapshot_auto_generated, Snapshot)
        # current snapshot is the before snapshot for the run
        assert current_snapshot == before_snapshot_obj
        assert current_snapshot != latest_snapshot_auto_generated
        assert current_snapshot != latest_snapshot_user_generated
        # latest autogenerated snapshot is the after snapshot id
        assert latest_snapshot_auto_generated == after_snapshot_obj
        assert latest_snapshot_auto_generated != latest_snapshot_user_generated
        # user generated snapshot is not associated with any before or after snapshot
        assert latest_snapshot_user_generated != before_snapshot_obj
        assert latest_snapshot_user_generated != after_snapshot_obj
        assert not unstaged_code
        assert not unstaged_environment
        assert not unstaged_files
def create(self, **kwargs): self.snapshot_controller = SnapshotController() self.cli_helper.echo(__("info", "cli.snapshot.create")) run_id = kwargs.get("run_id", None) # creating snapshot with task id if it exists if run_id is not None: excluded_args = [ "environment_id", "environment_paths", "paths", "config_filepath", "config_filename", "stats_filepath", "stats_filename" ] for arg in excluded_args: if arg in kwargs and kwargs[arg] is not None: raise SnapshotCreateFromTaskArgs( "error", "cli.snapshot.create.run.args", arg) message = kwargs.get("message", None) label = kwargs.get("label", None) # Create a new core snapshot object snapshot_task_obj = self.snapshot_controller.create_from_task( message, run_id, label=label) self.cli_helper.echo( "Created snapshot id: %s" % snapshot_task_obj.id) return snapshot_task_obj else: # creating snapshot without task id snapshot_dict = {"visible": True} # Environment if kwargs.get("environment_id", None) or kwargs.get( "environment_paths", None): mutually_exclusive_args = [ "environment_id", "environment_paths" ] mutually_exclusive(mutually_exclusive_args, kwargs, snapshot_dict) # File if kwargs.get("paths", None): snapshot_dict['paths'] = kwargs['paths'] # Config if kwargs.get("config_filepath", None) or kwargs.get( "config_filename", None) or kwargs.get("config", None): mutually_exclusive_args = [ "config_filepath", "config_filename", "config" ] mutually_exclusive(mutually_exclusive_args, kwargs, snapshot_dict) # parsing config if "config" in snapshot_dict: config = {} config_list = snapshot_dict["config"] for item in config_list: item_parsed_dict = parse_cli_key_value(item, 'config') config.update(item_parsed_dict) snapshot_dict["config"] = config # Stats if kwargs.get("stats_filepath", None) or kwargs.get( "stats_filename", None) or kwargs.get("config", None): mutually_exclusive_args = [ "stats_filepath", "stats_filename", "stats" ] mutually_exclusive(mutually_exclusive_args, kwargs, snapshot_dict) # parsing stats if 
"stats" in snapshot_dict: stats = {} stats_list = snapshot_dict["stats"] for item in stats_list: item_parsed_dict = parse_cli_key_value(item, 'stats') stats.update(item_parsed_dict) snapshot_dict["stats"] = stats optional_args = ["message", "label"] for arg in optional_args: if arg in kwargs and kwargs[arg] is not None: snapshot_dict[arg] = kwargs[arg] snapshot_obj = self.snapshot_controller.create(snapshot_dict) # Because snapshots may be invisible to the user, this function ensures that by the end # the user can monitor the snapshot on the CLI, but making it visible snapshot_obj = self.snapshot_controller.update( snapshot_obj.id, visible=True) self.cli_helper.echo( __("info", "cli.snapshot.create.success", snapshot_obj.id)) return snapshot_obj