Пример #1
0
def delete(snapshot_id=None):
    """Delete a snapshot within a project

    The project must be created before this is implemented. You can do that by using
    the following command::

        $ datmo init


    Parameters
    ----------
    snapshot_id : str
        snapshot id to be deleted

    Returns
    -------
    object
        whatever ``SnapshotController.delete`` returns for the given id
        (presumably a success flag -- confirm against the controller)

    Examples
    --------
    You can use this function within a project repository to delete a snapshot.

    >>> import datmo
    >>> datmo.snapshot.delete(snapshot_id="4L24adFfsa")
    """
    snapshot_controller = SnapshotController()

    # Hand the controller's result back to the caller instead of silently
    # discarding it, so the outcome of the deletion can be inspected.
    return snapshot_controller.delete(snapshot_id=snapshot_id)
Пример #2
0
def create_from_task(message, task_id, home=None):
    """Build a client Snapshot entity from a completed task

    Parameters
    ----------
    message : str
        a description of the snapshot for later reference
    task_id : str
        id of the task object the snapshot is created from
    home : str, optional
        project directory passed to the controller and to the returned entity
        (default is None, which falls back to the current working directory)

    Returns
    -------
    Snapshot
        client Snapshot entity wrapping the core snapshot that was created

    Examples
    --------
    Call this inside a project repository to keep the state of a finished
    task. The resulting snapshot is then listed by the `datmo snapshot ls`
    cli command

    >>> import datmo
    >>> datmo.snapshot.create_from_task(message="my first snapshot from task", task_id="1jfkshg049")
    """
    # Any falsy home (None, "") resolves to the current working directory
    project_home = home or os.getcwd()

    # Let the controller create the core snapshot object for the task
    controller = SnapshotController(home=project_home)
    core_snapshot = controller.create_from_task(message, task_id)

    # Wrap the core object in the client-facing entity
    return Snapshot(core_snapshot, home=project_home)
Пример #3
0
 def __init__(self, home):
     """Set up the task controller and its helper controllers for `home`

     Parameters
     ----------
     home : str
         project directory forwarded to the base class and to the
         environment and snapshot controllers

     Raises
     ------
     ProjectNotInitializedException
         if `is_initialized` is falsy once the controllers are built
     """
     super(TaskController, self).__init__(home)
     self.environment = EnvironmentController(home)
     self.snapshot = SnapshotController(home)
     # Nothing more to check for an initialized project
     if self.is_initialized:
         return
     error_message = __("error", "controller.task.__init__")
     raise ProjectNotInitializedException(error_message)
Пример #4
0
 def inspect(self, **kwargs):
     """Echo and return the string form of the snapshot selected by `id`

     Parameters
     ----------
     **kwargs
         `id` : snapshot id to look up (None when omitted)

     Returns
     -------
     str
         str() of the object returned by the snapshot controller
     """
     self.snapshot_controller = SnapshotController()
     requested_id = kwargs.get("id")
     rendered = str(self.snapshot_controller.get(requested_id))
     self.cli_helper.echo(rendered)
     return rendered
Пример #5
0
    def setup_method(self):
        """Create an initialized temporary project and the fixture files for the tests

        Sets `temp_dir`, `project` and `snapshot` on the instance together with
        the paths of the generated Dockerfile, config, stats and plain test files.
        """
        # docker needs a mountable tmp directory; Windows keeps the platform default
        running_on_windows = platform.system() == "Windows"
        tempfile.tempdir = None if running_on_windows else "/tmp"
        base_dir = os.environ.get('TEST_DATMO_DIR', tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=base_dir)

        self.project = ProjectController(self.temp_dir)
        self.project.init("test", "test description")
        self.snapshot = SnapshotController(self.temp_dir)

        # The environment definition goes into the project directory, the
        # remaining fixtures into the snapshot controller's home
        self.env_def_path = os.path.join(self.temp_dir, "Dockerfile")
        self.config_filepath = os.path.join(self.snapshot.home, "config.json")
        self.stats_filepath = os.path.join(self.snapshot.home, "stats.json")
        self.filepath = os.path.join(self.snapshot.home, "file.txt")

        fixtures = (
            (self.env_def_path, "FROM datmo/xgboost:cpu"),
            (self.config_filepath, '{"foo":1}'),
            (self.stats_filepath, '{"bar":1}'),
            (self.filepath, "test"),
        )
        for fixture_path, content in fixtures:
            with open(fixture_path, "w") as fixture_file:
                fixture_file.write(str(content))
Пример #6
0
 def diff(self, **kwargs):
     """Echo and return a side-by-side table of two snapshots' attributes

     Parameters
     ----------
     **kwargs
         `id_1`, `id_2` : ids of the snapshots to compare (None when omitted)

     Returns
     -------
     object
         the table produced by `format_table`, after it has been echoed
     """
     self.snapshot_controller = SnapshotController()
     first_snapshot = self.snapshot_controller.get(kwargs.get("id_1"))
     second_snapshot = self.snapshot_controller.get(kwargs.get("id_2"))

     def _display(snapshot, attribute):
         # Falsy attribute values are shown as "N/A"; datetimes are prettified
         value = getattr(snapshot, attribute) or "N/A"
         if isinstance(value, datetime):
             value = prettify_datetime(value)
         return value

     rows = [["Attributes", "Snapshot 1", "", "Snapshot 2"],
             ["", "", "", ""]]
     for attribute in ("id", "created_at", "message", "label", "code_id",
                       "environment_id", "file_collection_id"):
         rows.append([
             attribute,
             _display(first_snapshot, attribute), "->",
             _display(second_snapshot, attribute)
         ])
     output = format_table(rows)
     self.cli_helper.echo(output)
     return output
Пример #7
0
 def delete(self, **kwargs):
     """Delete the snapshot selected by `id`, echoing progress messages

     Parameters
     ----------
     **kwargs
         `id` : id of the snapshot to delete (None when omitted)

     Returns
     -------
     object
         the value returned by the snapshot controller's `delete`
     """
     self.snapshot_controller = SnapshotController()
     start_message = __("info", "cli.snapshot.delete")
     self.cli_helper.echo(start_message)
     target_id = kwargs.get('id')
     outcome = self.snapshot_controller.delete(target_id)
     success_message = __("info", "cli.snapshot.delete.success", target_id)
     self.cli_helper.echo(success_message)
     return outcome
Пример #8
0
 def checkout(self, **kwargs):
     """Check out the snapshot selected by `id` and echo a success message

     Parameters
     ----------
     **kwargs
         `id` : id of the snapshot to check out (None when omitted)

     Returns
     -------
     object
         the value returned by the snapshot controller's `checkout`
         (truthy when the success message was echoed)
     """
     self.snapshot_controller = SnapshotController()
     snapshot_id = kwargs.get('id')
     checkout_success = self.snapshot_controller.checkout(snapshot_id)
     if checkout_success:
         self.cli_helper.echo(
             __("info", "cli.snapshot.checkout.success", snapshot_id))
     # Return the outcome of the checkout that was already performed; calling
     # checkout() a second time only to produce the return value repeats the
     # whole operation.
     return checkout_success
Пример #9
0
    def __init__(self):
        """Set up the task controller with its helper controllers and spinner

        Raises
        ------
        ProjectNotInitialized
            if `is_initialized` is falsy once the helpers are built
        """
        super(TaskController, self).__init__()
        self.environment = EnvironmentController()
        self.snapshot = SnapshotController()
        self.spinner = Spinner()

        # Nothing more to check for an initialized project
        if self.is_initialized:
            return
        error_message = __("error", "controller.task.__init__")
        raise ProjectNotInitialized(error_message)
Пример #10
0
def ls(filter=None):
    """List snapshots within a project

    The project must be created before this is implemented. You can do that by using
    the following command::

        $ datmo init


    Parameters
    ----------
    filter : str, optional
        a string to use to filter from message and label
        (default is to give all snapshots, unless provided a specific string. eg: best)

    Returns
    -------
    list
        returns a list of Snapshot entities (as defined above), in the order
        given by the controller (sorted by `created_at`, descending)

    Examples
    --------
    You can use this function within a project repository to list snapshots.

    >>> import datmo
    >>> snapshots = datmo.snapshot.ls()
    """
    # NOTE: the parameter name shadows the builtin `filter`; it is kept
    # because it is part of the public signature.
    snapshot_controller = SnapshotController()

    core_snapshot_objs = snapshot_controller.list(visible=True,
                                                  sort_key='created_at',
                                                  sort_order='descending')

    def _passes_filter(core_snapshot_obj):
        # Without a filter every visible snapshot is listed
        if not filter:
            return True
        # message or label may be unset (None); an unset field never matches
        # instead of raising TypeError on the `in` test
        return any(
            field is not None and filter in field
            for field in (core_snapshot_obj.message, core_snapshot_obj.label))

    # Filtering Snapshots in a single pass
    # TODO: move to list function in SnapshotController
    return [
        Snapshot(core_snapshot_obj) for core_snapshot_obj in core_snapshot_objs
        if core_snapshot_obj.visible and _passes_filter(core_snapshot_obj)
    ]
Пример #11
0
    def __get_core_snapshot(self):
        """Fetch the core snapshot object stored for this entity's id

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            object returned by a fresh snapshot controller for `self.id`
        """
        return SnapshotController().get(self.id)
Пример #12
0
 def setup_method(self):
     """Create an initialized temporary project with task and snapshot controllers"""
     # docker needs a mountable tmp directory; Windows keeps the platform default
     running_on_windows = platform.system() == "Windows"
     tempfile.tempdir = None if running_on_windows else "/tmp"
     base_dir = os.environ.get('TEST_DATMO_DIR', tempfile.gettempdir())
     self.temp_dir = tempfile.mkdtemp(dir=base_dir)

     # The project has to be initialized before the other controllers are built
     self.project = ProjectController(self.temp_dir)
     self.project.init("test", "test description")
     self.task = TaskController(self.temp_dir)
     self.snapshot = SnapshotController(self.temp_dir)
Пример #13
0
    def __get_core_snapshot(self):
        """Fetch the core snapshot object associated with this entity

        The id looked up is `after_snapshot_id` when it is truthy, otherwise
        `before_snapshot_id`.

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            object returned by the snapshot controller for the chosen id
        """
        controller = SnapshotController()
        chosen_id = self.after_snapshot_id or self.before_snapshot_id
        return controller.get(chosen_id)
Пример #14
0
def update(snapshot_id=None,
           config=None,
           stats=None,
           message=None,
           label=None):
    """Update a snapshot within a project

    The project has to exist before this can be used. Create it with
    the following command::

        $ datmo init


    Parameters
    ----------
    snapshot_id : str
        id of the snapshot to update
    config : dict, optional
        dictionary of configurations to update
        (default is None, which means it is not being updated)
    stats : dict, optional
        dictionary of relevant statistics or metrics to update
        (default is None, which means it is not being updated)
    message : str, optional
        new message for the snapshot
        (default is None, which keeps the message already stored)
    label : str, optional
        new label for the snapshot
        (default is None, which keeps the label already stored)

    Returns
    -------
    snapshot entity
        the value returned by ``SnapshotController.update``

    Examples
    --------
    You can use this function within a project repository to update a snapshot.

    >>> import datmo
    >>> snapshots = datmo.snapshot.update(snapshot_id="4L24adFfsa", config={"depth": "10", "learning_rate": "0.91"},
    ...          stats={"acc": "91.34", "f1_score": "0.91"}, message="new message", label="best")
    """
    controller = SnapshotController()

    # Every argument is forwarded unchanged, by keyword
    update_fields = dict(
        snapshot_id=snapshot_id,
        config=config,
        stats=stats,
        message=message,
        label=label)
    return controller.update(**update_fields)
Пример #15
0
    def rerun(self, **kwargs):
        """Check out the snapshot of an earlier task and run that task again

        Parameters
        ----------
        **kwargs
            `id` : id of the task to rerun (None when omitted)

        Returns
        -------
        Run or bool
            Run built from the new task object, or False when
            `task_run_helper` returns a falsy value

        Notes
        -----
        The process exits with status 1 when the snapshot checkout raises.
        """
        self.task_controller = TaskController()
        task_id = kwargs.get("id")
        self.cli_helper.echo(__("info", "cli.run.rerun", task_id))

        # Load the stored task and wrap it in a run object
        task_obj = self.task_controller.get(task_id)
        run_obj = Run(task_obj)

        # Scripts restart from their initial snapshot, everything else from the final one
        use_initial_snapshot = run_obj.type == 'script'
        environment_id = run_obj.environment_id
        command = task_obj.command_list
        if use_initial_snapshot:
            snapshot_id = run_obj.before_snapshot_id
        else:
            snapshot_id = run_obj.core_snapshot_id

        # The project is checked out to that snapshot before the task is rerun
        self.snapshot_controller = SnapshotController()
        try:
            checkout_success = self.snapshot_controller.checkout(snapshot_id)
        except Exception:
            self.cli_helper.echo(__("error", "cli.snapshot.checkout.failure"))
            sys.exit(1)

        if checkout_success:
            success_message = __("info", "cli.snapshot.checkout.success",
                                 snapshot_id)
            self.cli_helper.echo(success_message)

        # Inputs of the new task mirror the settings of the original one
        snapshot_dict = {"environment_id": environment_id}
        task_dict = {
            "ports": task_obj.ports,
            "interactive": task_obj.interactive,
            "mem_limit": task_obj.mem_limit,
            "command_list": command,
            "data_file_path_map": task_obj.data_file_path_map,
            "data_directory_path_map": task_obj.data_directory_path_map,
            "workspace": task_obj.workspace
        }
        new_task_obj = self.task_run_helper(task_dict, snapshot_dict,
                                            "cli.run.run")
        if not new_task_obj:
            return False
        return Run(new_task_obj)
Пример #16
0
    def get_files(self, mode="r"):
        """Return the file objects the snapshot controller yields for this snapshot

        Parameters
        ----------
        mode : str
            mode forwarded to the controller for the file objects
            (default is "r" which signifies read mode)

        Returns
        -------
        list
            list of file objects associated with the snapshot
        """
        return SnapshotController().get_files(self.id, mode=mode)
Пример #17
0
 def test_init_fail_project_not_init(self):
     """Building a SnapshotController for `temp_dir` must raise ProjectNotInitializedException"""
     try:
         SnapshotController(self.temp_dir)
     except ProjectNotInitializedException:
         raised = True
     else:
         raised = False
     assert raised
Пример #18
0
 def test_init_fail_invalid_path(self):
     """Building a SnapshotController for "some_random_dir" must raise InvalidProjectPathException"""
     invalid_home = "some_random_dir"
     try:
         SnapshotController(invalid_home)
     except InvalidProjectPathException:
         raised = True
     else:
         raised = False
     assert raised
Пример #19
0
 def test_init_fail_project_not_init(self):
     """With home set to `temp_dir`, building a SnapshotController must raise ProjectNotInitialized"""
     Config().set_home(self.temp_dir)
     try:
         SnapshotController()
     except ProjectNotInitialized:
         raised = True
     else:
         raised = False
     assert raised
Пример #20
0
    def get_files(self, mode="r"):
        """Return the file objects of the core snapshot tied to this task

        The core snapshot is looked up again on every call and cached on
        `_core_snapshot`.

        Parameters
        ----------
        mode : str
            mode forwarded to the controller for the file objects
            (default is "r" which signifies read mode)

        Returns
        -------
        list or None
            list of file objects associated with the task, or None when no
            core snapshot is found
        """
        snapshot_controller = SnapshotController()
        self._core_snapshot = self.__get_core_snapshot()
        # No snapshot means there is nothing to list
        if not self._core_snapshot:
            return None
        return snapshot_controller.get_files(self._core_snapshot.id, mode=mode)
Пример #21
0
 def test_init_fail_invalid_path(self):
     """With home set to "some_random_dir", building a SnapshotController must raise InvalidProjectPath"""
     invalid_home = "some_random_dir"
     Config().set_home(invalid_home)
     try:
         SnapshotController()
     except InvalidProjectPath:
         raised = True
     else:
         raised = False
     assert raised
Пример #22
0
 def ls(self, **kwargs):
     """Print the runs of a session, optionally writing the listing to a file

     Parameters
     ----------
     **kwargs
         `session_id` : session to list (default is the current session's id)
         `format` : print format handed to `print_items` (default "table")
         `download` : when truthy, the listing is also written to `download_path`
         `download_path` : output path; when falsy a timestamped
         "run_ls_<ms>" path in the current working directory is used

     Returns
     -------
     list
         the task objects when `download` is truthy, otherwise the list of
         RunObject instances built from them
     """
     # Create controllers
     self.task_controller = TaskController()
     self.snapshot_controller = SnapshotController()
     session_id = kwargs.get('session_id',
                             self.task_controller.current_session.id)
     print_format = kwargs.get('format', "table")
     download = kwargs.get('download', None)
     download_path = kwargs.get('download_path', None)

     # Newest tasks first
     task_objs = self.task_controller.list(
         session_id, sort_key="created_at", sort_order="descending")
     header_list = [
         "id", "command", "status", "config", "results", "created at"
     ]
     item_dict_list = []
     run_obj_list = []
     for task_obj in task_objs:
         # Each task is presented through a run object
         run_obj = RunObject(task_obj)
         results_text = printable_object(str(run_obj.results))
         config_text = printable_object(str(run_obj.config))
         row = {
             "id": run_obj.id,
             "command": run_obj.command,
             "status": run_obj.status,
             "config": config_text,
             "results": results_text,
             "created at": prettify_datetime(run_obj.created_at)
         }
         item_dict_list.append(row)
         run_obj_list.append(run_obj)

     if not download:
         self.cli_helper.print_items(
             header_list, item_dict_list, print_format=print_format)
         return run_obj_list

     if not download_path:
         # Default target: current working directory, named by the unix time in ms
         current_time = datetime.utcnow()
         epoch_time = datetime.utcfromtimestamp(0)
         elapsed_ms = (current_time - epoch_time).total_seconds() * 1000.0
         download_path = os.path.join(os.getcwd(),
                                      "run_ls_" + str(elapsed_ms))
     self.cli_helper.print_items(
         header_list,
         item_dict_list,
         print_format=print_format,
         output_path=download_path)
     return task_objs
Пример #23
0
    def update(self, **kwargs):
        """Merge cli-provided config/stats into a snapshot and update it

        Parameters
        ----------
        **kwargs
            `id` : id of the snapshot to update
            `config`, `stats` : lists of cli items parsed with
            `parse_cli_key_value` and merged into the stored dictionaries
            `message`, `label` : new message / label (None when omitted)

        Returns
        -------
        object
            the value returned by the snapshot controller's `update`
        """
        self.snapshot_controller = SnapshotController()
        self.cli_helper.echo(__("info", "cli.snapshot.update"))
        snapshot_id = kwargs.get('id')

        # Start from the config and stats already saved on the snapshot
        snapshot_obj = self.snapshot_controller.get(snapshot_id)
        config = snapshot_obj.config
        stats = snapshot_obj.stats

        for field, stored_values in (('config', config), ('stats', stats)):
            cli_items = kwargs.get(field, None)
            if not cli_items:
                continue
            # Parse every item first so the stored dictionary is only
            # touched once all of them were parsed
            parsed_values = {}
            for item in cli_items:
                parsed_values.update(parse_cli_key_value(item, field))
            stored_values.update(parsed_values)

        message = kwargs.get('message', None)
        label = kwargs.get('label', None)

        result = self.snapshot_controller.update(
            snapshot_id,
            config=config,
            stats=stats,
            message=message,
            label=label)
        success_message = __("info", "cli.snapshot.update.success",
                             snapshot_id)
        self.cli_helper.echo(success_message)
        return result
Пример #24
0
 def diff(self, **kwargs):
     """Echo and return a side-by-side table of two snapshots' attributes

     Scalar attributes take one row each. The `config` and `stats`
     attributes are expanded into one row per key found in either snapshot.

     Parameters
     ----------
     **kwargs
         `id_1`, `id_2` : ids of the snapshots to compare (None when omitted)

     Returns
     -------
     object
         the table produced by `format_table`, after it has been echoed
     """
     self.snapshot_controller = SnapshotController()
     snapshot_id_1 = kwargs.get("id_1", None)
     snapshot_id_2 = kwargs.get("id_2", None)
     snapshot_obj_1 = self.snapshot_controller.get(snapshot_id_1)
     snapshot_obj_2 = self.snapshot_controller.get(snapshot_id_2)
     comparison_attributes = [
         "id", "created_at", "message", "label", "code_id",
         "environment_id", "file_collection_id", "config", "stats"
     ]
     table_data = [["Attributes", "Snapshot 1", "", "Snapshot 2"],
                   ["", "", "", ""]]
     for attribute in comparison_attributes:
         # Falsy attribute values are displayed as "N/A"
         value_1 = getattr(snapshot_obj_1, attribute) or "N/A"
         value_2 = getattr(snapshot_obj_2, attribute) or "N/A"
         if isinstance(value_1, datetime):
             value_1 = prettify_datetime(value_1)
         if isinstance(value_2, datetime):
             value_2 = prettify_datetime(value_2)
         if attribute not in ("config", "stats"):
             table_data.append([attribute, value_1, "->", value_2])
             continue
         # Anything that is not a dict (including the "N/A" placeholder)
         # contributes no keys instead of failing on `.get`
         dict_1 = value_1 if isinstance(value_1, dict) else {}
         dict_2 = value_2 if isinstance(value_2, dict) else {}
         # Sorted by string form so the rows come out in a stable order
         for key in sorted(set(dict_1) | set(dict_2), key=str):
             # Only missing or None entries are "N/A"; legitimate falsy
             # values such as 0 or False are shown
             key_value_1 = "%s: %s" % (key, dict_1[key]) \
                 if dict_1.get(key) is not None else "N/A"
             key_value_2 = "%s: %s" % (key, dict_2[key]) \
                 if dict_2.get(key) is not None else "N/A"
             table_data.append([attribute, key_value_1, "->", key_value_2])
     output = format_table(table_data)
     self.cli_helper.echo(output)
     return output
Пример #25
0
    def run(self, **kwargs):
        """Create a task from the cli arguments and run it

        Parameters
        ----------
        **kwargs
            `ports`, `interactive`, `mem_limit`, `cmd` : required task settings
            `environment_id`, `environment_paths` : optional, handled through
            `mutually_exclusive` when either is truthy

        Returns
        -------
        object or bool
            the task object returned by the task controller's `run`, or
            False when running the task raised
        """
        self.cli_helper.echo(__("info", "cli.task.run"))
        # Create controllers
        self.task_controller = TaskController()
        self.snapshot_controller = SnapshotController()
        snapshot_dict = {}

        # Environment
        has_environment_id = kwargs.get("environment_id", None)
        if has_environment_id or kwargs.get("environment_paths", None):
            mutually_exclusive(["environment_id", "environment_paths"],
                               kwargs, snapshot_dict)

        task_dict = {
            "ports": kwargs['ports'],
            "interactive": kwargs['interactive'],
            "mem_limit": kwargs['mem_limit']
        }
        # A list is used as is; otherwise Windows keeps the raw command and
        # other platforms split a string command into a list
        cmd = kwargs['cmd']
        if isinstance(cmd, list):
            task_dict['command_list'] = cmd
        elif platform.system() == "Windows":
            task_dict['command'] = cmd
        elif isinstance(cmd, basestring):
            task_dict['command_list'] = shlex.split(cmd)

        task_obj = self.task_controller.create()
        try:
            updated_task_obj = self.task_controller.run(
                task_obj.id, snapshot_dict=snapshot_dict, task_dict=task_dict)
        except Exception as e:
            # Log and report the failure, then signal it with False
            self.logger.error("%s %s" % (e, task_dict))
            self.cli_helper.echo("%s" % e)
            self.cli_helper.echo(__("error", "cli.task.run", task_obj.id))
            return False

        completion_message = __("info", "cli.task.run.complete",
                                updated_task_obj.id)
        self.cli_helper.echo(completion_message)
        return updated_task_obj
Пример #26
0
    def status(self):
        """Collect the status information of an initialized project

        Returns
        -------
        status_dict : dict
            project metadata with the project config under the "config" key
        current_snapshot : datmo.core.entity.snapshot.Snapshot
            snapshot of the current state of the repo, None when the lookup
            raises UnstagedChanges
        latest_snapshot_user_generated : datmo.core.entity.snapshot.Snapshot
            newest snapshot stored with visible=True if present else None
        latest_snapshot_auto_generated : datmo.core.entity.snapshot.Snapshot
            newest snapshot stored with visible=False if present else None
        unstaged_code : bool
            True if code has unstaged changes
        unstaged_environment : bool
            True if environment has unstaged changes
        unstaged_files : bool
            True if files have unstaged changes

        Raises
        ------
        ProjectNotInitialized
            if the project is not initialized
        """
        if not self.is_initialized:
            error_message = __("error", "controller.project.status")
            raise ProjectNotInitialized(error_message)
        # TODO: Add in note when environment is not setup or intialized

        # Project metadata plus all project settings
        status_dict = self.model.to_dictionary().copy()
        status_dict["config"] = self.config_store.to_dict()

        def _newest_snapshot(visible):
            # First entry of the created_at-descending query, if there is one
            matches = self.dal.snapshot.query(
                {
                    "visible": visible
                },
                sort_key="created_at",
                sort_order="descending")
            return matches[0] if matches else None

        # visible=True snapshots are reported as user generated, visible=False
        # ones as generated automatically by datmo
        latest_snapshot_user_generated = _newest_snapshot(True)
        latest_snapshot_auto_generated = _newest_snapshot(False)

        # TODO: add in latest run

        # An UnstagedChanges error from a check counts as "has unstaged changes"
        self.code_controller = CodeController()
        try:
            unstaged_code = self.code_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_code = True

        self.environment_controller = EnvironmentController()
        try:
            unstaged_environment = \
                self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_environment = True

        self.file_collection_controller = FileCollectionController()
        try:
            unstaged_files = \
                self.file_collection_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_files = True

        # The current snapshot is None when the lookup reports unstaged changes
        self.snapshot_controller = SnapshotController()
        try:
            current_snapshot = self.snapshot_controller.current_snapshot()
        except UnstagedChanges:
            current_snapshot = None

        return (status_dict, current_snapshot, latest_snapshot_user_generated,
                latest_snapshot_auto_generated, unstaged_code,
                unstaged_environment, unstaged_files)
Пример #27
0
class ProjectController(BaseController):
    """ProjectController inherits from BaseController and manages business logic related to the
    project. One model is associated with each project currently.

    Methods
    -------
    init(name, description)
        Initialize the project repository as a new model or update the existing project
    cleanup()
        Remove all datmo references from the current repository. NOTE: THIS WILL DELETE ALL DATMO WORK
    status()
        Give the user a picture of the status of the project, snapshots, and tasks
    """

    def __init__(self):
        super(ProjectController, self).__init__()

    def init(self, name, description):
        """ Initialize the project

        This function will initialize the project or reinitialize it if the project is
        already initialized.

        Parameters
        ----------
        name : str
            name stored on the project model
        description : str
            description stored on the project model

        Returns
        -------
        bool
            True when initialization completed

        Raises
        ------
        Exception
            any error raised during initialization is re-raised after the
            project was cleaned up (new model) or the previous model values
            were written back (existing model)
        """
        # Remember the state before init so a failure can be rolled back
        old_model = self.model
        is_new_model = not self.model

        try:
            # Always validate inputs to the init function
            validate("create_project", {
                "name": name,
                "description": description
            })

            # Initialize File Driver if needed
            if not self.file_driver.is_initialized:
                self.file_driver.init()

            # Initialize the dal
            if not self.dal.is_initialized:
                self.dal.init()

            # Initialize Code Driver if needed
            if not self.code_driver.is_initialized:
                self.code_driver.init()

            # Initialize Environment Driver if needed
            if not self.environment_driver.is_initialized:
                self.environment_driver.init()

            # Initialize the config JSON store
            self.config_store = JSONStore(
                os.path.join(self.home,
                             Config().datmo_directory_name, ".config"))

            # Create model if new else update
            if is_new_model:
                self.dal.model.create(
                    Model({
                        "name": name,
                        "description": description
                    }))
            else:
                self._model = self.dal.model.update({
                    "id": self.model.id,
                    "name": name,
                    "description": description
                })

            # Connect Environment Driver if needed
            # (not required but will warn if not present)
            try:
                if not self.environment_driver.is_connected:
                    self.environment_driver.connect()
            except EnvironmentConnectFailed:
                self.logger.warning(
                    __("warn", "controller.general.environment.failed"))

            # Build the initial default Environment (NOT NECESSARY)
            # self.environment_driver.build_image(tag="datmo-" + \
            #                                  self.model.name)
            return True
        except Exception:
            # if any error occurred with new model, ensure no initialize occurs and raise previous error
            # if any error occurred with existing model, ensure no updates were made, raise previous error
            if is_new_model:
                self.cleanup()
            else:
                self._model = self.dal.model.update({
                    "id": old_model.id,
                    "name": old_model.name,
                    "description": old_model.description
                })
            raise

    def cleanup(self):
        """Cleans the project structure completely

        Notes
        -----
        This function will not error out but will gracefully exit, since
        it is used in cases where init fails as a check against mid-initialized
        projects

        Returns
        -------
        bool
            always True; problems are reported as logger warnings
        """
        if not self.is_initialized:
            self.logger.warning(
                __("warn", "controller.project.cleanup.not_init"))

        # Default to "no image" so a failed lookup below does not leave
        # image_id unbound; the removal step at the end is then skipped
        # instead of failing on the missing name and logging a second,
        # misleading environment warning.
        image_id = None
        # Remove Datmo environment_driver references, give warning if error
        try:
            # Obtain image id before cleaning up if exists
            images = self.environment_driver.list_images(
                name="datmo-" + self.model.name)
            image_id = images[0].id if images else None
        except Exception:
            self.logger.warning(
                __("warn", "controller.project.cleanup.environment"))

        # Remove Datmo code_driver references, give warning if error
        try:
            if self.code_driver.is_initialized:
                for ref in self.code_driver.list_refs():
                    self.code_driver.delete_ref(ref)
        except Exception:
            self.logger.warning(__("warn", "controller.project.cleanup.code"))
        try:
            # Remove Hidden Datmo file structure, give warning if error
            self.file_driver.delete_hidden_datmo_file_structure()
        except (FileIOError, PathDoesNotExist):
            self.logger.warning(__("warn", "controller.project.cleanup.files"))

        try:
            if image_id:
                # Remove image created during init
                self.environment_driver.remove_image(
                    image_id_or_name=image_id, force=True)

                # Remove any dangling images (optional)

                # Stop and remove all running environments with image_id
                self.environment_driver.stop_remove_containers_by_term(
                    image_id, force=True)
        except Exception:
            self.logger.warning(
                __("warn", "controller.project.cleanup.environment"))

        return True

    def status(self):
        """Return the project status information if initialized

        Returns
        -------
        status_dict : dict
            dictionary with project metadata and config
        current_snapshot : datmo.core.entity.snapshot.Snapshot
            snapshot object of the current state of the repo if present else None
        latest_snapshot_user_generated : datmo.core.entity.snapshot.Snapshot
            snapshot object of the latest snapshot generated by the user if present else None
        latest_snapshot_auto_generated : datmo.core.entity.snapshot.Snapshot
            snapshot object of the latest snapshot generated automatically by datmo if present else None
        unstaged_code : bool
            True if code has unstaged changes
        unstaged_environment : bool
            True if environment has unstaged changes
        unstaged_files : bool
            True if files have unstaged changes

        Raises
        ------
        ProjectNotInitialized
            if the project is not initialized
        """
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.project.status"))
        # TODO: Add in note when environment is not setup or intialized

        # Add in project metadata
        status_dict = self.model.to_dictionary().copy()

        # Find  all project settings
        status_dict["config"] = self.config_store.to_dict()

        # Find the latest snapshot generated by the user
        descending_snapshots = self.dal.snapshot.query(
            {
                "visible": True
            }, sort_key="created_at", sort_order="descending")
        latest_snapshot_user_generated = descending_snapshots[
            0] if descending_snapshots else None

        # Show the latest snapshot generated automatically by datmo
        descending_snapshots = self.dal.snapshot.query(
            {
                "visible": False
            }, sort_key="created_at", sort_order="descending")
        latest_snapshot_auto_generated = descending_snapshots[
            0] if descending_snapshots else None

        # TODO: add in latest run

        # An UnstagedChanges error from a check counts as "has unstaged changes"
        self.code_controller = CodeController()
        try:
            unstaged_code = self.code_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_code = True

        self.environment_controller = EnvironmentController()
        try:
            unstaged_environment = self.environment_controller.check_unstaged_changes(
            )
        except UnstagedChanges:
            unstaged_environment = True

        self.file_collection_controller = FileCollectionController()
        try:
            unstaged_files = self.file_collection_controller.check_unstaged_changes(
            )
        except UnstagedChanges:
            unstaged_files = True

        # If exists, obtain the current snapshot, if unstaged changes, will be None
        self.snapshot_controller = SnapshotController()
        try:
            current_snapshot = self.snapshot_controller.current_snapshot()
        except UnstagedChanges:
            current_snapshot = None

        return status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, \
               unstaged_code, unstaged_environment, unstaged_files
Пример #28
0
class TestProjectController():
    """Tests for ProjectController: init, re-init, cleanup and status.

    Each test runs against a fresh temporary home directory created under
    ``test_datmo_dir``. Environment ids appended to ``self.environment_ids``
    during a test are deleted again in ``teardown_method``.
    """

    def setup_method(self):
        """Point the config at a new temp directory and build the controller."""
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        Config().set_home(self.temp_dir)
        self.project_controller = ProjectController()
        self.environment_ids = []

    def teardown_method(self):
        """Delete every environment recorded by the test.

        Only runs when ``check_docker_inactive`` reports docker as available
        and the project was initialized; raises if a deletion reports failure.
        """
        if not check_docker_inactive(test_datmo_dir):
            self.project_controller = ProjectController()
            if self.project_controller.is_initialized:
                self.environment_controller = EnvironmentController()
                # De-duplicate so the same environment is not deleted twice
                for env_id in set(self.environment_ids):
                    if not self.environment_controller.delete(env_id):
                        raise Exception(
                            "failed to delete environment %s" % env_id)

    def test_init_failure_none(self):
        """init(None, None) must raise ValidationFailed."""
        # Test failed case
        failed = False
        try:
            self.project_controller.init(None, None)
        except ValidationFailed:
            failed = True
        assert failed

    def test_init_failure_empty_str(self):
        """init("", "") must raise ValidationFailed and leave drivers uninitialized."""
        # Test failed case
        failed = False
        try:
            self.project_controller.init("", "")
        except ValidationFailed:
            failed = True
        assert failed
        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized

    def test_init_failure_git_code_driver(self):
        """A failing git code driver init must leave both drivers uninitialized.

        Only exercised when the configured code driver type is "git".
        """
        # Create a HEAD.lock file in .git to make GitCodeDriver.init() fail
        if self.project_controller.code_driver.type == "git":
            git_dir = os.path.join(
                self.project_controller.code_driver.filepath, ".git")
            os.makedirs(git_dir)
            # Binary mode: to_bytes() is written here, matching the "wb"
            # writes used for the other fixture files in this class
            with open(os.path.join(git_dir, "HEAD.lock"), "ab+") as f:
                f.write(to_bytes("test"))
            failed = False
            try:
                self.project_controller.init("test1", "test description")
            except Exception:
                failed = True
            assert failed
            assert not self.project_controller.code_driver.is_initialized
            assert not self.project_controller.file_driver.is_initialized

    def test_init_success(self):
        """A valid init sets name/description and a "default" session."""
        result = self.project_controller.init("test1", "test description")

        # Tested with is_initialized
        assert self.project_controller.model.name == "test1"
        assert self.project_controller.model.description == "test description"
        assert result and self.project_controller.is_initialized

        # Changeable by user, not tested in is_initialized
        assert self.project_controller.current_session.name == "default"

    # TODO: Test lower level functions (DAL, JSONStore, etc for interruptions)
    # def test_init_with_interruption(self):
    #     # Reinitializing after timed interruption during init
    #     @timeout_decorator.timeout(0.001, use_signals=False)
    #     def timed_init_with_interruption():
    #         result = self.project_controller.init("test1", "test description")
    #         return result
    #
    #     failed = False
    #     try:
    #         timed_init_with_interruption()
    #     except timeout_decorator.timeout_decorator.TimeoutError:
    #         failed = True
    #     # Tested with is_initialized
    #     assert failed
    #
    #     # Reperforming init after a wait of 2 seconds
    #     time.sleep(2)
    #     result = self.project_controller.init("test2", "test description")
    #     # Tested with is_initialized
    #     assert self.project_controller.model.name == "test2"
    #     assert self.project_controller.model.description == "test description"
    #     assert result and self.project_controller.is_initialized
    #
    #     # Changeable by user, not tested in is_initialized
    #     assert self.project_controller.current_session.name == "default"

    def test_init_reinit_failure_empty_str(self):
        """A failed re-init with empty strings must keep the original project."""
        _ = self.project_controller.init("test1", "test description")
        # Must start False, otherwise the assertion below can never fail
        failed = False
        try:
            self.project_controller.init("", "")
        except Exception:
            failed = True
        assert failed
        assert self.project_controller.model.name == "test1"
        assert self.project_controller.model.description == "test description"
        assert self.project_controller.code_driver.is_initialized
        assert self.project_controller.file_driver.is_initialized

    def test_init_reinit_success(self):
        """Re-running init with new values overwrites name and description."""
        _ = self.project_controller.init("test1", "test description")
        # Test out functionality for re-initialize project
        result = self.project_controller.init("anything", "else")

        assert self.project_controller.model.name == "anything"
        assert self.project_controller.model.description == "else"
        assert result == True

    def test_cleanup_no_environment(self):
        """cleanup() returns True and de-initializes the code and file drivers."""
        self.project_controller.init("test2", "test description")
        result = self.project_controller.cleanup()

        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized
        # Ensure that containers built with this image do not exist
        # assert not self.project_controller.environment_driver.list_containers(filters={
        #     "ancestor": image_id
        # })
        assert result == True

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_cleanup_with_environment(self):
        """cleanup() also leaves no "datmo-test2" images in the environment driver."""
        self.project_controller.init("test2", "test description")
        result = self.project_controller.cleanup()

        assert not self.project_controller.code_driver.is_initialized
        assert not self.project_controller.file_driver.is_initialized
        assert not self.project_controller.environment_driver.list_images(
            "datmo-test2")
        # Ensure that containers built with this image do not exist
        # assert not self.project_controller.environment_driver.list_containers(filters={
        #     "ancestor": image_id
        # })
        assert result == True

    def test_status_basic(self):
        """status() on a freshly initialized project reports no snapshots."""
        self.project_controller.init("test3", "test description")
        status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, unstaged_code, unstaged_environment, unstaged_files = \
            self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test3"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert not current_snapshot
        assert not latest_snapshot_user_generated
        assert not latest_snapshot_auto_generated
        assert unstaged_code  # no files, but unstaged because blank commit id has not yet been created (no initial snapshot)
        assert not unstaged_environment
        assert not unstaged_files

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_status_snapshot_task(self):
        """status() after a user snapshot, then again after running a task.

        First a snapshot is created from explicit paths, environment, config
        and stats files; status must report it as the latest user-generated
        snapshot. Then a task is run; status must report the task's before
        snapshot as current and its after snapshot as the latest
        auto-generated one.
        """
        self.project_controller.init("test4", "test description")
        self.snapshot_controller = SnapshotController()
        self.task_controller = TaskController()

        # Create files to add
        self.snapshot_controller.file_driver.create("dirpath1", directory=True)
        self.snapshot_controller.file_driver.create("dirpath2", directory=True)
        self.snapshot_controller.file_driver.create("filepath1")

        # Create environment definition
        env_def_path = os.path.join(self.snapshot_controller.home,
                                    "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        environment_paths = [env_def_path]

        # Create config
        config_filepath = os.path.join(self.snapshot_controller.home,
                                       "config.json")
        with open(config_filepath, "wb") as f:
            f.write(to_bytes(str("{}")))

        # Create stats
        stats_filepath = os.path.join(self.snapshot_controller.home,
                                      "stats.json")
        with open(stats_filepath, "wb") as f:
            f.write(to_bytes(str("{}")))

        input_dict = {
            "message":
                "my test snapshot",
            "paths": [
                os.path.join(self.snapshot_controller.home, "dirpath1"),
                os.path.join(self.snapshot_controller.home, "dirpath2"),
                os.path.join(self.snapshot_controller.home, "filepath1")
            ],
            "environment_paths":
                environment_paths,
            "config_filename":
                config_filepath,
            "stats_filename":
                stats_filepath,
        }

        # Create snapshot in the project, then wait, and try status
        first_snapshot = self.snapshot_controller.create(input_dict)

        status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, unstaged_code, unstaged_environment, unstaged_files = \
            self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test4"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert not current_snapshot  # snapshot was created from other environments and files (so user is not on any current snapshot)
        assert isinstance(latest_snapshot_user_generated, Snapshot)
        assert latest_snapshot_user_generated == first_snapshot
        assert not latest_snapshot_auto_generated
        assert not unstaged_code
        assert not unstaged_environment
        assert not unstaged_files

        # Create and run a task and test if task is shown
        first_task = self.task_controller.create()

        # Create task_dict
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        task_dict = {"command_list": task_command}

        updated_first_task = self.task_controller.run(
            first_task.id, task_dict=task_dict)
        before_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_first_task.before_snapshot_id)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_first_task.after_snapshot_id)
        before_environment_obj = self.task_controller.dal.environment.get_by_id(
            before_snapshot_obj.environment_id)
        after_environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        assert before_environment_obj == after_environment_obj
        # Record the environment so teardown_method removes it
        self.environment_ids.append(after_environment_obj.id)

        status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, unstaged_code, unstaged_environment, unstaged_files = \
            self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test4"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert isinstance(current_snapshot, Snapshot)
        assert isinstance(latest_snapshot_user_generated, Snapshot)
        assert latest_snapshot_user_generated == first_snapshot
        assert isinstance(latest_snapshot_auto_generated, Snapshot)
        # current snapshot is the before snapshot for the run
        assert current_snapshot == before_snapshot_obj
        assert current_snapshot != latest_snapshot_auto_generated
        assert current_snapshot != latest_snapshot_user_generated
        # latest autogenerated snapshot is the after snapshot id
        assert latest_snapshot_auto_generated == after_snapshot_obj
        assert latest_snapshot_auto_generated != latest_snapshot_user_generated
        # user generated snapshot is not associated with any before or after snapshot
        assert latest_snapshot_user_generated != before_snapshot_obj
        assert latest_snapshot_user_generated != after_snapshot_obj
        assert not unstaged_code
        assert not unstaged_environment
        assert not unstaged_files
Пример #29
0
    def test_status_snapshot_task(self):
        """Check project status after a manual snapshot and after a task run.

        Phase one creates a snapshot from explicit files, an environment
        definition, a config file and a stats file, and verifies status lists
        it as the latest user-generated snapshot with nothing unstaged.
        Phase two runs a task and verifies the task's before/after snapshots
        show up as the current and latest auto-generated snapshots.
        """
        self.project_controller.init("test4", "test description")
        self.snapshot_controller = SnapshotController()
        self.task_controller = TaskController()

        # Two directories and one file to include in the snapshot
        for dirname in ("dirpath1", "dirpath2"):
            self.snapshot_controller.file_driver.create(
                dirname, directory=True)
        self.snapshot_controller.file_driver.create("filepath1")

        home = self.snapshot_controller.home

        # Environment definition file
        env_def_path = os.path.join(home, "Dockerfile")
        with open(env_def_path, "wb") as env_file:
            env_file.write(to_bytes("FROM python:3.5-alpine"))

        # Config and stats files, both holding an empty JSON object
        config_filepath = os.path.join(home, "config.json")
        stats_filepath = os.path.join(home, "stats.json")
        for json_path in (config_filepath, stats_filepath):
            with open(json_path, "wb") as json_file:
                json_file.write(to_bytes(str("{}")))

        snapshot_paths = [
            os.path.join(home, entry)
            for entry in ("dirpath1", "dirpath2", "filepath1")
        ]
        input_dict = {
            "message": "my test snapshot",
            "paths": snapshot_paths,
            "environment_paths": [env_def_path],
            "config_filename": config_filepath,
            "stats_filename": stats_filepath,
        }

        # Phase one: user-created snapshot, then status
        first_snapshot = self.snapshot_controller.create(input_dict)

        (status_dict, current_snapshot, latest_user_snapshot,
         latest_auto_snapshot, unstaged_code, unstaged_environment,
         unstaged_files) = self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test4"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        # The snapshot came from other environments and files, so the user
        # is not sitting on any current snapshot
        assert not current_snapshot
        assert isinstance(latest_user_snapshot, Snapshot)
        assert latest_user_snapshot == first_snapshot
        assert not latest_auto_snapshot
        assert not unstaged_code
        assert not unstaged_environment
        assert not unstaged_files

        # Phase two: create and run a task, then status again
        first_task = self.task_controller.create()
        task_dict = {"command_list": ["sh", "-c", "echo accuracy:0.45"]}
        updated_first_task = self.task_controller.run(
            first_task.id, task_dict=task_dict)

        snapshot_dal = self.task_controller.dal.snapshot
        before_snapshot_obj = snapshot_dal.get_by_id(
            updated_first_task.before_snapshot_id)
        after_snapshot_obj = snapshot_dal.get_by_id(
            updated_first_task.after_snapshot_id)

        environment_dal = self.task_controller.dal.environment
        before_environment_obj = environment_dal.get_by_id(
            before_snapshot_obj.environment_id)
        after_environment_obj = environment_dal.get_by_id(
            after_snapshot_obj.environment_id)
        assert before_environment_obj == after_environment_obj
        self.environment_ids.append(after_environment_obj.id)

        (status_dict, current_snapshot, latest_user_snapshot,
         latest_auto_snapshot, unstaged_code, unstaged_environment,
         unstaged_files) = self.project_controller.status()

        assert status_dict
        assert isinstance(status_dict, dict)
        assert status_dict['name'] == "test4"
        assert status_dict['description'] == "test description"
        assert isinstance(status_dict['config'], dict)
        assert isinstance(current_snapshot, Snapshot)
        assert isinstance(latest_user_snapshot, Snapshot)
        assert latest_user_snapshot == first_snapshot
        assert isinstance(latest_auto_snapshot, Snapshot)
        # The current snapshot is the run's before snapshot
        assert current_snapshot == before_snapshot_obj
        assert current_snapshot != latest_auto_snapshot
        assert current_snapshot != latest_user_snapshot
        # The latest auto-generated snapshot is the run's after snapshot
        assert latest_auto_snapshot == after_snapshot_obj
        assert latest_auto_snapshot != latest_user_snapshot
        # The user snapshot is neither the before nor the after snapshot
        assert latest_user_snapshot != before_snapshot_obj
        assert latest_user_snapshot != after_snapshot_obj
        assert not unstaged_code
        assert not unstaged_environment
        assert not unstaged_files
Пример #30
0
    def create(self, **kwargs):
        """Create a snapshot from CLI keyword arguments and echo its id.

        When ``run_id`` is given the snapshot is created from that run via
        ``SnapshotController.create_from_task``; otherwise a snapshot
        dictionary is assembled from the remaining arguments and passed to
        ``SnapshotController.create``, after which the snapshot is marked
        visible.

        Parameters
        ----------
        run_id : str, optional
            id of the run to create the snapshot from; cannot be combined
            with any environment, paths, config file or stats file argument
        message : str, optional
            description of the snapshot
        label : str, optional
            label for the snapshot
        environment_id, environment_paths : optional
            environment source, checked with ``mutually_exclusive``
        paths : list, optional
            file paths to include in the snapshot
        config_filepath, config_filename, config : optional
            config source, checked with ``mutually_exclusive``; ``config`` is
            a list of items parsed with ``parse_cli_key_value``
        stats_filepath, stats_filename, stats : optional
            stats source, checked with ``mutually_exclusive``; ``stats`` is
            a list of items parsed with ``parse_cli_key_value``

        Returns
        -------
        snapshot object
            the object returned by the snapshot controller

        Raises
        ------
        SnapshotCreateFromTaskArgs
            if ``run_id`` is passed together with one of the excluded
            arguments
        """
        self.snapshot_controller = SnapshotController()
        self.cli_helper.echo(__("info", "cli.snapshot.create"))
        run_id = kwargs.get("run_id", None)
        # creating snapshot with task id if it exists
        if run_id is not None:
            excluded_args = [
                "environment_id", "environment_paths", "paths",
                "config_filepath", "config_filename", "stats_filepath",
                "stats_filename"
            ]
            for arg in excluded_args:
                if arg in kwargs and kwargs[arg] is not None:
                    raise SnapshotCreateFromTaskArgs(
                        "error", "cli.snapshot.create.run.args", arg)

            message = kwargs.get("message", None)
            label = kwargs.get("label", None)
            # Create a new core snapshot object
            snapshot_task_obj = self.snapshot_controller.create_from_task(
                message, run_id, label=label)
            self.cli_helper.echo(
                "Created snapshot id: %s" % snapshot_task_obj.id)
            return snapshot_task_obj
        else:
            # creating snapshot without task id
            snapshot_dict = {"visible": True}

            # Environment
            if kwargs.get("environment_id", None) or kwargs.get(
                    "environment_paths", None):
                mutually_exclusive_args = [
                    "environment_id", "environment_paths"
                ]
                mutually_exclusive(mutually_exclusive_args, kwargs,
                                   snapshot_dict)

            # File
            if kwargs.get("paths", None):
                snapshot_dict['paths'] = kwargs['paths']

            # Config
            if kwargs.get("config_filepath", None) or kwargs.get(
                    "config_filename", None) or kwargs.get("config", None):
                mutually_exclusive_args = [
                    "config_filepath", "config_filename", "config"
                ]
                mutually_exclusive(mutually_exclusive_args, kwargs,
                                   snapshot_dict)
            # parsing config
            if "config" in snapshot_dict:
                config = {}
                config_list = snapshot_dict["config"]
                for item in config_list:
                    item_parsed_dict = parse_cli_key_value(item, 'config')
                    config.update(item_parsed_dict)
                snapshot_dict["config"] = config

            # Stats: the guard must look at "stats" (not "config") so that
            # stats passed on their own are picked up, and config passed on
            # its own does not trigger the stats check
            if kwargs.get("stats_filepath", None) or kwargs.get(
                    "stats_filename", None) or kwargs.get("stats", None):
                mutually_exclusive_args = [
                    "stats_filepath", "stats_filename", "stats"
                ]
                mutually_exclusive(mutually_exclusive_args, kwargs,
                                   snapshot_dict)
            # parsing stats
            if "stats" in snapshot_dict:
                stats = {}
                stats_list = snapshot_dict["stats"]
                for item in stats_list:
                    item_parsed_dict = parse_cli_key_value(item, 'stats')
                    stats.update(item_parsed_dict)
                snapshot_dict["stats"] = stats

            optional_args = ["message", "label"]

            for arg in optional_args:
                if arg in kwargs and kwargs[arg] is not None:
                    snapshot_dict[arg] = kwargs[arg]

            snapshot_obj = self.snapshot_controller.create(snapshot_dict)
            # Because snapshots may be invisible to the user, this ensures that by the end
            # the user can monitor the snapshot on the CLI, by making it visible
            snapshot_obj = self.snapshot_controller.update(
                snapshot_obj.id, visible=True)
            self.cli_helper.echo(
                __("info", "cli.snapshot.create.success", snapshot_obj.id))
            return snapshot_obj