Example #1
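A hedged sketch of the module imports this test class assumes; the datmo module paths below are best guesses and are not verified against a specific release:

import os
import platform
import random
import string
import tempfile

from io import TextIOWrapper

# datmo-internal imports (exact module paths are assumptions)
from datmo.core.controller.project import ProjectController
from datmo.core.controller.environment.environment import EnvironmentController
from datmo.core.controller.task import TaskController
from datmo.core.util.misc_functions import to_unicode
from datmo.core.util.exceptions import TaskRunException, EntityNotFound
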
class TestTaskController():
    def setup_method(self):
        # provide mountable tmp directory for docker
        tempfile.tempdir = "/tmp" if not platform.system() == "Windows" else None
        test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                        tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.project = ProjectController(self.temp_dir)
        self.project.init("test", "test description")
        self.environment = EnvironmentController(self.temp_dir)
        self.task = TaskController(self.temp_dir)

    def teardown_method(self):
        pass

    def test_create(self):
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        task_gpu = False
        input_dict = {
            "command": task_command,
            "gpu": task_gpu
        }

        # Create task in the project
        task_obj = self.task.create(input_dict)

        assert task_obj
        assert task_obj.command == task_command
        assert task_obj.gpu == task_gpu

    def test_run_helper(self):
        # TODO: Try out more options (see below)
        # Create environment_driver id
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode("FROM datmo/xgboost:cpu"))

        environment_obj = self.environment.create({
            "definition_filepath": env_def_path
        })

        # Set log filepath
        log_filepath = os.path.join(self.task.home,
                                    "test.log")

        # create volume to mount
        temp_test_dirpath = os.path.join(self.temp_dir, "temp")
        os.makedirs(temp_test_dirpath)

        # Test option set 1
        random_name = ''.join([random.choice(string.ascii_letters + string.digits)
                               for _ in range(32)])
        options_dict = {
            "command": ["sh", "-c", "echo accuracy:0.45"],
            "ports": ["8888:8888"],
            "gpu": False,
            "name": random_name,
            "volumes": {
                temp_test_dirpath: {
                    'bind': '/task/',
                    'mode': 'rw'
                }
            },
            "detach": False,
            "stdin_open": True,
            "tty": False,
            "api": False
        }

        return_code, run_id, logs = \
            self.task._run_helper(environment_obj.id,
                                  options_dict, log_filepath)
        assert return_code == 0
        assert run_id and \
               self.task.environment_driver.get_container(run_id)
        assert logs and \
               os.path.exists(log_filepath)
        self.task.environment_driver.stop_remove_containers_by_term(term=random_name)

        # Test option set 2
        random_name_2 = ''.join([random.choice(string.ascii_letters + string.digits)
                                 for _ in range(32)])
        options_dict = {
            "command": ["sh", "-c", "echo accuracy:0.45"],
            "ports": ["8888:8888"],
            "gpu": False,
            "name": random_name_2,
            "volumes": {
                temp_test_dirpath: {
                    'bind': '/task/',
                    'mode': 'rw'
                }
            },
            "detach": False,
            "stdin_open": True,
            "tty": False,
            "api": True
        }

        return_code, run_id, logs = \
            self.task._run_helper(environment_obj.id,
                                  options_dict, log_filepath)
        assert return_code == 0
        assert run_id and \
               self.task.environment_driver.get_container(run_id)
        assert logs and \
               os.path.exists(log_filepath)
        self.task.environment_driver.stop_remove_containers_by_term(term=random_name_2)

    def test_parse_logs_for_results(self):
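        # _parse_logs_for_results is expected to scan each log line for a
        # "key : value" pair and collect the pairs into a dict of raw strings
        # (inferred from the assertions below; values are not coerced to float)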
        test_logs = """
        this is a log
        accuracy is good
        accuracy : 0.94
        this did not work
        validation : 0.32
        model_type : logistic regression
        """
        result = self.task._parse_logs_for_results(test_logs)

        assert isinstance(result, dict)
        assert result['accuracy'] == "0.94"
        assert result['validation'] == "0.32"
        assert result['model_type'] == "logistic regression"

    def test_run(self):
        # 1) Test success case with default values and env def file
        # 2) Test failure case if running same task (conflicting containers)
        # 3) Test failure case if running same task with snapshot_dict (conflicting containers)
        # 4) Test success case with snapshot_dict
        # 5) Test success case with saved file during task run

        # TODO: look into log filepath randomness, sometimes logs are not written
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        input_dict = {
            "command": task_command
        }

        # Create task in the project
        task_obj = self.task.create(input_dict)

        # Create environment definition
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode("FROM datmo/xgboost:cpu"))

        # 1) Test option 1
        updated_task_obj = self.task.run(task_obj.id)

        assert task_obj.id == updated_task_obj.id

        assert updated_task_obj.before_snapshot_id
        assert updated_task_obj.ports is None
        assert updated_task_obj.gpu is False
        assert updated_task_obj.interactive is False
        assert updated_task_obj.task_dirpath
        assert updated_task_obj.log_filepath
        assert updated_task_obj.start_time

        assert updated_task_obj.after_snapshot_id
        assert updated_task_obj.run_id
        assert updated_task_obj.logs
        assert "accuracy" in updated_task_obj.logs
        assert updated_task_obj.results
        assert updated_task_obj.results == {"accuracy": "0.45"}
        assert updated_task_obj.status == "SUCCESS"
        assert updated_task_obj.end_time
        assert updated_task_obj.duration

        # 2) Test option 2
        failed = False
        try:
            self.task.run(task_obj.id)
        except TaskRunException:
            failed = True
        assert failed

        # 3) Test option 3

        # Create files to add
        self.project.file_driver.create("dirpath1", directory=True)
        self.project.file_driver.create("dirpath2", directory=True)
        self.project.file_driver.create("filepath1")

        # Snapshot dictionary
        snapshot_dict = {
            "filepaths": [os.path.join(self.project.home, "dirpath1"),
                          os.path.join(self.project.home, "dirpath2"),
                          os.path.join(self.project.home, "filepath1")],
        }

        # Run a basic task in the project
        failed = False
        try:
            self.task.run(task_obj.id,
                          snapshot_dict=snapshot_dict)
        except TaskRunException:
            failed = True
        assert failed

        # Test when the specific task id is already RUNNING
        # Create task in the project
        task_obj_1 = self.task.create(input_dict)
        self.task.dal.task.update({"id": task_obj_1.id, "status": "RUNNING"})
        # Create environment_driver definition
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode("FROM datmo/xgboost:cpu"))

        failed = False
        try:
            self.task.run(task_obj_1.id)
        except TaskRunException:
            failed = True
        assert failed

        # 4) Test option 4

        # Create a new task in the project
        task_obj_2 = self.task.create(input_dict)

        # Run another task in the project
        updated_task_obj_2 = self.task.run(task_obj_2.id,
                                           snapshot_dict=snapshot_dict)

        assert task_obj_2.id == updated_task_obj_2.id

        assert updated_task_obj_2.before_snapshot_id
        assert updated_task_obj_2.ports is None
        assert updated_task_obj_2.gpu is False
        assert updated_task_obj_2.interactive is False
        assert updated_task_obj_2.task_dirpath
        assert updated_task_obj_2.log_filepath
        assert updated_task_obj_2.start_time

        assert updated_task_obj_2.after_snapshot_id
        assert updated_task_obj_2.run_id
        assert updated_task_obj_2.logs
        assert "accuracy" in updated_task_obj_2.logs
        assert updated_task_obj_2.results
        assert updated_task_obj_2.results == {"accuracy": "0.45"}
        assert updated_task_obj_2.status == "SUCCESS"
        assert updated_task_obj_2.end_time
        assert updated_task_obj_2.duration

        # 5) Test option 5

        # Create a basic script
        # (requires numpy and sklearn, which only exist inside the container environment)
        test_filepath = os.path.join(self.temp_dir, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import os\n"))
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))
            f.write(to_unicode("print(' accuracy: 0.56 ')\n"))
            f.write(to_unicode("with open(os.path.join('/task', 'new_file.txt'), 'a') as f:\n"))
            f.write(to_unicode("    f.write('my test file')\n"))

        task_command = ["python", "script.py"]
        input_dict = {
            "command": task_command
        }

        # Create task in the project
        task_obj_2 = self.task.create(input_dict)

        # Create environment definition
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode("FROM datmo/xgboost:cpu"))

        updated_task_obj_2 = self.task.run(task_obj_2.id)

        assert updated_task_obj_2.before_snapshot_id
        assert updated_task_obj_2.ports is None
        assert updated_task_obj_2.gpu is False
        assert updated_task_obj_2.interactive is False
        assert updated_task_obj_2.task_dirpath
        assert updated_task_obj_2.log_filepath
        assert updated_task_obj_2.start_time

        assert updated_task_obj_2.after_snapshot_id
        assert updated_task_obj_2.run_id
        assert updated_task_obj_2.logs
        assert "accuracy" in updated_task_obj_2.logs
        assert updated_task_obj_2.results
        assert updated_task_obj_2.results == {"accuracy": "0.56"}
        assert updated_task_obj_2.status == "SUCCESS"
        assert updated_task_obj_2.end_time
        assert updated_task_obj_2.duration

        # Test that the after-snapshot contains the files written during the run
        after_snapshot_obj = self.task.dal.snapshot.get_by_id(
            updated_task_obj_2.after_snapshot_id
        )
        file_collection_obj = self.task.dal.file_collection.get_by_id(
            after_snapshot_obj.file_collection_id
        )
        files_absolute_path = os.path.join(self.task.home, file_collection_obj.path)

        assert os.path.isfile(os.path.join(files_absolute_path, "task.log"))
        assert os.path.isfile(os.path.join(files_absolute_path, "new_file.txt"))

    def test_list(self):
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        input_dict = {
            "command": task_command
        }

        # Create tasks in the project
        task_obj_1 = self.task.create(input_dict)
        task_obj_2 = self.task.create(input_dict)

        # List all tasks regardless of filters
        result = self.task.list()

        assert len(result) == 2 and \
               task_obj_1 in result and \
               task_obj_2 in result

        # List all tasks and filter by session
        result = self.task.list(
            session_id=self.project.current_session.id)

        assert len(result) == 2 and \
               task_obj_1 in result and \
               task_obj_2 in result

    def test_get_files(self):
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        input_dict = {
            "command": task_command
        }

        # Create task in the project
        task_obj = self.task.create(input_dict)

        # Create environment definition
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode("FROM datmo/xgboost:cpu"))

        # Create file to add
        self.project.file_driver.create("dirpath1", directory=True)
        self.project.file_driver.create(os.path.join("dirpath1", "filepath1"))

        # Snapshot dictionary
        snapshot_dict = {
            "filepaths": [os.path.join(self.project.home, "dirpath1", "filepath1")],
        }

        # Test the default values
        updated_task_obj = self.task.run(task_obj.id,
                                         snapshot_dict=snapshot_dict)

        # TODO: Test case for during run and before_snapshot run
        # Get files for the task after run is complete (default)
        result = self.task.get_files(updated_task_obj.id)

        after_snapshot_obj = self.task.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id
        )
        file_collection_obj = self.task.dal.file_collection.get_by_id(
            after_snapshot_obj.file_collection_id
        )

        assert len(result) == 2
        assert isinstance(result[0], TextIOWrapper)
        assert result[0].name == os.path.join(self.task.home, ".datmo",
                                              "collections",
                                              file_collection_obj.filehash,
                                              "task.log")
        assert result[0].mode == "r"
        assert isinstance(result[1], TextIOWrapper)
        assert result[1].name == os.path.join(self.task.home, ".datmo",
                                              "collections",
                                              file_collection_obj.filehash,
                                              "filepath1")
        assert result[1].mode == "r"

        # Get files for the task after run is complete for different mode
        result = self.task.get_files(updated_task_obj.id, mode="a")

        assert len(result) == 2
        assert isinstance(result[0], TextIOWrapper)
        assert result[0].name == os.path.join(self.task.home, ".datmo",
                                              "collections",
                                              file_collection_obj.filehash,
                                              "task.log")
        assert result[0].mode == "a"
        assert isinstance(result[1], TextIOWrapper)
        assert result[1].name == os.path.join(self.task.home, ".datmo",
                                              "collections",
                                              file_collection_obj.filehash,
                                              "filepath1")
        assert result[1].mode == "a"

    def test_delete(self):
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        input_dict = {
            "command": task_command
        }

        # Create tasks in the project
        task_obj = self.task.create(input_dict)

        # Delete task from the project
        result = self.task.delete(task_obj.id)

        # Check if task retrieval throws error
        thrown = False
        try:
            self.task.dal.task.get_by_id(task_obj.id)
        except EntityNotFound:
            thrown = True

        assert result and thrown

    def test_stop(self):
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        input_dict = {
            "command": task_command
        }

        # Create task in the project
        task_obj = self.task.create(input_dict)

        # Create environment driver definition
        env_def_path = os.path.join(self.project.home,
                                    "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode("FROM datmo/xgboost:cpu"))

        # Test the default values
        updated_task_obj = self.task.run(task_obj.id)

        # Stop the task
        task_id = updated_task_obj.id
        result = self.task.stop(task_id)

        # Check that stopping with a wrong task id throws an error
        # (stop looks up the task, so EntityNotFound is the expected failure)
        thrown = False
        try:
            self.task.stop("incorrect_task_id")
        except EntityNotFound:
            thrown = True

        assert result and thrown
Example #2
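This class references a module-level test_datmo_dir and several names defined elsewhere in the test module; a hedged sketch of the assumed preamble (datmo module paths are best guesses):

import os
import platform
import random
import shutil
import string
import tempfile
import uuid

import timeout_decorator

# datmo-internal imports (exact module paths are assumptions)
from datmo.config import Config
from datmo.core.controller.project import ProjectController
from datmo.core.controller.environment.environment import EnvironmentController
from datmo.core.entity.environment import Environment
from datmo.core.util.misc_functions import to_bytes
from datmo.core.util.exceptions import (
    EntityNotFound, EnvironmentDoesNotExist, FileAlreadyExistsError,
    ProjectNotInitialized, RequiredArgumentMissing, TooManyArgumentsFound,
    UnstagedChanges)
from datmo.core.util.test_utils import \
    pytest_docker_environment_failed_instantiation  # assumed location

# Mountable tmp directory for docker, shared by the tests and decorators below
tempfile.tempdir = "/tmp" if platform.system() != "Windows" else None
test_datmo_dir = os.environ.get('TEST_DATMO_DIR', tempfile.gettempdir())
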
class TestEnvironmentController():
    def setup_method(self):
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        Config().set_home(self.temp_dir)
        self.project_controller = ProjectController()

    def teardown_method(self):
        pass

    def __setup(self):
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()
        with open(os.path.join(self.temp_dir, "test.txt"), "wb") as f:
            f.write(to_bytes("hello"))
        self.random_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "test")
        with open(self.random_filepath, "wb") as f:
            f.write(to_bytes("cool"))
        self.definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        with open(self.definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

    def test_init_fail_project_not_init(self):
        Config().set_home(self.temp_dir)
        failed = False
        try:
            EnvironmentController()
        except ProjectNotInitialized:
            failed = True
        assert failed

    def test_get_supported_environments(self):
        self.__setup()
        result = self.environment_controller.get_supported_environments()
        assert result

    def test_setup(self):
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()

        # Test success setup once (no files present)
        options = {"name": "xgboost:cpu"}
        result = self.environment_controller.setup(options=options)
        output_definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")

        assert isinstance(result, Environment)
        assert result.name == options['name']
        assert result.description == "supported base environment created by datmo"
        assert os.path.isfile(output_definition_filepath)
        assert "FROM datmo/xgboost:cpu" in open(output_definition_filepath,
                                                "r").read()

        # Test success setup again (files present, but staged)
        options = {"name": "xgboost:cpu"}
        result = self.environment_controller.setup(options=options)
        output_definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")

        assert isinstance(result, Environment)
        assert result.name == options['name']
        assert result.description == "supported base environment created by datmo"
        assert os.path.isfile(output_definition_filepath)
        assert "FROM datmo/xgboost:cpu" in open(output_definition_filepath,
                                                "r").read()

        # Test failure in downstream function (e.g. bad inputs, no name given)
        failed = False
        try:
            self.environment_controller.setup(options={})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Change environment file
        with open(output_definition_filepath, "wb") as f:
            f.write(to_bytes("new content"))

        # Test failure setup (unstaged changes)
        failed = False
        try:
            self.environment_controller.setup(options=options)
        except UnstagedChanges:
            failed = True
        assert failed

    def test_create(self):
        # Scenarios are numbered by option and listed in execution order:
        # 0) Test SUCCESS create when definition path exists in project environment directory (no input, no root) -- with hardware file
        # 1) Test SUCCESS create when definition path exists in project environment directory (no input, no root)
        # 5) Test SUCCESS when definition path exists in project environment directory and passed from input dict (takes input)
        # 2) Test SUCCESS create when definition path exists in root project folder (no input, no project environment dir)
        # 3) Test SUCCESS create when definition path is passed into input dict (takes input, no project environment dir)
        # 4) Test SUCCESS create when definition path is passed into input dict along with expected filename to be saved
        # 6) Test FAIL when passing same filepath with same filename into input dict

        self.__setup()

        input_dict_0 = {"name": "test", "description": "test description"}

        # 0) Test option 0 (cannot test hash because hardware is machine-dependent)
        environment_obj_0 = self.environment_controller.create(input_dict_0)
        assert environment_obj_0
        assert isinstance(environment_obj_0, Environment)
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info
        assert environment_obj_0.unique_hash
        assert environment_obj_0.name == "test"
        assert environment_obj_0.description == "test description"

        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_0.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "test"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        output = open(os.path.join(file_collection_dir, "datmoDockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))
        output = open(os.path.join(file_collection_dir, "hardware_info"),
                      "r").read()
        print(repr(output))

        # 1) Test option 1
        environment_obj_0 = self.environment_controller.create(
            input_dict_0, save_hardware_file=False)
        assert environment_obj_0
        assert isinstance(environment_obj_0, Environment)
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_0.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "test"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        output = open(os.path.join(file_collection_dir, "datmoDockerfile"),
                      "r").read()
        print(repr(output))
        assert environment_obj_0.unique_hash == "c309ae4f58163693a91816988d9dc88b"
        assert environment_obj_0.name == "test"
        assert environment_obj_0.description == "test description"
        # Files ["test", "Dockerfile", "datmoDockerfile"]

        # 5) Test option 5
        input_dict_1 = {
            "name": "test",
            "description": "test description",
            "paths": [self.definition_filepath],
        }

        environment_obj = self.environment_controller.create(
            input_dict_1, save_hardware_file=False)
        assert environment_obj
        assert isinstance(environment_obj, Environment)
        assert environment_obj.id
        assert environment_obj.driver_type == "docker"
        assert environment_obj.file_collection_id
        assert environment_obj.definition_filename
        assert environment_obj.hardware_info
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        output = open(os.path.join(file_collection_dir, "datmoDockerfile"),
                      "r").read()
        print(repr(output))
        assert environment_obj.unique_hash == "6e06d7c4d77cb6ae69e7e0efa883ef4b"
        assert environment_obj.name == "test"
        assert environment_obj.description == "test description"
        # Files ["Dockerfile", "datmoDockerfile"]

        # remove the project environment directory
        shutil.rmtree(
            self.environment_controller.file_driver.environment_directory)

        # Create environment definition in root directory
        home_definition_filepath = os.path.join(
            self.environment_controller.home, "Dockerfile")
        with open(home_definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # 2) Test option 2
        environment_obj_1 = self.environment_controller.create(
            input_dict_0, save_hardware_file=False)

        assert environment_obj_1
        assert isinstance(environment_obj_1, Environment)
        assert environment_obj_1.id
        assert environment_obj_1.driver_type == "docker"
        assert environment_obj_1.file_collection_id
        assert environment_obj_1.definition_filename
        assert environment_obj_1.hardware_info
        assert environment_obj_1.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_1.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_1.name == "test"
        assert environment_obj_1.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert environment_obj_1.unique_hash == "6e06d7c4d77cb6ae69e7e0efa883ef4b"

        # 3) Test option 3
        input_dict_2 = {
            "name": "test",
            "description": "test description",
            "paths": [home_definition_filepath],
        }

        # Create environment in the project
        environment_obj_2 = self.environment_controller.create(
            input_dict_2, save_hardware_file=False)

        assert environment_obj_2
        assert isinstance(environment_obj_2, Environment)
        assert environment_obj_2.id
        assert environment_obj_2.driver_type == "docker"
        assert environment_obj_2.file_collection_id
        assert environment_obj_2.definition_filename
        assert environment_obj_2.hardware_info
        assert environment_obj_2.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_2.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_2.name == "test"
        assert environment_obj_2.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert environment_obj_2.unique_hash == "6e06d7c4d77cb6ae69e7e0efa883ef4b"

        # 4) Test option 4
        input_dict_3 = {
            "paths": [home_definition_filepath + ">Dockerfile"],
        }

        # Create environment in the project
        environment_obj_3 = self.environment_controller.create(
            input_dict_3, save_hardware_file=False)

        assert environment_obj_3
        assert isinstance(environment_obj_3, Environment)
        assert environment_obj_3.id
        assert environment_obj_3.driver_type == "docker"
        assert environment_obj_3.file_collection_id
        assert environment_obj_3.definition_filename
        assert environment_obj_3.hardware_info
        assert environment_obj_3.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_3.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_3.name == "test"
        assert environment_obj_3.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert environment_obj_3.unique_hash == "6e06d7c4d77cb6ae69e7e0efa883ef4b"

        # 6) Test option 6
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")

        input_dict = {
            "paths": [
                definition_filepath + ">Dockerfile",
                definition_filepath + ">Dockerfile"
            ],
        }

        # Create environment in the project
        failed = False
        try:
            _ = self.environment_controller.create(input_dict,
                                                   save_hardware_file=False)
        except FileAlreadyExistsError:
            failed = True

        assert failed

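    # The decorator below (assumed datmo test helper) is given the test
    # directory and guards docker-backed tests on machines where a docker
    # daemon cannot be instantiated, so those tests do not hard-fail there.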
    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_build(self):
        # 1) Test build when no environment given
        # 2) Test build when definition path exists and given
        # 3) Test build when NO file exists and definition path exists
        # 4) Test build when file exists and definition path exists
        # 5) Test build when file exists but NO definition path exists
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        # 1) Test option 1
        failed = False
        try:
            _ = self.environment_controller.build("does_not_exist")
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))
        input_dict = {
            "paths": [definition_filepath],
        }

        # 2) Test option 2
        # Create environment in the project
        environment_obj_1 = self.environment_controller.create(input_dict)
        result = self.environment_controller.build(environment_obj_1.id)
        assert result

        # 3) Test option 3
        # Create environment in the project
        environment_obj_2 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_2.id)
        assert result

        # Create script to test
        test_filepath = os.path.join(self.environment_controller.home,
                                     "script.py")
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import numpy\n"))
            f.write(to_bytes("import sklearn\n"))
            f.write(to_bytes("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_3.id)
        assert result

        # test 2), 3), and 4) will result in the same environment
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # Remove the root Dockerfile to test building when it no longer exists there
        os.remove(definition_filepath)

        # 5) Test option 5
        # Create environment definition in project environment directory
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))
        environment_obj_4 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_4.id)
        assert result

        # teardown
        self.environment_controller.delete(environment_obj_1.id)

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_run(self):
        # Test run simple command with simple Dockerfile
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # 0) Test option 0
        # Create environment definition in project environment directory
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))

        random_name = ''.join([
            random.choice(string.ascii_letters + string.digits)
            for _ in range(32)
        ])
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_name,
            "volumes": None,
            "detach": True,
            "stdin_open": False,
            "mem_limit": "4g",
            "tty": False,
            "api": False
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create({})

        log_filepath = os.path.join(self.project_controller.home, "task.log")

        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment_controller.delete(environment_obj.id)
        shutil.rmtree(
            self.environment_controller.file_driver.environment_directory)

        # 1) Test option 1
        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))

        random_name = ''.join([
            random.choice(string.ascii_letters + string.digits)
            for _ in range(32)
        ])
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_name,
            "volumes": None,
            "mem_limit": "4g",
            "detach": True,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)

        log_filepath = os.path.join(self.project_controller.home, "task.log")

        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment_controller.delete(environment_obj.id)

        # 2) Test option 2
        os.remove(definition_filepath)

        # Create script to test
        test_filepath = os.path.join(self.environment_controller.home,
                                     "script.py")
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import os\n"))
            f.write(to_bytes("import sys\n"))
            f.write(to_bytes("print('hello')\n"))

        # Create environment in the project
        environment_obj = self.environment_controller.create({})
        self.environment_controller.build(environment_obj.id)

        random_name = ''.join([
            random.choice(string.ascii_letters + string.digits)
            for _ in range(32)
        ])
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_name,
            "volumes": {
                self.environment_controller.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            },
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment_controller.delete(environment_obj.id)

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_interactive_run(self):
        # 1) Test run interactive terminal in environment
        # 2) Test run jupyter notebook in environment
        # Create environment definition
        self.project_controller.init("test6", "test description")
        self.environment_controller = EnvironmentController()

        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM datmo/xgboost:cpu" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        # 1) Test option 1
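        # An interactive run blocks, so the 10s timeout firing is the success
        # signal here: timed_run_result flips to True only when TimeoutError
        # is raised; a prompt return would leave it False and fail the assert.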
        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, result):
            run_options = {
                "command": [],
                "ports": ["8888:8888"],
                "name": container_name,
                "volumes": None,
                "mem_limit": "4g",
                "detach": True,
                "stdin_open": True,
                "tty": True,
                "api": False
            }

            log_filepath = os.path.join(self.project_controller.home,
                                        "task.log")

            # Build environment in the project
            _ = self.environment_controller.build(environment_obj.id)

            # Run environment in the project
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)

            return result

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            timed_run_result = True

        assert timed_run_result

        # teardown
        self.environment_controller.delete(environment_obj.id)

        # 2) Test option 2
        environment_obj = self.environment_controller.create(input_dict)

        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, result):
            run_options = {
                "command": ["jupyter", "notebook"],
                "ports": ["8888:8888"],
                "name": container_name,
                "volumes": None,
                "mem_limit": "4g",
                "detach": True,
                "stdin_open": False,
                "tty": False,
                "api": False
            }

            log_filepath = os.path.join(self.project_controller.home,
                                        "task.log")

            # Build environment in the project
            _ = self.environment_controller.build(environment_obj.id)

            # Run environment in the project
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)

            return result

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            timed_run_result = True

        assert timed_run_result

        # Stop the running environment
        # self.environment_controller.stop(container_name)

        # teardown
        self.environment_controller.delete(environment_obj.id)

    def test_list(self):
        self.project_controller.init("test4", "test description")
        self.environment_controller = EnvironmentController()

        # Create environment definition for object 1
        definition_path_1 = os.path.join(self.environment_controller.home,
                                         "Dockerfile")
        with open(definition_path_1, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        input_dict_1 = {
            "paths": [definition_path_1],
        }

        # Create environment in the project
        environment_obj_1 = self.environment_controller.create(input_dict_1)

        # Create environment definition for object 2
        definition_path_2 = os.path.join(self.environment_controller.home,
                                         "Dockerfile2")
        with open(definition_path_2, "wb") as f:
            f.write(to_bytes("FROM python:3.4-alpine"))

        input_dict_2 = {
            "paths": [definition_path_2 + ">Dockerfile"],
        }

        # Create second environment in the project
        environment_obj_2 = self.environment_controller.create(input_dict_2)

        # List all environments and ensure they exist
        result = self.environment_controller.list()

        assert len(result) == 2 and \
            environment_obj_1 in result and \
            environment_obj_2 in result

    def test_update(self):
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)

        # Test success update
        new_name = "test name"
        new_description = "test description"
        result = self.environment_controller.update(
            environment_obj.id, name=new_name, description=new_description)
        assert result
        assert isinstance(result, Environment)
        assert result.name == new_name
        assert result.description == new_description

        # Test failed update
        failed = False
        try:
            self.environment_controller.update("random_id",
                                               name=new_name,
                                               description=new_description)
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_delete(self):
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)

        # Delete environment in the project
        result = self.environment_controller.delete(environment_obj.id)

        # Check if environment retrieval throws error
        thrown = False
        try:
            self.environment_controller.dal.environment.get_by_id(
                environment_obj.id)
        except EntityNotFound:
            thrown = True

        assert result == True and \
            thrown == True

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_failure(self):
        # 1) Test failure with RequiredArgumentMissing
        # 2) Test failure with TooManyArgumentsFound
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        # 1) Test option 1
        failed = False
        try:
            self.environment_controller.stop()
        except RequiredArgumentMissing:
            failed = True
        assert failed

        # 2) Test option 2
        failed = False
        try:
            self.environment_controller.stop(run_id="hello",
                                             match_string="there")
        except TooManyArgumentsFound:
            failed = True
        assert failed

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_success(self):
        # TODO: test more run options
        # 1) Test run_id input to stop
        # 2) Test match_string input to stop
        # 3) Test all input to stop
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": "datmo-task-" + self.environment_controller.model.id +
            "-" + "test",
            "volumes": None,
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Create environment definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + "\n"))
            f.write(to_bytes(str("RUN echo " + random_text)))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)

        log_filepath = os.path.join(self.project_controller.home, "task.log")

        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        # 1) Test option 1

        _, run_id, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment_controller.stop(run_id=run_id)

        assert return_code

        # 2) Test option 2
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment_controller.stop(
            match_string="datmo-task-" + self.environment_controller.model.id)

        assert return_code

        # 3) Test option 3
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        run_options_2 = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": "datmo-task-" + self.environment_controller.model.id +
            "-" + "test2",
            "volumes": None,
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options_2, log_filepath)
        return_code = self.environment_controller.stop(all=True)

        assert return_code

        # teardown
        self.environment_controller.delete(environment_obj.id)

    def test_exists_env(self):
        # Test failure, not initialized
        failed = False
        try:
            _ = self.environment_controller.create({})
        except Exception:
            failed = True
        assert failed

        # Setup
        self.__setup()
        environment_obj = self.environment_controller.create({})

        # Check by environment id
        result = self.environment_controller.exists(
            environment_id=environment_obj.id)
        assert result

        # Check by unique hash
        result = self.environment_controller.exists(
            environment_unique_hash=environment_obj.unique_hash)
        assert result

        # Test with wrong environment id
        result = self.environment_controller.exists(
            environment_id='test_wrong_env_id')
        assert not result

    def test_calculate_project_environment_hash(self):
        # Setup
        self.__setup()
        # Test hashing the default (with hardware info)
        result = self.environment_controller._calculate_project_environment_hash(
        )
        assert result
        # Test hashing the default Dockerfile
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "c309ae4f58163693a91816988d9dc88b"
        # Test if hash is the same as that of create
        environment_obj = self.environment_controller.create(
            {}, save_hardware_file=False)
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "c309ae4f58163693a91816988d9dc88b"
        assert result == environment_obj.unique_hash

        # Test if the hash is the same if the same file is passed in as an input
        input_dict = {
            "paths": [self.definition_filepath, self.random_filepath]
        }
        environment_obj_1 = self.environment_controller.create(
            input_dict, save_hardware_file=False)
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "c309ae4f58163693a91816988d9dc88b"
        assert result == environment_obj_1.unique_hash

    def test_has_unstaged_changes(self):
        # Setup
        self.__setup()
        _ = self.environment_controller.create({})
        # Check for no unstaged changes
        result = self.environment_controller._has_unstaged_changes()
        assert not result

        # Make a change to the file (update python version)
        with open(
                os.path.join(
                    self.environment_controller.file_driver.
                    environment_directory, "Dockerfile"), "wb") as f:
            f.write(to_bytes("FROM python:3.6-alpine"))

        # Check again, should have unstaged changes
        result = self.environment_controller._has_unstaged_changes()
        assert result

    def test_check_unstaged_changes(self):
        # Setup
        self.__setup()
        obj = self.environment_controller.create({})

        # 1) After committing the changes
        # Check for no unstaged changes because already committed
        result = self.environment_controller.check_unstaged_changes()
        assert not result

        # Add a new file
        with open(
                os.path.join(
                    self.environment_controller.file_driver.
                    environment_directory, "test2"), "wb") as f:
            f.write(to_bytes("cool"))

        # 2) Without committing the changes, should raise UnstagedChanges
        failed = False
        try:
            self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        assert failed

        # Remove new file
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test2"))

        # 3) Files are the same as before but no new commit, should have no unstaged changes
        result = self.environment_controller.check_unstaged_changes()
        assert not result

        # 4) Remove another file, now it is different and should have unstaged changes
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        failed = False
        try:
            self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        assert failed

        # 5) Remove the rest of the files, now it is empty and should return as already staged
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        result = self.environment_controller.check_unstaged_changes()
        assert not result

    def test_checkout(self):
        # Setup and create all environment files
        self.__setup()

        # Create an environment to check out to, with defaults
        environment_obj = self.environment_controller.create({})

        # Checkout succeeds when there are no unstaged changes
        result = self.environment_controller.checkout(environment_obj.id)
        assert result
        current_hash = self.environment_controller._calculate_project_environment_hash(
        )
        assert environment_obj.unique_hash == current_hash
        # Check the filenames as well because the hash does not take this into account
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "datmoDockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "hardware_info"))

        # Change file contents to make it unstaged
        with open(self.definition_filepath, "wb") as f:
            f.write(to_bytes("new content"))

        # Checkout failure with unstaged changes
        failed = False
        try:
            _ = self.environment_controller.checkout(environment_obj.id)
        except UnstagedChanges:
            failed = True
        assert failed

        # Create new environment to checkout to with defaults (no hardware)
        environment_obj_1 = self.environment_controller.create(
            {}, save_hardware_file=False)

        # Checkout succeeds when there are no unstaged changes
        result = self.environment_controller.checkout(environment_obj.id)
        assert result
        current_hash = self.environment_controller._calculate_project_environment_hash()
        assert environment_obj.unique_hash == current_hash
        assert environment_obj_1.unique_hash != current_hash
        # Check the filenames as well because the hash does not take them into account
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "datmoDockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "hardware_info"))
Example #3
class TestEnvironmentController():
    def setup_method(self):
        # provide mountable tmp directory for docker
        tempfile.tempdir = "/tmp" if not platform.system(
        ) == "Windows" else None
        test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                        tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.project = ProjectController(self.temp_dir)
        self.environment = EnvironmentController(self.temp_dir)

    def teardown_method(self):
        pass

    def test_create(self):
        # 0) Test create when unsupported language given
        # 1) Test create when NO file exists and NO definition path exists
        # 2) Test create when NO file exists and definition path exists
        # 3) Test create when definition path exists and given
        # 4) Test create when file exists and definition path exists
        # 5) Test create when file exists but NO definition path exists
        # 6) Test create when definition path exists and given for NEW definition filepath

        self.project.init("test3", "test description")

        # 0) Test option 0
        failed = False
        try:
            self.environment.create({"language": "java"})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # 1) Test option 1
        failed = False
        try:
            self.environment.create({})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # 2) Test option 2
        environment_obj_1 = self.environment.create({})

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_1.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_1
        assert environment_obj_1.id
        assert environment_obj_1.driver_type == "docker"
        assert environment_obj_1.file_collection_id
        assert environment_obj_1.definition_filename
        assert environment_obj_1.hardware_info
        assert environment_obj_1.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        # 3) Test option 3
        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj_2 = self.environment.create(input_dict)

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_2.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_2
        assert environment_obj_2.id
        assert environment_obj_2.driver_type == "docker"
        assert environment_obj_2.file_collection_id
        assert environment_obj_2.definition_filename
        assert environment_obj_2.hardware_info
        assert environment_obj_2.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment.create({})

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_3.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_3
        assert environment_obj_3.id
        assert environment_obj_3.driver_type == "docker"
        assert environment_obj_3.file_collection_id
        assert environment_obj_3.definition_filename
        assert environment_obj_3.hardware_info
        assert environment_obj_3.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        # Remove definition filepath
        os.remove(definition_filepath)

        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # 5) Test option 5
        environment_obj_4 = self.environment.create({})

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_4.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_4
        assert environment_obj_4.id
        assert environment_obj_4.driver_type == "docker"
        assert environment_obj_4.file_collection_id
        assert environment_obj_4.definition_filename
        assert environment_obj_4.hardware_info
        assert environment_obj_4.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(
            os.path.join(file_collection_dir, "requirements.txt"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        assert environment_obj_1.id != environment_obj_4.id

        # 6) Test option 6

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM cloudgear/ubuntu:14.04")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create a new environment obj
        environment_obj_5 = self.environment.create(input_dict)

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_5.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_5
        assert environment_obj_5.id
        assert environment_obj_5.driver_type == "docker"
        assert environment_obj_5.file_collection_id
        assert environment_obj_5.definition_filename
        assert environment_obj_5.hardware_info
        assert environment_obj_5.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        assert environment_obj_5.id != environment_obj_1.id
        assert environment_obj_5.id != environment_obj_4.id

    def test_build(self):
        # 1) Test build when no environment given
        # 2) Test build when definition path exists and given
        # 3) Test build when NO file exists and definition path exists
        # 4) Test build when file exists and definition path exists
        # 5) Test build when file exists but NO definition path exists
        self.project.init("test5", "test description")

        # 1) Test option 1
        failed = False
        try:
            _ = self.environment.build("does_not_exist")
        except EntityNotFound:
            failed = True
        assert failed

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))
        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # 2) Test option 2
        # Create environment in the project
        environment_obj_1 = self.environment.create(input_dict)
        result = self.environment.build(environment_obj_1.id)
        assert result

        # 3) Test option 3
        # Create environment in the project
        environment_obj_2 = self.environment.create({})
        result = self.environment.build(environment_obj_2.id)
        assert result

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment.create({})
        result = self.environment.build(environment_obj_3.id)
        assert result

        # test 2), 3), and 4) will result in the same environment
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # Test building when the Dockerfile no longer exists
        os.remove(definition_filepath)

        # 5) Test option 5
        environment_obj_4 = self.environment.create({})
        result = self.environment.build(environment_obj_4.id)
        assert result
        assert environment_obj_4.id != environment_obj_1.id

        # teardown
        self.environment.delete(environment_obj_1.id)
        self.environment.delete(environment_obj_4.id)

    def test_run(self):
        # 1) Test run simple command with simple Dockerfile
        # 2) Test run script, with autogenerated definition
        self.project.init("test5", "test description")

        # 1) Test option 1

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": None,
            "volumes": None,
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "gpu": False,
            "api": False
        }
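        # These options mirror docker-py container run arguments: roughly
        # `docker run -p 8888:8888` with no volumes, not detached (-d), no
        # open stdin (-i), and no tty (-t). (Hedged: datmo forwards these
        # through its docker environment driver, so exact semantics may
        # differ.)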

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj = self.environment.create(input_dict)

        log_filepath = os.path.join(self.project.home, "task.log")

        # Build environment in the project
        _ = self.environment.build(environment_obj.id)

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

        # 2) Test option 2
        os.remove(definition_filepath)

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # Create environment in the project
        environment_obj = self.environment.create({})
        self.environment.build(environment_obj.id)

        run_options = {
            "command": ["python", "script.py"],
            "ports": ["8888:8888"],
            "name": None,
            "volumes": {
                self.environment.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            },
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "gpu": False,
            "api": False
        }

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

    def test_list(self):
        self.project.init("test4", "test description")

        # Create environment definition for object 1
        definition_path_1 = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_path_1, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict_1 = {
            "definition_filepath": definition_path_1,
        }

        # Create environment in the project
        environment_obj_1 = self.environment.create(input_dict_1)

        # Create environment definition for object 2
        definition_path_2 = os.path.join(self.environment.home, "Dockerfile2")
        with open(definition_path_2, "w") as f:
            f.write(to_unicode(str("FROM datmo/scikit-opencv")))

        input_dict_2 = {
            "definition_filepath": definition_path_2,
        }

        # Create second environment in the project
        environment_obj_2 = self.environment.create(input_dict_2)

        # List all environments and ensure they exist
        result = self.environment.list()

        assert len(result) == 2 and \
            environment_obj_1 in result and \
            environment_obj_2 in result

    def test_delete(self):
        self.project.init("test5", "test description")

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj = self.environment.create(input_dict)

        # Delete environment in the project
        result = self.environment.delete(environment_obj.id)

        # Check if environment retrieval throws error
        thrown = False
        try:
            self.environment.dal.environment.get_by_id(environment_obj.id)
        except EntityNotFound:
            thrown = True

        assert result == True and \
            thrown == True

    def test_stop(self):
        self.project.init("test5", "test description")

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": None,
            "volumes": None,
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "gpu": False,
            "api": False
        }

        # Create environment_driver definition
        env_def_path = os.path.join(self.project.home, "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj = self.environment.create(input_dict)

        log_filepath = os.path.join(self.project.home, "task.log")

        # Build environment in the project
        _ = self.environment.build(environment_obj.id)

        # Run environment in the project
        _, run_id, _ = \
            self.environment.run(environment_obj.id, run_options, log_filepath)

        # Stop the running environment
        return_code = self.environment.stop(run_id)
        assert return_code

        # teardown
        self.environment.delete(environment_obj.id)
Example #4
class SnapshotController(BaseController):
    """SnapshotController inherits from BaseController and manages business logic related to snapshots

    Attributes
    ----------
    code : datmo.core.controller.code.code.CodeController
    file_collection : datmo.core.controller.file.file_collection.FileCollectionController
    environment : datmo.core.controller.environment.environment.EnvironmentController

    Methods
    -------
    create(dictionary)
        Create a snapshot within the project
    checkout(id)
        Checkout to a specific snapshot within the project
    list(session_id=None)
        List all snapshots present within the project based on given filters
    delete(id)
        Delete the snapshot specified from the project

    """
    def __init__(self):
        super(SnapshotController, self).__init__()
        self.code = CodeController()
        self.file_collection = FileCollectionController()
        self.environment = EnvironmentController()
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.snapshot.__init__"))

    def create(self, dictionary):
        """Create snapshot object

        Parameters
        ----------
        dictionary : dict

            for each of the 5 key components, this function will search for
            one of the variables below, starting from the top. The default
            behavior for each component, used when none of its variables are
            given, is also described below for reference.

            code :
                code_id : str, optional
                    code reference associated with the snapshot; if not
                    provided will look to inputs below for code creation
                commit_id : str, optional
                    commit id provided by the user if already available

                Default
                -------
                commits will be taken and code created via the CodeController,
                then added to the snapshot at the time of snapshot creation

            environment :
                environment_id : str, optional
                    id for environment used to create snapshot
                environment_paths : list, optional
                    list of absolute or relative filepaths and/or dirpaths to collect with destination names
                    (e.g. "/path/to/file>hello", "/path/to/file2", "/path/to/dir>newdir")

                Default
                -------
                default environment files will be searched and environment will
                be created with the EnvironmentController and added to the snapshot
                at the time of snapshot creation

            file_collection :
                file_collection_id : str, optional
                    file collection associated with the snapshot
                paths : list, optional
                    list of absolute or relative filepaths and/or dirpaths to collect with destination names
                    (e.g. "/path/to/file:hello", "/path/to/file2", "/path/to/dir:newdir")

                Default
                -------
                paths will be considered empty ([]), and the FileCollectionController
                will create an empty FileCollection.

            config :
                config : dict, optional
                    key, value pairs of configurations
                config_filepath : str, optional
                    absolute filepath to configuration parameters file
                config_filename : str, optional
                    name of file with configuration parameters

                Default
                -------
                config will be considered empty ({}) and saved to the snapshot

            stats :
                stats : dict, optional
                    key, value pairs of metrics and statistics
                stats_filepath : str, optional
                    absolute filepath to stats parameters file
                stats_filename : str, optional
                    name of file with metrics and statistics.

                Default
                -------
                stats will be considered empty ({}) and saved to the snapshot

            the remaining optional arguments below will be searched for
            directly in the input dictionary

                message : str
                    long description of snapshot
                session_id : str, optional
                    session id within which snapshot is created,
                    will overwrite default if given
                task_id : str, optional
                    task id associated with snapshot
                label : str, optional
                    short description of snapshot
                visible : bool, optional
                    True if visible to user via list command else False

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            snapshot object with all relevant parameters

        Raises
        ------
        RequiredArgumentMissing
            if required arguments are not given by the user
        FileIOError
            if files are not present or there is an error in File IO
        """
        # Validate Inputs
        create_dict = {
            "model_id": self.model.id,
            "session_id": self.current_session.id,
        }

        validate("create_snapshot", dictionary)

        # Message must be present
        if "message" in dictionary:
            create_dict['message'] = dictionary['message']
        else:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.create.arg", "message"))

        # Code setup
        self._code_setup(dictionary, create_dict)

        # Environment setup
        self._env_setup(dictionary, create_dict)

        # File setup
        self._file_setup(dictionary, create_dict)

        # Config setup
        self._config_setup(dictionary, create_dict)

        # Stats setup
        self._stats_setup(dictionary, create_dict)

        # If snapshot object with required args already exists, return it
        # DO NOT create a new snapshot with the same required arguments
        results = self.dal.snapshot.query({
            "model_id": create_dict["model_id"],
            "code_id": create_dict['code_id'],
            "environment_id": create_dict['environment_id'],
            "file_collection_id": create_dict['file_collection_id'],
            "config": create_dict['config'],
            "stats": create_dict['stats']
        })
        if results: return results[0]

        # Optional args for Snapshot entity
        optional_args = ["task_id", "label", "visible"]
        for optional_arg in optional_args:
            if optional_arg in dictionary:
                create_dict[optional_arg] = dictionary[optional_arg]

        # Create snapshot and return
        return self.dal.snapshot.create(Snapshot(create_dict))
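    # Usage sketch for create() (hypothetical values; assumes an initialized
    # datmo project so that SnapshotController() can be constructed):
    #
    #   controller = SnapshotController()
    #   snapshot = controller.create({
    #       "message": "baseline run",           # required
    #       "config": {"learning_rate": 0.01},   # optional, see _config_setup
    #       "stats": {"accuracy": 0.94},         # optional, see _stats_setup
    #   })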

    def create_from_task(self,
                         message,
                         task_id,
                         label=None,
                         config=None,
                         stats=None):
        """Create snapshot from a completed task.
        # TODO: enable create from task DURING a run

        Parameters
        ----------
        message : str
            long description of snapshot
        task_id : str
            task object to use to create snapshot
        label: str, optional
            short description of snapshot
        config : dict, optional
            key, value pairs of configurations
        stats : dict, optional
            key, value pairs of metrics and statistics

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            snapshot object with all relevant parameters

        Raises
        ------
        TaskNotComplete
            if task specified has not been completed
        """

        validate(
            "create_snapshot_from_task", {
                "message": message,
                "task_id": task_id,
                "label": label,
                "config": config,
                "stats": stats
            })

        task_obj = self.dal.task.get_by_id(task_id)

        if not task_obj.status and not task_obj.after_snapshot_id:
            raise TaskNotComplete(
                __("error", "controller.snapshot.create_from_task",
                   str(task_obj.id)))

        after_snapshot_obj = self.dal.snapshot.get_by_id(
            task_obj.after_snapshot_id)

        snapshot_update_dict = {
            "id": task_obj.after_snapshot_id,
            "message": message,
            "visible": True
        }

        if label:
            snapshot_update_dict["label"] = label

        if config:
            snapshot_update_dict["config"] = config

        if stats:
            snapshot_update_dict["stats"] = stats
        else:
            # Append to any existing stats already present
            snapshot_update_dict["stats"] = {}
            if after_snapshot_obj.stats is not None:
                snapshot_update_dict["stats"].update(after_snapshot_obj.stats)
            if task_obj.results is not None:
                snapshot_update_dict["stats"].update(task_obj.results)
            if snapshot_update_dict["stats"] == {}:
                snapshot_update_dict["stats"] = None

        return self.dal.snapshot.update(snapshot_update_dict)
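    # Usage sketch for create_from_task() (hypothetical id; the task must
    # already have completed and produced an after_snapshot_id):
    #
    #   snapshot = controller.create_from_task(
    #       message="snapshot from finished task",
    #       task_id="<completed-task-id>",
    #       label="v1")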

    def checkout(self, snapshot_id):
        # Get snapshot object
        snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id)
        code_obj = self.dal.code.get_by_id(snapshot_obj.code_id)
        file_collection_obj = self.dal.file_collection.\
            get_by_id(snapshot_obj.file_collection_id)
        environment_obj = self.dal.environment. \
            get_by_id(snapshot_obj.environment_id)

        # check for unstaged changes in code
        self.code_driver.check_unstaged_changes()
        # check for unstaged changes in environment
        self.environment.check_unstaged_changes()
        # check for unstaged changes in file
        self.file_collection.check_unstaged_changes()

        # Checkout code_driver to the relevant commit ref
        code_checkout_success = self.code_driver.checkout_ref(
            code_obj.commit_id)

        # Checkout environment_driver to relevant environment id
        environment_checkout_success = self.environment.checkout(
            environment_obj.id)

        # Checkout file_driver to relevant file collection id
        file_checkout_success = self.file_collection.checkout(
            file_collection_obj.id)

        return (code_checkout_success and environment_checkout_success
                and file_checkout_success)
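    # checkout() restores all three components (code, environment, files) to
    # the state recorded in the snapshot, raising UnstagedChanges first if any
    # component has uncommitted modifications, e.g.:
    #
    #   success = controller.checkout("<snapshot-id>")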

    def list(self,
             session_id=None,
             visible=None,
             sort_key=None,
             sort_order=None):
        query = {}
        if session_id:
            try:
                self.dal.session.get_by_id(session_id)
            except EntityNotFound:
                raise SessionDoesNotExist(
                    __("error", "controller.snapshot.list", session_id))
            query['session_id'] = session_id
        if visible is not None and isinstance(visible, bool):
            query['visible'] = visible

        return self.dal.snapshot.query(query, sort_key, sort_order)

    def update(self,
               snapshot_id,
               config=None,
               stats=None,
               message=None,
               label=None,
               visible=None):
        """Update the snapshot metadata"""
        if not snapshot_id:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.delete.arg", "snapshot_id"))
        update_snapshot_input_dict = {'id': snapshot_id}
        validate(
            "update_snapshot", {
                "config": config,
                "stats": stats,
                "message": message,
                "label": label,
                "visible": visible
            })
        if config is not None:
            update_snapshot_input_dict['config'] = config
        if stats is not None:
            update_snapshot_input_dict['stats'] = stats
        if message is not None:
            update_snapshot_input_dict['message'] = message
        if label is not None:
            update_snapshot_input_dict['label'] = label
        if visible is not None:
            update_snapshot_input_dict['visible'] = visible

        return self.dal.snapshot.update(update_snapshot_input_dict)
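    # Usage sketch for update() (hypothetical values; only fields passed as
    # non-None are written):
    #
    #   controller.update("<snapshot-id>", label="best-model", visible=True)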

    def get(self, snapshot_id):
        """Get snapshot object and return

        Parameters
        ----------
        snapshot_id : str
            id for the snapshot you would like to get

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            core snapshot object

        Raises
        ------
        DoesNotExist
            snapshot does not exist
        """
        try:
            return self.dal.snapshot.get_by_id(snapshot_id)
        except EntityNotFound:
            raise DoesNotExist()

    def get_files(self, snapshot_id, mode="r"):
        """Get list of file objects for snapshot id.

        Parameters
        ----------
        snapshot_id : str
            id for the snapshot you would like to get file objects for
        mode : str
            file open mode
            (default is "r" to open file for read)

        Returns
        -------
        list
            list of python file objects

        Raises
        ------
        DoesNotExist
            snapshot object does not exist
        """
        try:
            snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id)
        except EntityNotFound:
            raise DoesNotExist()
        file_collection_obj = self.dal.file_collection.get_by_id(
            snapshot_obj.file_collection_id)
        return self.file_driver.get_collection_files(
            file_collection_obj.filehash, mode=mode)

    def delete(self, snapshot_id):
        """Delete all traces of a snapshot

        Parameters
        ----------
        snapshot_id : str
            id for the snapshot to remove

        Returns
        -------
        bool
            True if success

        Raises
        ------
        RequiredArgumentMissing
            if the provided snapshot_id is None
        """
        if not snapshot_id:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.delete.arg", "snapshot_id"))
        return self.dal.snapshot.delete(snapshot_id)

    def _code_setup(self, incoming_dictionary, create_dict):
        """ Set the code_id by using:
            1. code_id
            2. commit_id string, which creates a new code_id
            3. create a new code id

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "code_id" in incoming_dictionary:
            create_dict['code_id'] = incoming_dictionary['code_id']
        elif "commit_id" in incoming_dictionary:
            create_dict['code_id'] = self.code.\
                create(commit_id=incoming_dictionary['commit_id']).id
        else:
            create_dict['code_id'] = self.code.create().id

    def _env_setup(self, incoming_dictionary, create_dict):
        """ TODO:

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "environment_id" in incoming_dictionary:
            create_dict['environment_id'] = incoming_dictionary[
                'environment_id']
        elif "environment_paths" in incoming_dictionary:
            create_dict['environment_id'] = self.environment.create({
                "paths":
                incoming_dictionary['environment_paths']
            }).id
        else:
            # create some default environment
            create_dict['environment_id'] = self.environment.\
                create({}).id

    def _file_setup(self, incoming_dictionary, create_dict):
        """ Checks for user inputs and uses the file collection controller to
        obtain the file collection id and create the necessary collection

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """

        if "file_collection_id" in incoming_dictionary:

            create_dict['file_collection_id'] = incoming_dictionary[
                'file_collection_id']
        elif "paths" in incoming_dictionary:
            # transform file paths to file_collection_id
            create_dict['file_collection_id'] = self.file_collection.\
                create(incoming_dictionary['paths']).id
        else:
            # create some default file collection
            create_dict['file_collection_id'] = self.file_collection.\
                create([]).id

    def _config_setup(self, incoming_dictionary, create_dict):
        """ Fills in snapshot config by having one of the following:
            1. config = JSON object
            2. config_filepath = some location where a json file exists
            3. config_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOError
        """
        if "config" in incoming_dictionary:
            create_dict['config'] = incoming_dictionary['config']
        elif "config_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['config_filepath']):
                raise FileIOError(
                    __("error", "controller.snapshot.create.file_config"))
            # If path exists transform file to config dict
            config_json_driver = JSONStore(
                incoming_dictionary['config_filepath'])
            create_dict['config'] = config_json_driver.to_dict()
        elif "config_filename" in incoming_dictionary:
            config_filename = incoming_dictionary['config_filename']
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])
        else:
            config_filename = "config.json"
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])
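    # Precedence sketch for _config_setup (hypothetical inputs; the first
    # matching key wins):
    #   {"config": {"lr": 0.01}}                 -> used as-is
    #   {"config_filepath": "/abs/config.json"}  -> loaded via JSONStore
    #   {"config_filename": "my_config.json"}    -> searched via _find_in_filecollection
    #   {}                                       -> falls back to "config.json"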

    def _stats_setup(self, incoming_dictionary, create_dict):
        """Fills in snapshot stats by having one of the following:
            1. stats = JSON object
            2. stats_filepath = some location where a json file exists
            3. stats_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOError
        """

        if "stats" in incoming_dictionary:
            create_dict['stats'] = incoming_dictionary['stats']
        elif "stats_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['stats_filepath']):
                raise FileIOError(
                    __("error", "controller.snapshot.create.file_stat"))
            # If path exists transform file to stats dict
            stats_json_driver = JSONStore(
                incoming_dictionary['stats_filepath'])
            create_dict['stats'] = stats_json_driver.to_dict()
        elif "stats_filename" in incoming_dictionary:
            stats_filename = incoming_dictionary['stats_filename']
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])
        else:
            stats_filename = "stats.json"
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])

    def _find_in_filecollection(self, file_to_find, file_collection_id):
        """ Attempts to find a file within the file collection

        Returns
        -------
        dict
            output dictionary of the JSON file
        """

        file_collection_obj = self.file_collection.dal.file_collection.\
            get_by_id(file_collection_id)
        file_collection_path = \
            self.file_collection.file_driver.get_collection_path(
                file_collection_obj.filehash)
        # find all of the possible paths where it could exist
        possible_paths = [os.path.join(self.home, file_to_find)] + \
                         [os.path.join(self.home, item[0], file_to_find)
                          for item in os.walk(file_collection_path)]
        existing_possible_paths = [
            possible_path for possible_path in possible_paths
            if os.path.isfile(possible_path)
        ]
        if not existing_possible_paths:
            # TODO: Add some info / warning that no file was found
            # return an empty default
            return {}
        else:
            # If any such path exists, transform the file to a dict
            json_file = JSONStore(existing_possible_paths[0])
            return json_file.to_dict()
Example #5
class TestTaskController():
    def setup_method(self):
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.environment_ids = []

    def teardown_method(self):
        if not check_docker_inactive(test_datmo_dir,
                                     Config().datmo_directory_name):
            self.__setup()
            self.environment_controller = EnvironmentController()
            for env_id in list(set(self.environment_ids)):
                if not self.environment_controller.delete(env_id):
                    raise Exception

    def __setup(self):
        Config().set_home(self.temp_dir)
        self.project_controller = ProjectController()
        self.project_controller.init("test", "test description")
        self.environment_controller = EnvironmentController()
        self.task_controller = TaskController()

    def test_init_fail_project_not_init(self):
        Config().set_home(self.temp_dir)
        failed = False
        try:
            TaskController()
        except ProjectNotInitialized:
            failed = True
        assert failed

    def test_init_fail_invalid_path(self):
        test_home = "some_random_dir"
        Config().set_home(test_home)
        failed = False
        try:
            TaskController()
        except InvalidProjectPath:
            failed = True
        assert failed

    def test_create(self):
        self.__setup()
        # Create task in the project
        task_obj = self.task_controller.create()

        assert isinstance(task_obj, Task)
        assert task_obj.created_at
        assert task_obj.updated_at

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_run_helper(self):
        self.__setup()
        # TODO: Try out more options (see below)
        # Create environment_driver id
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        paths = [env_def_path]
        environment_obj = self.environment_controller.create({"paths": paths})
        self.environment_ids.append(environment_obj.id)

        # Set log filepath
        log_filepath = os.path.join(self.task_controller.home, "test.log")

        # create volume to mount
        temp_test_dirpath = os.path.join(self.temp_dir, "temp")
        os.makedirs(temp_test_dirpath)

        # Test option set 1
        random_name = str(uuid.uuid1())
        options_dict = {
            "command": ["sh", "-c", "echo accuracy:0.45"],
            "ports": ["8888:8888"],
            "name": random_name,
            "volumes": {
                temp_test_dirpath: {
                    'bind': '/task/',
                    'mode': 'rw'
                }
            },
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False,
            "interactive": False
        }
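        # Note: relative to the option sets in the earlier examples, this
        # controller version also accepts "mem_limit" (a docker-style memory
        # cap, here "4g") and an "interactive" flag.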

        return_code, run_id, logs = \
            self.task_controller._run_helper(environment_obj.id,
                                  options_dict, log_filepath)
        assert return_code == 0
        assert run_id and \
               self.task_controller.environment_driver.get_container(run_id)
        assert logs and \
               os.path.exists(log_filepath)
        self.task_controller.environment_driver.stop_remove_containers_by_term(
            term=random_name)

        # Test option set 2
        random_name_2 = str(uuid.uuid1())
        options_dict = {
            "command": ["sh", "-c", "echo accuracy:0.45"],
            "ports": ["8888:8888"],
            "name": random_name_2,
            "volumes": {
                temp_test_dirpath: {
                    'bind': '/task/',
                    'mode': 'rw'
                }
            },
            "mem_limit": "4g",
            "detach": True,
            "stdin_open": False,
            "tty": False,
            "api": True,
            "interactive": False
        }

        return_code, run_id, logs = \
            self.task_controller._run_helper(environment_obj.id,
                                  options_dict, log_filepath)
        assert return_code == 0
        assert run_id and \
               self.task_controller.environment_driver.get_container(run_id)
        assert logs and \
               os.path.exists(log_filepath)
        self.task_controller.environment_driver.stop_remove_containers_by_term(
            term=random_name_2)

    def test_parse_logs_for_results(self):
        self.__setup()
        test_logs = """
        this is a log
        accuracy is good
        accuracy : 0.94
        this did not work
        validation : 0.32
        model_type : logistic regression
        """
        result = self.task_controller._parse_logs_for_results(test_logs)

        assert isinstance(result, dict)
        assert result['accuracy'] == "0.94"
        assert result['validation'] == "0.32"
        assert result['model_type'] == "logistic regression"

        test_logs = """test"""
        result = self.task_controller._parse_logs_for_results(test_logs)
        assert result is None
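    # A minimal sketch of the parsing contract exercised above (assumption:
    # the real _parse_logs_for_results implementation may differ): every log
    # line of the form "key : value" contributes one entry, other lines are
    # ignored, and None is returned when nothing matches.
    #
    #   def parse(logs):
    #       results = {}
    #       for line in logs.splitlines():
    #           if ":" in line:
    #               key, _, value = line.partition(":")
    #               results[key.strip()] = value.strip()
    #       return results or None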

    def test_update_environment_run_options(self):
        self.__setup()
        environment_run_option = {
            "command": ["python", "script.py"],
            "volumes": {
                os.path.join(self.temp_dir, "/temp_task"): {
                    'bind': '/task/',
                    'mode': 'rw'
                },
                self.temp_dir: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            }
        }
        # create data file
        data_dirpath = os.path.join(self.temp_dir, "data")
        data_file_dirpath = os.path.join(self.temp_dir, "data_folder")
        data_filepath = os.path.join(data_file_dirpath, "data_file.txt")
        os.mkdir(data_dirpath)
        os.mkdir(data_file_dirpath)
        with open(data_filepath, "wb") as f:
            f.write(to_bytes("data file"))

        data_file_path_map = [(data_filepath, "data_file.txt")]
        data_directory_path_map = [(data_dirpath, "data_directory")]

        environment_run_option = self.task_controller._update_environment_run_options(
            environment_run_option, data_file_path_map,
            data_directory_path_map)

        assert environment_run_option["volumes"][data_file_dirpath] == {
            'bind': '/data/',
            'mode': 'rw'
        }
        assert environment_run_option["volumes"][data_dirpath] == {
            'bind': '/data/data_directory',
            'mode': 'rw'
        }

        # Error when passing a directory which does not exist
        data_dirpath = os.path.join(self.temp_dir, "data_dne")
        data_filepath = os.path.join(self.temp_dir, "data_dne",
                                     "data_file.txt")
        data_file_path_map = [(data_filepath, "data_file.txt")]
        data_directory_path_map = [(data_dirpath, "data_directory")]
        failed = False
        try:
            self.task_controller._update_environment_run_options(
                environment_run_option, data_file_path_map,
                data_directory_path_map)
        except TaskRunError:
            failed = True

        assert failed

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_run(self):
        self.__setup()
        # 0) Test failure case without command and without interactive
        # 1) Test success case with default values and env def file
        # 2) Test failure case if running same task (conflicting containers)
        # 3) Test failure case if running same task with snapshot_dict (conflicting containers)
        # 4) Test success case with snapshot_dict
        # 5) Test success case with saved file during task run
        # 6) Test success case with data file path being passed
        # 7) Test success case with data directory path being passed

        # TODO: look into log filepath randomness, sometimes logs are not written

        # Create task in the project
        task_obj = self.task_controller.create()

        # Create environment definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # 0) Test option 0
        failed = False
        try:
            self.task_controller.run(task_obj.id)
        except RequiredArgumentMissing:
            failed = True
        assert failed

        failed = False
        try:
            self.task_controller.run(
                task_obj.id,
                task_dict={
                    "command": None,
                    "interactive": False,
                    "ports": None
                })
        except RequiredArgumentMissing:
            failed = True
        assert failed

        # Create task_dict
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        task_dict = {"command_list": task_command}

        # 1) Test option 1
        updated_task_obj = self.task_controller.run(
            task_obj.id, task_dict=task_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        self.environment_ids.append(environment_obj.id)

        assert isinstance(updated_task_obj, Task)
        assert task_obj.id == updated_task_obj.id

        assert updated_task_obj.before_snapshot_id
        assert updated_task_obj.ports == None
        assert updated_task_obj.interactive == False
        assert updated_task_obj.task_dirpath
        assert updated_task_obj.log_filepath
        assert updated_task_obj.start_time

        assert updated_task_obj.after_snapshot_id
        assert updated_task_obj.run_id
        assert updated_task_obj.logs
        assert "accuracy" in updated_task_obj.logs
        assert updated_task_obj.results
        assert updated_task_obj.results == {"accuracy": "0.45"}
        assert after_snapshot_obj.stats == {"accuracy": "0.45"}
        assert updated_task_obj.status == "SUCCESS"
        assert updated_task_obj.end_time
        assert updated_task_obj.duration

        self.task_controller.stop(task_obj.id)

        # 2) Test option 2
        failed = False
        try:
            self.task_controller.run(task_obj.id)
        except TaskRunError:
            failed = True
        assert failed

        # 3) Test option 3

        # Create files to add
        self.project_controller.file_driver.create("dirpath1", directory=True)
        self.project_controller.file_driver.create("dirpath2", directory=True)
        self.project_controller.file_driver.create("filepath1")

        # Snapshot dictionary
        snapshot_dict = {
            "paths": [
                os.path.join(self.project_controller.home, "dirpath1"),
                os.path.join(self.project_controller.home, "dirpath2"),
                os.path.join(self.project_controller.home, "filepath1")
            ],
        }

        # Run a basic task in the project
        failed = False
        try:
            self.task_controller.run(task_obj.id, snapshot_dict=snapshot_dict)
        except TaskRunError:
            failed = True
        assert failed

        # Test when the specific task id is already RUNNING
        # Create task in the project
        task_obj_1 = self.task_controller.create()
        self.task_controller.dal.task.update({
            "id": task_obj_1.id,
            "status": "RUNNING"
        })
        # Create environment_driver definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        failed = False
        try:
            self.task_controller.run(task_obj_1.id, task_dict=task_dict)
        except TaskRunError:
            failed = True
        assert failed

        # 4) Test option 4

        # Create a new task in the project
        task_obj_2 = self.task_controller.create()

        # Run another task in the project
        updated_task_obj_2 = self.task_controller.run(
            task_obj_2.id, task_dict=task_dict, snapshot_dict=snapshot_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj_2.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        self.environment_ids.append(environment_obj.id)

        assert isinstance(updated_task_obj_2, Task)
        assert task_obj_2.id == updated_task_obj_2.id

        assert updated_task_obj_2.before_snapshot_id
        assert updated_task_obj_2.ports == None
        assert updated_task_obj_2.interactive == False
        assert updated_task_obj_2.task_dirpath
        assert updated_task_obj_2.log_filepath
        assert updated_task_obj_2.start_time

        assert updated_task_obj_2.after_snapshot_id
        assert updated_task_obj_2.run_id
        assert updated_task_obj_2.logs
        assert "accuracy" in updated_task_obj_2.logs
        assert updated_task_obj_2.results
        assert updated_task_obj_2.results == {"accuracy": "0.45"}
        assert updated_task_obj_2.status == "SUCCESS"
        assert updated_task_obj_2.end_time
        assert updated_task_obj_2.duration

        self.task_controller.stop(task_obj_2.id)

        # 5) Test option 5

        # Create a basic script
        test_filepath = os.path.join(self.temp_dir, "script.py")
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import os\n"))
            f.write(to_bytes("import shutil\n"))
            f.write(to_bytes("print('hello')\n"))
            f.write(to_bytes("print(' accuracy: 0.56 ')\n"))
            f.write(
                to_bytes(
                    "with open(os.path.join('/task', 'new_file.txt'), 'a') as f:\n"
                ))
            f.write(to_bytes("    f.write('my test file')\n"))

        # Create task in the project
        task_obj_2 = self.task_controller.create()

        # Create task_dict
        task_command = ["python", "script.py"]
        task_dict = {"command_list": task_command}

        # Create environment definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        updated_task_obj_2 = self.task_controller.run(
            task_obj_2.id, task_dict=task_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj_2.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        self.environment_ids.append(environment_obj.id)

        assert isinstance(updated_task_obj_2, Task)
        assert updated_task_obj_2.before_snapshot_id
        assert updated_task_obj_2.ports == None
        assert updated_task_obj_2.interactive == False
        assert updated_task_obj_2.task_dirpath
        assert updated_task_obj_2.log_filepath
        assert updated_task_obj_2.start_time

        assert updated_task_obj_2.after_snapshot_id
        assert updated_task_obj_2.run_id
        assert updated_task_obj_2.logs
        assert "accuracy" in updated_task_obj_2.logs
        assert updated_task_obj_2.results
        assert updated_task_obj_2.results == {"accuracy": "0.56"}
        assert updated_task_obj_2.status == "SUCCESS"
        assert updated_task_obj_2.end_time
        assert updated_task_obj_2.duration

        self.task_controller.stop(task_obj_2.id)

        # Test that the after snapshot contains the written file
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj_2.after_snapshot_id)
        file_collection_obj = self.task_controller.dal.file_collection.get_by_id(
            after_snapshot_obj.file_collection_id)
        files_absolute_path = os.path.join(self.task_controller.home,
                                           file_collection_obj.path)

        assert os.path.isfile(os.path.join(files_absolute_path, "task.log"))
        assert os.path.isfile(
            os.path.join(files_absolute_path, "new_file.txt"))

        # 6) Test option 6
        self.project_controller.file_driver.create("dirpath1", directory=True)
        self.project_controller.file_driver.create(
            os.path.join("dirpath1", "file.txt"))
        with open(
                os.path.join(self.project_controller.home, "dirpath1",
                             "file.txt"), "wb") as f:
            f.write(to_bytes('my initial line\n'))
        test_filename = "script.py"
        test_filepath = os.path.join(self.temp_dir, test_filename)
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import os\n"))
            f.write(to_bytes("print('hello')\n"))
            f.write(to_bytes("import shutil\n"))

            f.write(
                to_bytes(
                    "with open(os.path.join('/data', 'file.txt'), 'a') as f:\n"
                ))
            f.write(to_bytes("    f.write('my test file')\n"))

        # Create task in the project
        task_obj_3 = self.task_controller.create()

        # Create task_dict
        task_command = ["python", test_filename]
        task_dict = {
            "command_list": task_command,
            "data_file_path_map": [(os.path.join(self.project_controller.home,
                                                 "dirpath1", "file.txt"),
                                    'file.txt')]
        }

        # Create environment definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        updated_task_obj_3 = self.task_controller.run(
            task_obj_3.id, task_dict=task_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj_3.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        self.environment_ids.append(environment_obj.id)

        assert isinstance(updated_task_obj_3, Task)
        assert updated_task_obj_3.before_snapshot_id
        assert updated_task_obj_3.ports is None
        assert updated_task_obj_3.interactive is False
        assert updated_task_obj_3.task_dirpath
        assert updated_task_obj_3.log_filepath
        assert updated_task_obj_3.start_time

        assert updated_task_obj_3.after_snapshot_id
        assert updated_task_obj_3.run_id
        assert updated_task_obj_3.logs
        assert updated_task_obj_3.status == "SUCCESS"
        assert updated_task_obj_3.end_time
        assert updated_task_obj_3.duration

        self.task_controller.stop(task_obj_3.id)

        # Test that the after snapshot contains the written file
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj_3.after_snapshot_id)
        file_collection_obj = self.task_controller.dal.file_collection.get_by_id(
            after_snapshot_obj.file_collection_id)
        files_absolute_path = os.path.join(self.task_controller.home,
                                           file_collection_obj.path)

        assert os.path.isfile(os.path.join(files_absolute_path, "task.log"))
        assert os.path.isfile(
            os.path.join(self.project_controller.home, "dirpath1", "file.txt"))
        assert "my initial line" in open(
            os.path.join(self.project_controller.home, "dirpath1", "file.txt"),
            "r").read()
        assert "my test file" in open(
            os.path.join(self.project_controller.home, "dirpath1", "file.txt"),
            "r").read()

        # 7) Test option 7
        self.project_controller.file_driver.create("dirpath1", directory=True)
        self.project_controller.file_driver.create(
            os.path.join("dirpath1", "file.txt"))
        with open(
                os.path.join(self.project_controller.home, "dirpath1",
                             "file.txt"), "wb") as f:
            f.write(to_bytes('my initial line\n'))
        test_filename = "script.py"
        test_filepath = os.path.join(self.temp_dir, test_filename)
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import os\n"))
            f.write(to_bytes("print('hello')\n"))
            f.write(to_bytes("import shutil\n"))

            f.write(
                to_bytes(
                    "with open(os.path.join('/data', 'dirpath1', 'file.txt'), 'a') as f:\n"
                ))
            f.write(to_bytes("    f.write('my test file')\n"))

        # Create task in the project
        task_obj_4 = self.task_controller.create()

        # Create task_dict
        task_command = ["python", test_filename]
        task_dict = {
            "command_list": task_command,
            "data_directory_path_map": [(os.path.join(
                self.project_controller.home, "dirpath1"), 'dirpath1')]
        }

        # Create environment definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        updated_task_obj_4 = self.task_controller.run(
            task_obj_4.id, task_dict=task_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj_4.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        self.environment_ids.append(environment_obj.id)

        assert isinstance(updated_task_obj_4, Task)
        assert updated_task_obj_4.before_snapshot_id
        assert updated_task_obj_4.ports is None
        assert updated_task_obj_4.interactive is False
        assert updated_task_obj_4.task_dirpath
        assert updated_task_obj_4.log_filepath
        assert updated_task_obj_4.start_time

        assert updated_task_obj_4.after_snapshot_id
        assert updated_task_obj_4.run_id
        assert updated_task_obj_4.logs
        assert updated_task_obj_4.status == "SUCCESS"
        assert updated_task_obj_4.end_time
        assert updated_task_obj_4.duration

        self.task_controller.stop(task_obj_4.id)

        # Test that the after snapshot contains the written file
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj_4.after_snapshot_id)
        file_collection_obj = self.task_controller.dal.file_collection.get_by_id(
            after_snapshot_obj.file_collection_id)
        files_absolute_path = os.path.join(self.task_controller.home,
                                           file_collection_obj.path)

        assert os.path.isfile(os.path.join(files_absolute_path, "task.log"))
        assert os.path.isfile(
            os.path.join(self.project_controller.home, "dirpath1", "file.txt"))
        assert "my initial line" in open(
            os.path.join(self.project_controller.home, "dirpath1", "file.txt"),
            "r").read()
        assert "my test file" in open(
            os.path.join(self.project_controller.home, "dirpath1", "file.txt"),
            "r").read()

    def test_list(self):
        self.__setup()
        # Create tasks in the project
        task_obj_1 = self.task_controller.create()
        task_obj_2 = self.task_controller.create()

        # List all tasks regardless of filters
        result = self.task_controller.list()

        assert len(result) == 2 and \
               task_obj_1 in result and \
               task_obj_2 in result

        # List all tasks regardless of filters in ascending
        result = self.task_controller.list(
            sort_key='created_at', sort_order='ascending')

        assert len(result) == 2 and \
               task_obj_1 in result and \
               task_obj_2 in result
        assert result[0].created_at <= result[-1].created_at

        # List all tasks regardless of filters in descending
        result = self.task_controller.list(
            sort_key='created_at', sort_order='descending')
        assert len(result) == 2 and \
               task_obj_1 in result and \
               task_obj_2 in result
        assert result[0].created_at >= result[-1].created_at

        # Wrong order being passed in
        failed = False
        try:
            _ = self.task_controller.list(
                sort_key='created_at', sort_order='wrong_order')
        except InvalidArgumentType:
            failed = True
        assert failed

        # Wrong key and order being passed in
        failed = False
        try:
            _ = self.task_controller.list(
                sort_key='wrong_key', sort_order='wrong_order')
        except InvalidArgumentType:
            failed = True
        assert failed

        # Wrong key and right order being passed in
        expected_result = self.task_controller.list(
            sort_key='created_at', sort_order='ascending')
        result = self.task_controller.list(
            sort_key='wrong_key', sort_order='ascending')
        expected_ids = [item.id for item in expected_result]
        ids = [item.id for item in result]
        assert set(expected_ids) == set(ids)

        # List all tasks
        result = self.task_controller.list()

        assert len(result) == 2 and \
               task_obj_1 in result and \
               task_obj_2 in result

    def test_get(self):
        self.__setup()
        # Test failure for no task
        failed = False
        try:
            self.task_controller.get("random")
        except DoesNotExist:
            failed = True
        assert failed

        # Test success for task
        task_obj = self.task_controller.create()
        task_obj_returned = self.task_controller.get(task_obj.id)
        assert task_obj == task_obj_returned

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_get_files(self):
        self.__setup()
        # Test failure case
        failed = False
        try:
            self.task_controller.get_files("random")
        except DoesNotExist:
            failed = True
        assert failed

        # Create task in the project
        task_obj = self.task_controller.create()

        # Create environment definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # Create file to add
        self.project_controller.file_driver.create("dirpath1", directory=True)
        self.project_controller.file_driver.create(
            os.path.join("dirpath1", "filepath1"))

        # Snapshot dictionary
        snapshot_dict = {
            "paths": [
                os.path.join(self.project_controller.home, "dirpath1",
                             "filepath1")
            ],
        }

        # Create task_dict
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        task_dict = {"command_list": task_command}

        # Test the default values
        updated_task_obj = self.task_controller.run(
            task_obj.id, task_dict=task_dict, snapshot_dict=snapshot_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        self.environment_ids.append(environment_obj.id)

        # TODO: Test case for during run and before_snapshot run
        # Get files for the task after run is complete (default)
        result = self.task_controller.get_files(updated_task_obj.id)

        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id)
        file_collection_obj = self.task_controller.dal.file_collection.get_by_id(
            after_snapshot_obj.file_collection_id)

        file_names = [item.name for item in result]

        assert len(result) == 2
        for item in result:
            assert isinstance(item, TextIOWrapper)
            assert item.mode == "r"
        assert os.path.join(self.task_controller.home, ".datmo", "collections",
                            file_collection_obj.filehash,
                            "task.log") in file_names
        assert os.path.join(self.task_controller.home, ".datmo", "collections",
                            file_collection_obj.filehash,
                            "filepath1") in file_names

        # Get files for the task after run is complete for different mode
        result = self.task_controller.get_files(updated_task_obj.id, mode="a")
        file_names = [item.name for item in result]

        assert len(result) == 2
        for item in result:
            assert isinstance(item, TextIOWrapper)
            assert item.mode == "a"
        assert os.path.join(self.task_controller.home, ".datmo", "collections",
                            file_collection_obj.filehash,
                            "task.log") in file_names
        assert os.path.join(self.task_controller.home, ".datmo", "collections",
                            file_collection_obj.filehash,
                            "filepath1") in file_names

        self.task_controller.stop(task_obj.id)

    def test_update(self):
        self.__setup()
        # Create task in the project
        task_obj = self.task_controller.create()
        assert isinstance(task_obj, Task)

        # Test 1: When no meta data is passed
        updated_task_obj = self.task_controller.update(task_obj.id)
        assert updated_task_obj.workspace is None

        # Test 2: When meta data for workspace is passed
        test_workspace = "notebook"
        test_command = "python script.py"
        updated_task_obj = self.task_controller.update(
            task_obj.id, workspace=test_workspace, command=test_command)
        assert updated_task_obj.workspace == test_workspace
        assert updated_task_obj.command == test_command
        assert updated_task_obj.command_list == ["python", "script.py"]

        # Test 3: When meta data for interactive is passed
        test_interactive = True
        updated_task_obj = self.task_controller.update(
            task_obj.id, interactive=test_interactive)
        assert updated_task_obj.interactive == test_interactive

        # Test 4: When meta data for command_list is passed
        test_command_list = ["python", "script.py"]
        updated_task_obj = self.task_controller.update(
            task_obj.id, command_list=test_command_list)
        assert updated_task_obj.command_list == ["python", "script.py"]

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_delete(self):
        self.__setup()
        # Create a task in the project
        task_obj = self.task_controller.create()

        # Delete task from the project
        result = self.task_controller.delete(task_obj.id)

        # Check if task retrieval throws error
        thrown = False
        try:
            self.task_controller.dal.task.get_by_id(task_obj.id)
        except EntityNotFound:
            thrown = True

        assert result and thrown

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_failure(self):
        self.__setup()
        # 1) Test required arguments not provided
        # 2) Test too many arguments found
        # 3) Test incorrect task id given

        # 1) Test option 1
        failed = False
        try:
            self.task_controller.stop()
        except RequiredArgumentMissing:
            failed = True
        assert failed

        # 2) Test option 2
        failed = False
        try:
            self.task_controller.stop(task_id="test_task_id", all=True)
        except TooManyArgumentsFound:
            failed = True
        assert failed

        # 3) Test option 3
        thrown = False
        try:
            self.task_controller.stop(task_id="incorrect_task_id")
        except DoesNotExist:
            thrown = True
        assert thrown

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_success(self):
        self.__setup()
        # 1) Test stop with task_id
        # 2) Test stop with all given

        # Create task in the project
        task_obj = self.task_controller.create()

        # Create environment driver definition
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # Create task_dict
        task_command = ["sh", "-c", "echo accuracy:0.45"]
        task_dict = {"command_list": task_command}

        # 1) Test option 1
        updated_task_obj = self.task_controller.run(
            task_obj.id, task_dict=task_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        self.environment_ids.append(environment_obj.id)
        task_id = updated_task_obj.id
        result = self.task_controller.stop(task_id=task_id)
        after_task_obj = self.task_controller.dal.task.get_by_id(task_id)

        assert result
        assert after_task_obj.status == "STOPPED"

        # 2) Test option 2
        task_obj_2 = self.task_controller.create()
        updated_task_obj = self.task_controller.run(
            task_obj_2.id, task_dict=task_dict)
        after_snapshot_obj = self.task_controller.dal.snapshot.get_by_id(
            updated_task_obj.after_snapshot_id)
        environment_obj = self.task_controller.dal.environment.get_by_id(
            after_snapshot_obj.environment_id)
        self.environment_ids.append(environment_obj.id)
        result = self.task_controller.stop(all=True)
        all_task_objs = self.task_controller.dal.task.query({})

        assert result
        for task_obj in all_task_objs:
            assert task_obj.status == "STOPPED"
Example #6
0
class TestEnvironmentController():
    def setup_method(self):
        # provide mountable tmp directory for docker
        tempfile.tempdir = "/tmp" if not platform.system() == "Windows" else None
        test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                        tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.project = ProjectController(self.temp_dir)
        self.environment = EnvironmentController(self.temp_dir)

    def teardown_method(self):
        pass

    def test_create(self):
        # 0) Test create when unsupported language given
        # 1) Test create when NO file exists and NO definition path exists
        # 2) Test create when NO file exists and definition path exists
        # 3) Test create when definition path exists and given
        # 4) Test create when file exists and definition path exists
        # 5) Test create when file exists but NO definition path exists
        # 6) Test create when definition path exists and given for NEW definition filepath

        self.project.init("test3", "test description")

        # 0) Test option 0
        failed = False
        try:
            self.environment.create({"language": "java"})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # 1) Test option 1
        # Creates environment with python3-based docker image
        environment_obj_0 = self.environment.create({})
        assert environment_obj_0
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        # 2) Test option 2
        environment_obj_1 = self.environment.create({})

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_1.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_1
        assert environment_obj_1.id
        assert environment_obj_1.driver_type == "docker"
        assert environment_obj_1.file_collection_id
        assert environment_obj_1.definition_filename
        assert environment_obj_1.hardware_info
        assert environment_obj_1.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        # 3) Test option 3
        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj_2 = self.environment.create(input_dict)

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_2.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_2
        assert environment_obj_2.id
        assert environment_obj_2.driver_type == "docker"
        assert environment_obj_2.file_collection_id
        assert environment_obj_2.definition_filename
        assert environment_obj_2.hardware_info
        assert environment_obj_2.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment.create({})

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_3.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_3
        assert environment_obj_3.id
        assert environment_obj_3.driver_type == "docker"
        assert environment_obj_3.file_collection_id
        assert environment_obj_3.definition_filename
        assert environment_obj_3.hardware_info
        assert environment_obj_3.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        # Remove definition filepath
        os.remove(definition_filepath)

        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # 5) Test option 5
        environment_obj_4 = self.environment.create({})

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_4.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_4
        assert environment_obj_4.id
        assert environment_obj_4.driver_type == "docker"
        assert environment_obj_4.file_collection_id
        assert environment_obj_4.definition_filename
        assert environment_obj_4.hardware_info
        assert environment_obj_4.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmorequirements.txt"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        assert environment_obj_1.id != environment_obj_4.id

        # 6) Test option 6

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM cloudgear/ubuntu:14.04")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create a new environment obj
        environment_obj_5 = self.environment.create(input_dict)

        # Get file collection path
        file_collection_obj = self.environment.dal.file_collection. \
            get_by_id(environment_obj_5.file_collection_id)
        file_collection_dir = self.environment.file_driver. \
            get_collection_path(file_collection_obj.filehash)

        assert environment_obj_5
        assert environment_obj_5.id
        assert environment_obj_5.driver_type == "docker"
        assert environment_obj_5.file_collection_id
        assert environment_obj_5.definition_filename
        assert environment_obj_5.hardware_info
        assert environment_obj_5.unique_hash == file_collection_obj.filehash
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))

        assert environment_obj_5.id != environment_obj_1.id
        assert environment_obj_5.id != environment_obj_4.id

    def test_build(self):
        # 1) Test build when no environment given
        # 2) Test build when definition path exists and given
        # 3) Test build when NO file exists and definition path exists
        # 4) Test build when file exists and definition path exists
        # 5) Test build when file exists but NO definition path exists
        self.project.init("test5", "test description")

        # 1) Test option 1
        failed = False
        try:
            _ = self.environment.build("does_not_exist")
        except EntityNotFound:
            failed = True
        assert failed

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))
        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # 2) Test option 2
        # Create environment in the project
        environment_obj_1 = self.environment.create(input_dict)
        result = self.environment.build(environment_obj_1.id)
        assert result

        # 3) Test option 3
        # Create environment in the project
        environment_obj_2 = self.environment.create({})
        result = self.environment.build(environment_obj_2.id)
        assert result

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment.create({})
        result = self.environment.build(environment_obj_3.id)
        assert result

        # Tests 2), 3), and 4) result in the same environment
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # Test building when the Dockerfile no longer exists
        os.remove(definition_filepath)

        # 5) Test option 5
        environment_obj_4 = self.environment.create({})
        result = self.environment.build(environment_obj_4.id)
        assert result
        assert environment_obj_4.id != environment_obj_1.id

        # teardown
        self.environment.delete(environment_obj_1.id)
        self.environment.delete(environment_obj_4.id)

    def test_run(self):
        # 1) Test run simple command with simple Dockerfile
        # 2) Test run script with autogenerated definition
        self.project.init("test5", "test description")

        # 1) Test option 1

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        random_name = ''.join([
            random.choice(string.ascii_letters + string.digits)
            for _ in range(32)
        ])
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_name,
            "volumes": None,
            "detach": True,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj = self.environment.create(input_dict)

        log_filepath = os.path.join(self.project.home, "task.log")

        # Build environment in the project
        _ = self.environment.build(environment_obj.id)

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

        # 2) Test option 2
        os.remove(definition_filepath)

        # Create script to test
        test_filepath = os.path.join(self.environment.home, "script.py")
        with open(test_filepath, "w") as f:
            f.write(to_unicode("import numpy\n"))
            f.write(to_unicode("import sklearn\n"))
            f.write(to_unicode("print('hello')\n"))

        # Create environment in the project
        environment_obj = self.environment.create({})
        self.environment.build(environment_obj.id)

        random_name = ''.join([
            random.choice(string.ascii_letters + string.digits)
            for _ in range(32)
        ])
        run_options = {
            "command": ["python", "script.py"],
            "ports": ["8888:8888"],
            "name": random_name,
            "volumes": {
                self.environment.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            },
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # teardown
        self.environment.delete(environment_obj.id)

    def test_interactive_run(self):
        # 1) Test run interactive terminal in environment
        # 2) Test run jupyter notebook in environment
        # Create environment definition
        self.project.init("test6", "test description")

        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj = self.environment.create(input_dict)
        # 1) Test option 1
        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, timed_run_result):
            run_options = {
                "command": [],
                "ports": ["8888:8888"],
                "name": container_name,
                "volumes": None,
                "detach": True,
                "stdin_open": True,
                "tty": True,
                "api": False
            }

            log_filepath = os.path.join(self.project.home, "task.log")

            # Build environment in the project
            _ = self.environment.build(environment_obj.id)

            # Run environment in the project
            self.environment.run(environment_obj.id, run_options, log_filepath)

            return timed_run_result

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            timed_run_result = True

        assert timed_run_result

        # teardown
        self.environment.delete(environment_obj.id)

        # 2) Test option 2
        environment_obj = self.environment.create(input_dict)

        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, timed_run_result):
            run_options = {
                "command": ["jupyter", "notebook"],
                "ports": ["8888:8888"],
                "name": container_name,
                "volumes": None,
                "detach": True,
                "stdin_open": False,
                "tty": False,
                "api": False
            }

            log_filepath = os.path.join(self.project.home, "task.log")

            # Build environment in the project
            _ = self.environment.build(environment_obj.id)

            # Run environment in the project
            self.environment.run(environment_obj.id, run_options, log_filepath)

            return timed_run_result

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            timed_run_result = True

        assert timed_run_result

        # Stop the running environment
        # self.environment.stop(container_name)

        # teardown
        self.environment.delete(environment_obj.id)

    def test_list(self):
        self.project.init("test4", "test description")

        # Create environment definition for object 1
        definition_path_1 = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_path_1, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict_1 = {
            "definition_filepath": definition_path_1,
        }

        # Create environment in the project
        environment_obj_1 = self.environment.create(input_dict_1)

        # Create environment definition for object 2
        definition_path_2 = os.path.join(self.environment.home, "Dockerfile2")
        with open(definition_path_2, "w") as f:
            f.write(to_unicode(str("FROM datmo/scikit-opencv")))

        input_dict_2 = {
            "definition_filepath": definition_path_2,
        }

        # Create second environment in the project
        environment_obj_2 = self.environment.create(input_dict_2)

        # List all environments and ensure they exist
        result = self.environment.list()

        assert len(result) == 2 and \
            environment_obj_1 in result and \
            environment_obj_2 in result

    def test_delete(self):
        self.project.init("test5", "test description")

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj = self.environment.create(input_dict)

        # Delete environment in the project
        result = self.environment.delete(environment_obj.id)

        # Check if environment retrieval throws error
        thrown = False
        try:
            self.environment.dal.environment.get_by_id(environment_obj.id)
        except EntityNotFound:
            thrown = True

        assert result and thrown

    def test_stop_failure(self):
        # 1) Test failure with RequiredArgumentMissing
        # 2) Test failure with TooManyArgumentsFound

        # 1) Test option 1
        failed = False
        try:
            self.environment.stop()
        except RequiredArgumentMissing:
            failed = True
        assert failed

        # 2) Test option 2
        failed = False
        try:
            self.environment.stop(run_id="hello", match_string="there")
        except TooManyArgumentsFound:
            failed = True
        assert failed

    def test_stop_success(self):
        # TODO: test more run options
        # 1) Test run_id input to stop
        # 2) Test match_string input to stop
        # 3) Test all input to stop
        self.project.init("test5", "test description")

        # Create environment definition
        definition_filepath = os.path.join(self.environment.home, "Dockerfile")
        with open(definition_filepath, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": "datmo-task-" + self.environment.model.id + "-" + "test",
            "volumes": None,
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Create environment definition
        env_def_path = os.path.join(self.project.home, "Dockerfile")
        with open(env_def_path, "w") as f:
            f.write(to_unicode(str("FROM datmo/xgboost:cpu")))

        input_dict = {
            "definition_filepath": definition_filepath,
        }

        # Create environment in the project
        environment_obj = self.environment.create(input_dict)

        log_filepath = os.path.join(self.project.home, "task.log")

        # Build environment in the project
        _ = self.environment.build(environment_obj.id)

        # 1) Test option 1

        _, run_id, _ = \
            self.environment.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment.stop(run_id=run_id)

        assert return_code

        # 2) Test option 2
        _, _, _ = \
            self.environment.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment.stop(match_string="datmo-task-" +
                                            self.environment.model.id)

        assert return_code

        # 3) Test option 3
        _, _, _ = \
            self.environment.run(environment_obj.id, run_options, log_filepath)
        run_options_2 = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": "datmo-task-" + self.environment.model.id + "-" + "test2",
            "volumes": None,
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        _, _, _ = \
            self.environment.run(environment_obj.id, run_options_2, log_filepath)
        return_code = self.environment.stop(all=True)

        assert return_code

        # teardown
        self.environment.delete(environment_obj.id)
Example #7
0
class EnvironmentCommand(ProjectCommand):
    def __init__(self, cli_helper):
        super(EnvironmentCommand, self).__init__(cli_helper)

    def environment(self):
        self.parse(["environment", "--help"])
        return True

    @Helper.notify_no_project_found
    def setup(self, **kwargs):
        self.environment_controller = EnvironmentController()
        name = kwargs.get("name", None)
        available_environments = self.environment_controller.get_supported_environments(
        )
        if not name:
            name = self.cli_helper.prompt_available_environments(
                available_environments)
        try:
            options = {"name": name}
            environment_obj = self.environment_controller.setup(
                options=options)
            self.cli_helper.echo(
                __("info", "cli.environment.setup.success",
                   (environment_obj.name, environment_obj.id)))
            return environment_obj
        except EnvironmentDoesNotExist:
            self.cli_helper.echo(
                __("error", "cli.environment.setup.argument", name))

    @Helper.notify_no_project_found
    def create(self, **kwargs):
        self.environment_controller = EnvironmentController()
        self.cli_helper.echo(__("info", "cli.environment.create"))
        created_environment_obj = self.environment_controller.create(kwargs)
        environments = self.environment_controller.list()
        for environment_obj in environments:
            if created_environment_obj == environment_obj:
                self.cli_helper.echo(
                    __("info", "cli.environment.create.alreadyexist",
                       created_environment_obj.id))
                return created_environment_obj
        self.cli_helper.echo(
            __("info", "cli.environment.create.success",
               created_environment_obj.id))
        return created_environment_obj

    @Helper.notify_no_project_found
    def update(self, **kwargs):
        self.environment_controller = EnvironmentController()
        environment_id = kwargs.get('id')
        name = kwargs.get('name', None)
        description = kwargs.get("description", None)
        result = self.environment_controller.update(environment_id,
                                                    name=name,
                                                    description=description)
        return result

    @Helper.notify_environment_active(EnvironmentController)
    @Helper.notify_no_project_found
    def delete(self, **kwargs):
        self.environment_controller = EnvironmentController()
        environment_id = kwargs.get('id')
        if self.environment_controller.delete(environment_id):
            self.cli_helper.echo(
                __("info", "cli.environment.delete.success", environment_id))
            return True

    @Helper.notify_no_project_found
    def ls(self, **kwargs):
        self.environment_controller = EnvironmentController()
        print_format = kwargs.get('format', "table")
        download = kwargs.get('download', None)
        download_path = kwargs.get('download_path', None)
        environment_objs = self.environment_controller.list()
        header_list = ["id", "created at", "name", "description"]
        item_dict_list = []
        for environment_obj in environment_objs:
            environment_obj_name = printable_object(environment_obj.name)
            environment_obj_description = printable_object(
                environment_obj.description)
            item_dict_list.append({
                "id": environment_obj.id,
                "created at": prettify_datetime(environment_obj.created_at),
                "name": environment_obj_name,
                "description": environment_obj_description
            })
        if download:
            if not download_path:
                # download to current working directory with timestamp
                current_time = datetime.utcnow()
                epoch_time = datetime.utcfromtimestamp(0)
                current_time_unix_time_ms = (
                    current_time - epoch_time).total_seconds() * 1000.0
                download_path = os.path.join(
                    self.environment_controller.home,
                    "environment_ls_" + str(current_time_unix_time_ms))
            self.cli_helper.print_items(header_list,
                                        item_dict_list,
                                        print_format=print_format,
                                        output_path=download_path)
            return environment_objs
        self.cli_helper.print_items(header_list,
                                    item_dict_list,
                                    print_format=print_format)
        return environment_objs
Example #8
0
class SnapshotController(BaseController):
    """SnapshotController inherits from BaseController and manages business logic related to snapshots

    Parameters
    ----------
    home : str
        home path of the project

    Attributes
    ----------
    code : CodeController
    file_collection : FileCollectionController
    environment : EnvironmentController

    Methods
    -------
    create(dictionary)
        Create a snapshot within the project
    checkout(id)
        Checkout to a specific snapshot within the project
    list(session_id=None)
        List all snapshots present within the project based on given filters
    delete(id)
        Delete the snapshot specified from the project

    """
    def __init__(self, home):
        super(SnapshotController, self).__init__(home)
        self.code = CodeController(home)
        self.file_collection = FileCollectionController(home)
        self.environment = EnvironmentController(home)
        if not self.is_initialized:
            raise ProjectNotInitializedException(
                __("error", "controller.snapshot.__init__"))

    def create(self, incoming_dictionary):
        """Create snapshot object

        Parameters
        ----------
        incoming_dictionary : dict

            for each of the 5 key components, this function searches for the
            variables below in order from the top. If none of the variables
            for a component are given, the default behavior described under
            that component applies.

            code :
                code_id : str, optional
                    code reference associated with the snapshot; if not
                    provided will look to inputs below for code creation
                commit_id : str, optional
                    commit id provided by the user if already available

                Default
                -------
                a commit will be taken and code created via the CodeController
                and added to the snapshot at the time of snapshot creation

            environment :
                environment_id : str, optional
                    id for environment used to create snapshot
                environment_definition_filepath : str, optional
                    absolute filepath for the environment definition file
                    (e.g. Dockerfile path for Docker)

                Default
                -------
                default environment files will be searched and environment will
                be created with the EnvironmentController and added to the snapshot
                at the time of snapshot creation

            file_collection :
                file_collection_id : str, optional
                    file collection associated with the snapshot
                filepaths : list, optional
                    list of files or folder paths to include within the snapshot

                Default
                -------
                filepaths will be considered empty ([]) and the
                FileCollectionController will create an empty FileCollection.

            config :
                config : dict, optional
                    key, value pairs of configurations
                config_filepath : str, optional
                    absolute filepath to configuration parameters file
                config_filename : str, optional
                    name of file with configuration parameters

                Default
                -------
                config will be considered empty ({}) and saved to the snapshot

            stats :
                stats : dict, optional
                    key, value pairs of metrics and statistics
                stats_filepath : str, optional
                    absolute filepath to stats parameters file
                stats_filename : str, optional
                    name of file with metrics and statistics.

                Default
                -------
                stats will be considered empty ({}) and saved to the snapshot

            the remaining arguments below are searched for directly in the
            input dictionary

                message : str
                    long description of snapshot (required)
                session_id : str, optional
                    session id within which snapshot is created,
                    will overwrite default if given
                task_id : str, optional
                    task id associated with snapshot
                label : str, optional
                    short description of snapshot
                visible : bool, optional
                    True if visible to user via list command else False

        Returns
        -------
        Snapshot
            Snapshot object as specified in datmo.core.entity.snapshot

        Raises
        ------
        RequiredArgumentMissing
            if required arguments are not given by the user
        FileIOException
            if files are not present or there is an error in File IO
        """
        # Validate Inputs
        create_dict = {
            "model_id": self.model.id,
            "session_id": self.current_session.id,
        }

        # Message must be present
        if "message" in incoming_dictionary:
            create_dict['message'] = incoming_dictionary['message']
        else:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.create.arg", "message"))

        # Code setup
        self._code_setup(incoming_dictionary, create_dict)

        # Environment setup
        self._env_setup(incoming_dictionary, create_dict)

        # File setup
        self._file_setup(incoming_dictionary, create_dict)

        # Config setup
        self._config_setup(incoming_dictionary, create_dict)

        # Stats setup
        self._stats_setup(incoming_dictionary, create_dict)

        # If snapshot object with required args already exists, return it
        # DO NOT create a new snapshot with the same required arguments
        results = self.dal.snapshot.query({
            "model_id": create_dict["model_id"],
            "code_id": create_dict['code_id'],
            "environment_id": create_dict['environment_id'],
            "file_collection_id": create_dict['file_collection_id'],
            "config": create_dict['config'],
            "stats": create_dict['stats']
        })
        if results:
            return results[0]

        # Optional args for Snapshot entity
        optional_args = ["task_id", "label", "visible"]
        for optional_arg in optional_args:
            if optional_arg in incoming_dictionary:
                create_dict[optional_arg] = incoming_dictionary[optional_arg]

        # Create snapshot and return
        return self.dal.snapshot.create(Snapshot(create_dict))

    def create_from_task(self, message, task_id):
        """Create snapshot from a completed task.
        # TODO: enable create from task DURING a run

        Parameters
        ----------
        message : str
            long description of snapshot
        task_id : str
            id of the completed task to use to create the snapshot

        Returns
        -------
        Snapshot
            Snapshot object as specified in datmo.core.entity.snapshot

        Raises
        ------
        TaskNotComplete
            if task specified has not been completed
        """
        task_obj = self.dal.task.get_by_id(task_id)

        if not task_obj.status or not task_obj.after_snapshot_id:
            raise TaskNotComplete(
                __("error", "controller.snapshot.create_from_task",
                   str(task_obj.id)))

        return self.dal.snapshot.update({
            "id": task_obj.after_snapshot_id,
            "message": message,
            "stats": task_obj.results,
            "visible": True
        })
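
    # Usage sketch with a hypothetical task id: promotes a finished task's
    # after snapshot into a visible snapshot carrying the task's results.
    #
    #   snapshot_obj = controller.create_from_task(
    #       message="snapshot from completed run", task_id="task_12345")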

    def checkout(self, snapshot_id):
        # Get snapshot object
        snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id)
        code_obj = self.dal.code.get_by_id(snapshot_obj.code_id)
        file_collection_obj = self.dal.file_collection.\
            get_by_id(snapshot_obj.file_collection_id)

        # Create new code_driver ref to revert back (if needed)
        # TODO: Save this to be reverted to
        current_code_obj = self.code.create()

        # Checkout code_driver to the relevant commit ref
        self.code_driver.checkout_ref(code_obj.commit_id)

        # Pull file collection to the project home
        dst_dirpath = os.path.join("datmo_snapshots", snapshot_id)
        abs_dst_dirpath = self.file_driver.create(dst_dirpath, directory=True)
        self.file_driver.transfer_collection(file_collection_obj.filehash,
                                             abs_dst_dirpath)
        return True

    def list(self, session_id=None, visible=None):
        query = {}
        if session_id:
            try:
                self.dal.session.get_by_id(session_id)
            except EntityNotFound:
                raise SessionDoesNotExistException(
                    __("error", "controller.snapshot.list", session_id))
            query['session_id'] = session_id
        if visible is not None and isinstance(visible, bool):
            query['visible'] = visible

        return self.dal.snapshot.query(query)

    def delete(self, snapshot_id):
        if not snapshot_id:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.delete.arg", "snapshot_id"))
        return self.dal.snapshot.delete(snapshot_id)

    def _code_setup(self, incoming_dictionary, create_dict):
        """ Set the code_id by using:
            1. code_id
            2. commit_id string, which creates a new code_id
            3. create a new code id

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """

        if "code_id" in incoming_dictionary:
            create_dict['code_id'] = incoming_dictionary['code_id']
        elif "commit_id" in incoming_dictionary:
            create_dict['code_id'] = self.code.\
                create(commit_id=incoming_dictionary['commit_id']).id
        else:
            create_dict['code_id'] = self.code.create().id
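
        # Precedence sketch with hypothetical inputs:
        #   {"code_id": "code_1", "commit_id": "a1b2c3d"}  -> "code_1" is used,
        #                                                     commit_id is ignored
        #   {"commit_id": "a1b2c3d"}                       -> new code object
        #                                                     created from the commit
        #   {}                                             -> self.code.create()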

    def _env_setup(self, incoming_dictionary, create_dict):
        """ TODO:

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """

        language = incoming_dictionary.get("language", None)
        if "environment_id" in incoming_dictionary:
            create_dict['environment_id'] = incoming_dictionary[
                'environment_id']
        elif "environment_definition_filepath" in incoming_dictionary:
            create_dict['environment_id'] = self.environment.create({
                "definition_filepath":
                incoming_dictionary['environment_definition_filepath']
            }).id
        elif language:
            create_dict['environment_id'] = self.environment.\
                create({"language": language}).id
        else:
            # create some default environment
            create_dict['environment_id'] = self.environment.\
                create({}).id
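
    # A minimal sketch of the precedence (hypothetical path): environment_id,
    # then a definition file, then a language, then an empty default.
    #   create_dict = {}
    #   self._env_setup(
    #       {"environment_definition_filepath": "/path/to/Dockerfile"},
    #       create_dict)
    #   # create_dict['environment_id'] references the new environment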

    def _file_setup(self, incoming_dictionary, create_dict):
        """ TODO:

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """

        if "file_collection_id" in incoming_dictionary:
            create_dict['file_collection_id'] = incoming_dictionary[
                'file_collection_id']
        elif "filepaths" in incoming_dictionary:
            # transform file paths to file_collection_id
            create_dict['file_collection_id'] = self.file_collection.\
                create(incoming_dictionary['filepaths']).id
        else:
            # create some default file collection
            create_dict['file_collection_id'] = self.file_collection.\
                create([]).id
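
    # A minimal sketch (hypothetical paths): filepaths are bundled into a new
    # file collection when no file_collection_id is passed in.
    #   create_dict = {}
    #   self._file_setup({"filepaths": ["train.py", "data/"]}, create_dict)
    #   # create_dict['file_collection_id'] references the new collection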

    def _config_setup(self, incoming_dictionary, create_dict):
        """ Fills in snapshot config by having one of the following:
            1. config = JSON object
            2. config_filepath = some location where a json file exists
            3. config_filename = just the file nam
        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOException
        """
        if "config" in incoming_dictionary:
            create_dict['config'] = incoming_dictionary['config']
        elif "config_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['config_filepath']):
                raise FileIOException(
                    __("error", "controller.snapshot.create.file_config"))
            # If path exists transform file to config dict
            config_json_driver = JSONStore(
                incoming_dictionary['config_filepath'])
            create_dict['config'] = config_json_driver.to_dict()
        elif "config_filename" in incoming_dictionary:
            config_filename = incoming_dictionary['config_filename']
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])
        else:
            create_dict['config'] = {}
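
    # A minimal sketch (hypothetical ids/names): a config_filename is resolved
    # against the snapshot's file collection via _find_in_filecollection, so
    # create_dict must already carry file_collection_id (set by _file_setup).
    #   create_dict = {"file_collection_id": "some_collection_id"}
    #   self._config_setup({"config_filename": "config.json"}, create_dict)
    #   # create_dict['config'] is the parsed JSON, or {} if nothing was found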

    def _stats_setup(self, incoming_dictionary, create_dict):
        """Fills in snapshot stats by having one of the following:
            1. stats = JSON object
            2. stats_filepath = some location where a json file exists
            3. stats_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOException
        """

        if "stats" in incoming_dictionary:
            create_dict['stats'] = incoming_dictionary['stats']
        elif "stats_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['stats_filepath']):
                raise FileIOException(
                    __("error", "controller.snapshot.create.file_stat"))
            # If path exists transform file to stats dict
            stats_json_driver = JSONStore(
                incoming_dictionary['stats_filepath'])
            create_dict['stats'] = stats_json_driver.to_dict()
        elif "stats_filename" in incoming_dictionary:
            stats_filename = incoming_dictionary['stats_filename']
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])
        else:
            create_dict['stats'] = {}
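
    # A minimal sketch: inline stats take precedence over any file-based source.
    #   create_dict = {}
    #   self._stats_setup({"stats": {"accuracy": 0.45}}, create_dict)
    #   # create_dict['stats'] == {"accuracy": 0.45}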

    def _find_in_filecollection(self, file_to_find, file_collection_id):
        """ Attempts to find a file within the file collection

        Returns
        -------
        dict
            output dictionary of the JSON file
        """

        file_collection_obj = self.file_collection.dal.file_collection.\
            get_by_id(file_collection_id)
        file_collection_path = \
            self.file_collection.file_driver.get_collection_path(
                file_collection_obj.filehash)
        # gather every path where the file could exist: the project home plus
        # each directory inside the file collection
        possible_paths = [os.path.join(self.home, file_to_find)] + \
            [os.path.join(self.home, dirpath, file_to_find)
             for dirpath, _, _ in os.walk(file_collection_path)]
        existing_possible_paths = [
            possible_path for possible_path in possible_paths
            if os.path.isfile(possible_path)
        ]
        if not existing_possible_paths:
            # TODO: Add some info / warning that no file was found
            # fall back to an empty default
            return {}
        else:
            # If any such path exists, load the first match into a dict
            json_file = JSONStore(existing_possible_paths[0])
            return json_file.to_dict()
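
    # A minimal sketch (hypothetical ids/names): returns the parsed JSON of the
    # first match found in the project home or the collection's directories.
    #   stats = self._find_in_filecollection("stats.json", "some_collection_id")
    #   # stats == {} when no such file exists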