# Example #1
class TestEnvironmentController():
    def setup_method(self):
        """Create an isolated datmo home directory before each test."""
        temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.temp_dir = temp_dir
        Config().set_home(temp_dir)
        self.project_controller = ProjectController()
        # ids of environments created by a test; cleaned up in teardown_method
        self.environment_ids = []

    def teardown_method(self):
        """Delete every environment a test registered, if docker is available.

        Skips cleanup entirely when docker is inactive or the project was
        never initialized (nothing could have been created).
        """
        if not check_docker_inactive(test_datmo_dir):
            if self.project_controller.is_initialized:
                self.environment_controller = EnvironmentController()
                # set() removes duplicate ids registered by a single test
                for env_id in list(set(self.environment_ids)):
                    if not self.environment_controller.delete(env_id):
                        # Include the id so a failed cleanup is diagnosable
                        # (the original bare `raise Exception` gave no context)
                        raise Exception(
                            "teardown failed to delete environment: %s" %
                            env_id)

    def __setup(self):
        """Initialize a project and seed the environment directory with a
        throwaway file and a minimal Dockerfile definition."""
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()
        environment_dir = (
            self.environment_controller.file_driver.environment_directory)
        with open(os.path.join(self.temp_dir, "test.txt"), "wb") as f:
            f.write(to_bytes("hello"))
        self.random_filepath = os.path.join(environment_dir, "test")
        with open(self.random_filepath, "wb") as f:
            f.write(to_bytes("cool"))
        self.definition_filepath = os.path.join(environment_dir, "Dockerfile")
        with open(self.definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

    def test_init_fail_project_not_init(self):
        """Constructing the controller outside an initialized project must
        raise ProjectNotInitialized."""
        Config().set_home(self.temp_dir)
        raised = False
        try:
            EnvironmentController()
        except ProjectNotInitialized:
            raised = True
        assert raised

    def test_get_environment_type(self):
        """The list of environment types should be non-empty."""
        self.__setup()
        environment_types = self.environment_controller.get_environment_types()
        assert environment_types

    def test_get_supported_environments(self):
        """Supported frameworks for the "cpu" type should be non-empty."""
        self.__setup()
        frameworks = self.environment_controller.get_supported_frameworks(
            "cpu")
        assert frameworks

    def test_get_supported_languages(self):
        """Supported languages for a known cpu framework should be
        non-empty."""
        self.__setup()
        languages = self.environment_controller.get_supported_languages(
            "cpu", "data-analytics")
        assert languages

    def test_setup(self):
        """setup() succeeds for a supported environment (clean tree or staged
        files) and fails on empty options or unstaged changes."""
        # TODO: Run all environment options and test if success
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()

        # Test success setup once (no files present)
        options = {
            "environment_framework": "data-analytics",
            "environment_type": "cpu",
            "environment_language": "py27"
        }
        result = self.environment_controller.setup(options=options)
        output_definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")

        assert isinstance(result, Environment)
        assert result.name == "%s:%s-%s" % (options['environment_framework'],
                                            options['environment_type'],
                                            options['environment_language'])
        assert result.description == "supported environment created by datmo"
        assert os.path.isfile(output_definition_filepath)
        # Use a context manager so the handle is closed (the original
        # open(...).read() leaked the file handle)
        with open(output_definition_filepath, "r") as f:
            assert "FROM datmo/data-analytics:cpu-py27" in f.read()
        # Test success setup again (files present, but staged)
        options = {
            "environment_framework": "data-analytics",
            "environment_type": "cpu",
            "environment_language": "py27"
        }
        result = self.environment_controller.setup(options=options)
        output_definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")

        assert isinstance(result, Environment)
        assert result.name == "%s:%s-%s" % (options['environment_framework'],
                                            options['environment_type'],
                                            options['environment_language'])
        assert result.description == "supported environment created by datmo"
        assert os.path.isfile(output_definition_filepath)
        with open(output_definition_filepath, "r") as f:
            assert "FROM datmo/data-analytics:cpu-py27" in f.read()

        # Test failure in downstream function (e.g. bad inputs, no name given)
        failed = False
        try:
            self.environment_controller.setup(options={})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Change environment file
        with open(output_definition_filepath, "wb") as f:
            f.write(to_bytes("new content"))

        # Test failure setup (unstaged changes)
        failed = False
        try:
            self.environment_controller.setup(options=options)
        except UnstagedChanges:
            failed = True
        assert failed

    def test_current_environment(self):
        """current_environment raises on unstaged changes, then returns the
        created Environment once the tree is committed via create()."""
        self.__setup()
        # Right after __setup the environment files are unstaged -> raises.
        raised = False
        try:
            self.environment_controller.current_environment()
        except UnstagedChanges:
            raised = True
        assert raised
        # After create() succeeds, the current environment must match it.
        environment_obj = self.environment_controller.create({
            "name": "test",
            "description": "test description"
        })
        self.environment_ids.append(environment_obj.id)
        current_environment_obj = (
            self.environment_controller.current_environment())
        assert current_environment_obj == environment_obj

    def test_create(self):
        """Exercise EnvironmentController.create across the definition-path
        options (project env dir, root dir, input dict, renamed paths) and the
        duplicate-path failure case."""
        # 0) Test SUCCESS create when definition path exists in project environment directory (no input, no root) -- with hardware file
        # 1) Test SUCCESS create when definition path exists in project environment directory (no input, no root)
        # 5) Test SUCCESS when definition path exists in project environment directory and passed from input dict (takes input)
        # 2) Test SUCCESS create when definition path exists in root project folder (no input, no project environment dir)
        # 3) Test SUCCESS create when definition path is passed into input dict (takes input, no project environment dir)
        # 4) Test SUCCESS create when definition path is passed into input dict along with expected filename to be saved
        # 6) Test FAIL when passing same filepath with same filename into input dict

        self.__setup()

        input_dict_0 = {"name": "test", "description": "test description"}

        # 0) Test option 0 (cannot test hash because hardware is machine-dependent)
        environment_obj_0 = self.environment_controller.create(input_dict_0)
        self.environment_ids.append(environment_obj_0.id)
        assert environment_obj_0
        assert isinstance(environment_obj_0, Environment)
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info
        assert environment_obj_0.unique_hash
        assert environment_obj_0.name == "test"
        assert environment_obj_0.description == "test description"

        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_0.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "test"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        # Context managers close the handles (the original open(...).read()
        # calls leaked file handles)
        with open(os.path.join(file_collection_dir, "Dockerfile"), "r") as f:
            output = f.read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))
        with open(os.path.join(file_collection_dir, "hardware_info"),
                  "r") as f:
            output = f.read()
        print(repr(output))

        # 1) Test option 1
        environment_obj_0 = self.environment_controller.create(
            input_dict_0, save_hardware_file=False)
        self.environment_ids.append(environment_obj_0.id)
        assert environment_obj_0
        assert isinstance(environment_obj_0, Environment)
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_0.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "test"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        with open(os.path.join(file_collection_dir, "Dockerfile"), "r") as f:
            output = f.read()
        print(repr(output))
        assert environment_obj_0.unique_hash == "1e32ff083520f792cbe4bafdc2de2a01"
        assert environment_obj_0.name == "test"
        assert environment_obj_0.description == "test description"
        # Files ["test", "Dockerfile"]

        # 5) Test option 5
        input_dict_1 = {
            "name": "test",
            "description": "test description",
            "paths": [self.definition_filepath],
        }

        environment_obj = self.environment_controller.create(
            input_dict_1, save_hardware_file=False)
        self.environment_ids.append(environment_obj.id)
        assert environment_obj
        assert isinstance(environment_obj, Environment)
        assert environment_obj.id
        assert environment_obj.driver_type == "docker"
        assert environment_obj.file_collection_id
        assert environment_obj.definition_filename
        assert environment_obj.hardware_info
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        with open(os.path.join(file_collection_dir, "Dockerfile"), "r") as f:
            output = f.read()
        print(repr(output))
        assert not os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        print(repr(output))
        assert environment_obj.unique_hash == "fd725be022ce93f870c81e2ee170189c"
        assert environment_obj.name == "test"
        assert environment_obj.description == "test description"
        # Files ["Dockerfile"]

        # remove the project environment directory
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))

        # Create environment definition in root directory
        home_definition_filepath = os.path.join(
            self.environment_controller.home, "Dockerfile")
        with open(home_definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # 2) Test option 2
        environment_obj_1 = self.environment_controller.create(
            input_dict_0, save_hardware_file=False)
        self.environment_ids.append(environment_obj_1.id)
        assert environment_obj_1
        assert isinstance(environment_obj_1, Environment)
        assert environment_obj_1.id
        assert environment_obj_1.driver_type == "docker"
        assert environment_obj_1.file_collection_id
        assert environment_obj_1.definition_filename
        assert environment_obj_1.hardware_info
        assert environment_obj_1.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_1.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_1.name == "test"
        assert environment_obj_1.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert environment_obj_1.unique_hash == "fd725be022ce93f870c81e2ee170189c"

        # 3) Test option 3
        input_dict_2 = {
            "name": "test",
            "description": "test description",
            "paths": [home_definition_filepath],
        }

        # Create environment in the project
        environment_obj_2 = self.environment_controller.create(
            input_dict_2, save_hardware_file=False)
        self.environment_ids.append(environment_obj_2.id)
        assert environment_obj_2
        assert isinstance(environment_obj_2, Environment)
        assert environment_obj_2.id
        assert environment_obj_2.driver_type == "docker"
        assert environment_obj_2.file_collection_id
        assert environment_obj_2.definition_filename
        assert environment_obj_2.hardware_info
        assert environment_obj_2.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_2.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_2.name == "test"
        assert environment_obj_2.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert environment_obj_2.unique_hash == "fd725be022ce93f870c81e2ee170189c"

        # 4) Test option 4
        input_dict_3 = {
            "paths": [home_definition_filepath + ">Dockerfile"],
        }

        # Create environment in the project
        environment_obj_3 = self.environment_controller.create(
            input_dict_3, save_hardware_file=False)
        self.environment_ids.append(environment_obj_3.id)
        assert environment_obj_3
        assert isinstance(environment_obj_3, Environment)
        assert environment_obj_3.id
        assert environment_obj_3.driver_type == "docker"
        assert environment_obj_3.file_collection_id
        assert environment_obj_3.definition_filename
        assert environment_obj_3.hardware_info
        assert environment_obj_3.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection. \
            get_by_id(environment_obj_3.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver. \
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_3.name == "test"
        assert environment_obj_3.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert environment_obj_3.unique_hash == "fd725be022ce93f870c81e2ee170189c"

        # 6) Test option 6
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")

        input_dict = {
            "paths": [
                definition_filepath + ">Dockerfile",
                definition_filepath + ">Dockerfile"
            ],
        }

        # Create environment in the project
        failed = False
        try:
            _ = self.environment_controller.create(input_dict,
                                                   save_hardware_file=False)
        except FileAlreadyExistsError:
            failed = True

        assert failed

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_build(self):
        """Build environments from definitions discovered in several places.

        Requires a working docker daemon (see decorator). Options 2-4 must
        all resolve to the same environment because the definition content
        is identical each time.
        """
        # 1) Test build when no environment given
        # 2) Test build when definition path exists and given
        # 3) Test build when NO file exists and definition path exists
        # 4) Test build when file exists and definition path exists
        # 5) Test build when file exists but NO definition path exists
        # 6) Test build from project environment directory with a datmo base image
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        # 1) Test option 1: unknown id must raise EnvironmentDoesNotExist
        failed = False
        try:
            _ = self.environment_controller.build("does_not_exist")
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Create environment definition (random text makes the image unique
        # per test run so docker cannot reuse a stale cached image)
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))
        input_dict = {
            "paths": [definition_filepath],
        }

        # 2) Test option 2
        # Create environment in the project
        environment_obj_1 = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj_1.id)
        result = self.environment_controller.build(environment_obj_1.id)
        assert result

        # 3) Test option 3
        # Create environment in the project
        environment_obj_2 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_2.id)
        assert result

        # Create script to test
        test_filepath = os.path.join(self.environment_controller.home,
                                     "script.py")
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import numpy\n"))
            f.write(to_bytes("import sklearn\n"))
            f.write(to_bytes("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_3.id)
        assert result

        # test 2), 3), and 4) will result in the same environment
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # Remove the root Dockerfile so the definition in the project
        # environment directory is used for the remaining cases
        os.remove(definition_filepath)

        # 5) Test option 5
        # Create environment definition in project environment directory
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))
        environment_obj_4 = self.environment_controller.create({})
        self.environment_ids.append(environment_obj_4.id)
        result = self.environment_controller.build(environment_obj_4.id)
        assert result

        # 6) Test option 6
        # Create environment definition in project environment directory with datmo base image
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(
                to_bytes("FROM datmo/data-analytics:cpu-py27%s" % os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))
        environment_obj_4 = self.environment_controller.create({})
        self.environment_ids.append(environment_obj_4.id)
        result = self.environment_controller.build(environment_obj_4.id,
                                                   workspace="notebook")
        assert result

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_extract_workspace_url(self):
        """extract_workspace_url returns None when no container is running.

        Requires a working docker daemon (see decorator).
        """
        # Create environment definition
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(
                to_bytes("FROM datmo/python-base:cpu-py27-notebook" +
                         os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))

        image_name = "test"
        input_dict = {"name": image_name, "description": "test description"}
        # Create environment in the project
        environment_obj = self.environment_controller.create(
            input_dict, save_hardware_file=False)
        # Register the id so teardown_method cleans it up (this registration
        # was previously missing)
        self.environment_ids.append(environment_obj.id)
        self.environment_controller.build(environment_obj.id)

        # Test when there is no container being run
        workspace_url = self.environment_controller.extract_workspace_url(
            image_name, "notebook")
        # Idiomatic None check (was `== None`)
        assert workspace_url is None

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_run(self):
        """Run a simple command in environments built from Dockerfiles found
        in (0) the project environment directory, (1) an explicit input path,
        and (2) the hardware/default definition after the Dockerfile is gone.

        Requires a working docker daemon (see decorator). Each case asserts a
        zero exit code, a run id, and captured logs.
        """
        # Test run simple command with simple Dockerfile
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # 0) Test option 0
        # Create environment definition in project environment directory
        # (random text keeps the image unique per run, defeating stale caches)
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))

        random_name = str(uuid.uuid1())
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_name,
            "volumes": None,
            "detach": True,
            "stdin_open": False,
            "mem_limit": "4g",
            "tty": False,
            "api": False
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)
        log_filepath = os.path.join(self.project_controller.home, "task.log")

        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs
        # remove Dockerfile so the next case uses the root-level definition
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))

        # 1) Test option 1
        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))

        random_name = str(uuid.uuid1())
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8889:8889"],
            "name": random_name,
            "volumes": None,
            "mem_limit": "4g",
            "detach": True,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)
        log_filepath = os.path.join(self.project_controller.home, "task.log")

        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

        # 2) Test option 2
        os.remove(definition_filepath)

        # Create script to test
        test_filepath = os.path.join(self.environment_controller.home,
                                     "script.py")
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import os\n"))
            f.write(to_bytes("import sys\n"))
            f.write(to_bytes("print('hello')\n"))

        # Create environment in the project
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)
        self.environment_controller.build(environment_obj.id)

        random_name = str(uuid.uuid1())
        # This case mounts the project home read-write into the container
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8899:8899"],
            "name": random_name,
            "volumes": {
                self.environment_controller.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            },
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)

        assert return_code == 0
        assert run_id
        assert logs

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_interactive_run(self):
        """Interactive terminal and jupyter-notebook runs should block.

        Requires a working docker daemon (see decorator). Each case wraps the
        run in a 10-second timeout: an interactive/notebook container keeps
        running, so hitting TimeoutError is the SUCCESS signal here.
        """
        # 1) Test run interactive terminal in environment
        # 2) Test run jupyter notebook in environment
        # Create environment definition
        self.project_controller.init("test6", "test description")
        self.environment_controller = EnvironmentController()

        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(
                to_bytes("FROM nbgallery/jupyter-alpine:latest" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)
        # 1) Test option 1
        # NOTE(review): the second parameter shadows the enclosing function's
        # name and is returned unchanged — it only propagates the flag value.
        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, timed_run):
            # stdin_open/tty=True makes this an interactive terminal session
            run_options = {
                "command": [],
                "ports": ["8889:8889"],
                "name": environment_obj.id + "-" + container_name,
                "volumes": None,
                "mem_limit": "4g",
                "detach": True,
                "stdin_open": True,
                "tty": True,
                "api": False
            }

            log_filepath = os.path.join(self.project_controller.home,
                                        "task.log")

            # Build environment in the project
            _ = self.environment_controller.build(environment_obj.id)

            # Run environment in the project
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)

            return timed_run

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            # Timing out means the interactive session stayed alive: success
            timed_run_result = True

        assert timed_run_result

        # 2) Test option 2
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)

        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, timed_run):
            # Launches a jupyter notebook server, which also never exits
            run_options = {
                "command": ["jupyter", "notebook", "--allow-root"],
                "ports": ["8888:8888"],
                "name": environment_obj.id + "-" + container_name,
                "volumes": None,
                "mem_limit": "4g",
                "detach": True,
                "stdin_open": False,
                "tty": False,
                "api": False
            }

            log_filepath = os.path.join(self.project_controller.home,
                                        "task.log")

            # Build environment in the project
            _ = self.environment_controller.build(environment_obj.id)

            # Run environment in the project
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)

            return timed_run

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            # Timing out means the notebook server stayed alive: success
            timed_run_result = True

        assert timed_run_result

    def test_list(self):
        """Both created environments must show up in list()."""
        self.project_controller.init("test4", "test description")
        self.environment_controller = EnvironmentController()

        # First environment: plain Dockerfile path
        dockerfile_a = os.path.join(self.environment_controller.home,
                                    "Dockerfile")
        with open(dockerfile_a, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))
        environment_obj_1 = self.environment_controller.create({
            "paths": [dockerfile_a],
        })
        self.environment_ids.append(environment_obj_1.id)

        # Second environment: source file renamed to Dockerfile on save
        dockerfile_b = os.path.join(self.environment_controller.home,
                                    "Dockerfile2")
        with open(dockerfile_b, "wb") as f:
            f.write(to_bytes("FROM python:3.4-alpine"))
        environment_obj_2 = self.environment_controller.create({
            "paths": [dockerfile_b + ">Dockerfile"],
        })
        self.environment_ids.append(environment_obj_2.id)

        # List all environments and ensure both exist
        listed = self.environment_controller.list()
        assert len(listed) == 2
        assert environment_obj_1 in listed
        assert environment_obj_2 in listed

    def test_update(self):
        """update() rewrites name/description; unknown ids must raise."""
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # Seed a Dockerfile and create an environment from it
        dockerfile_path = os.path.join(self.environment_controller.home,
                                       "Dockerfile")
        with open(dockerfile_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))
        environment_obj = self.environment_controller.create({
            "paths": [dockerfile_path],
        })
        self.environment_ids.append(environment_obj.id)

        # Successful update reflects the new metadata
        new_name = "test name"
        new_description = "test description"
        updated = self.environment_controller.update(
            environment_obj.id, name=new_name, description=new_description)
        assert updated
        assert isinstance(updated, Environment)
        assert updated.name == new_name
        assert updated.description == new_description

        # Updating a nonexistent id must raise EnvironmentDoesNotExist
        raised = False
        try:
            self.environment_controller.update(
                "random_id", name=new_name, description=new_description)
        except EnvironmentDoesNotExist:
            raised = True
        assert raised

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_delete(self):
        """Delete should remove the environment and make lookups fail.

        The created environment is intentionally NOT appended to
        self.environment_ids: it is deleted within the test, so teardown
        must not attempt to delete it a second time.
        """
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)

        # Delete environment in the project
        result = self.environment_controller.delete(environment_obj.id)
        # Direct truthiness asserts instead of `== True` (PEP 8 E712)
        assert result

        # Retrieval of the deleted environment must raise EntityNotFound
        thrown = False
        try:
            self.environment_controller.dal.environment.get_by_id(
                environment_obj.id)
        except EntityNotFound:
            thrown = True
        assert thrown

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_failure(self):
        """Stop must reject calls with no selector and with too many selectors."""
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # No run_id / match_string / all given -> RequiredArgumentMissing
        raised = False
        try:
            self.environment_controller.stop()
        except RequiredArgumentMissing:
            raised = True
        assert raised

        # Mutually exclusive selectors given together -> TooManyArgumentsFound
        raised = False
        try:
            self.environment_controller.stop(
                run_id="hello", match_string="there")
        except TooManyArgumentsFound:
            raised = True
        assert raised

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_success(self):
        """Stop should succeed via run_id, via match_string, and via all=True.

        The environment image is rebuilt before each sub-case (presumably
        because stop removes it — see the rebuild comments below; TODO
        confirm against EnvironmentController.stop).
        """
        # TODO: test more run options
        # 1) Test run_id input to stop
        # 2) Test match_string input to stop
        # 3) Test all input to stop
        # 4) Test if the image was removed by stop
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # Container run options; the name embeds the model id so the
        # match_string sub-case below can select it by prefix
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": "datmo-task-" + self.environment_controller.model.id +
            "-" + "test",
            "volumes": None,
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Create environment definition
        # NOTE(review): this writes a Dockerfile with a unique RUN line
        # (random uuid) under project_controller.home — presumably the same
        # file as definition_filepath above when the homes match, which
        # defeats docker build caching; confirm.
        env_def_path = os.path.join(self.project_controller.home, "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))

        input_dict = {
            "paths": [definition_filepath],
        }

        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)
        log_filepath = os.path.join(self.project_controller.home, "task.log")

        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        # 1) Test option 1: stop by the run id returned from run()

        _, run_id, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment_controller.stop(
            run_id=run_id, environment_id=environment_obj.id)

        assert return_code

        # 2) Test option 2: stop by a container-name match string

        # Rebuild environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        return_code = self.environment_controller.stop(
            match_string="datmo-task-" + self.environment_controller.model.id,
            environment_id=environment_obj.id)

        assert return_code

        # 3) Test option 3: stop all containers for the environment at once

        # Rebuild environment in the project
        _ = self.environment_controller.build(environment_obj.id)
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options, log_filepath)
        # Second concurrent run with a different port/name so all=True has
        # more than one container to stop
        run_options_2 = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8889:8889"],
            "name": "datmo-task-" + self.environment_controller.model.id +
            "-" + "test2",
            "volumes": None,
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options_2, log_filepath)
        return_code = self.environment_controller.stop(
            all=True, environment_id=environment_obj.id)

        assert return_code

    def test_exists(self):
        """Exists should resolve by id and by unique hash, and reject bad ids."""
        # Before setup the project is uninitialized (and the controller
        # attribute may not exist yet), so create must fail. Catch
        # Exception rather than a bare `except:` so KeyboardInterrupt /
        # SystemExit are not silently swallowed.
        failed = False
        try:
            _ = self.environment_controller.create({})
        except Exception:
            failed = True
        assert failed

        # Setup
        self.__setup()
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)
        # Check by environment id
        result = self.environment_controller.exists(
            environment_id=environment_obj.id)
        assert result

        # Check by unique hash
        result = self.environment_controller.exists(
            environment_unique_hash=environment_obj.unique_hash)
        assert result

        # Test with wrong environment id
        result = self.environment_controller.exists(
            environment_id='test_wrong_env_id')
        assert not result

    def test_calculate_project_environment_hash(self):
        """Environment hash should be deterministic and match create()'s hash."""
        # Setup
        self.__setup()
        # Test hashing the default (with hardware info)
        result = self.environment_controller._calculate_project_environment_hash(
        )
        assert result
        # Test hashing the default Dockerfile
        # NOTE: the literal below is the expected hash of the files written
        # by __setup (Dockerfile + "test") without hardware info — update it
        # if __setup's file contents change.
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "1e32ff083520f792cbe4bafdc2de2a01"
        # Test if hash is the same as that of create
        environment_obj = self.environment_controller.create(
            {}, save_hardware_file=False)
        self.environment_ids.append(environment_obj.id)
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "1e32ff083520f792cbe4bafdc2de2a01"
        assert result == environment_obj.unique_hash

        # Test if the hash is the same if the same file is passed in as an input
        input_dict = {
            "paths": [self.definition_filepath, self.random_filepath]
        }
        environment_obj_1 = self.environment_controller.create(
            input_dict, save_hardware_file=False)
        self.environment_ids.append(environment_obj_1.id)
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "1e32ff083520f792cbe4bafdc2de2a01"
        assert result == environment_obj_1.unique_hash

    def test_has_unstaged_changes(self):
        """Unstaged changes appear only after a tracked file is modified."""
        # Setup project, environment files, and a committed environment
        self.__setup()
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)

        # Freshly committed environment -> nothing unstaged
        assert not self.environment_controller._has_unstaged_changes()

        # Bump the python version in the Dockerfile to dirty the environment
        dockerfile = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        with open(dockerfile, "wb") as f:
            f.write(to_bytes("FROM python:3.6-alpine"))

        # The modification must now be reported as unstaged
        assert self.environment_controller._has_unstaged_changes()

    def test_check_unstaged_changes(self):
        """check_unstaged_changes: False when staged, raises UnstagedChanges otherwise."""
        # Setup
        self.__setup()
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)

        # 1) After committing the changes
        # Check for no unstaged changes because already committed
        result = self.environment_controller.check_unstaged_changes()
        assert not result

        # Add a new file
        with open(
                os.path.join(
                    self.environment_controller.file_driver.
                    environment_directory, "test2"), "wb") as f:
            f.write(to_bytes("cool"))

        # 2) Not committing the changes, should error and raise UnstagedChanges
        failed = False
        try:
            self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        assert failed

        # Remove new file
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test2"))

        # 3) Files are the same as before but no new commit, should have no unstaged changes
        result = self.environment_controller.check_unstaged_changes()
        assert not result

        # 4) Remove another file, now it is different and should have unstaged changes
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        failed = False
        try:
            self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        assert failed

        # 5) Remove the rest of the files, now it is empty and should return as already staged
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        result = self.environment_controller.check_unstaged_changes()
        assert not result

    def test_checkout(self):
        """Checkout restores a snapshot's files and refuses unstaged changes.

        Also verifies the restored file SET by name, since the hash alone
        does not capture filenames (see comments below).
        """
        # Setup and create all environment files
        self.__setup()

        # Create environment to checkout to with defaults
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)
        # Checkout success with there are no unstaged changes
        result = self.environment_controller.checkout(environment_obj.id)
        assert result
        current_hash = self.environment_controller._calculate_project_environment_hash(
        )
        assert environment_obj.unique_hash == current_hash
        # Check the filenames as well because the hash does not take this into account
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        # Internal bookkeeping files must not be restored into the directory
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "datmoDockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "hardware_info"))

        # Change file contents to make it unstaged
        with open(self.definition_filepath, "wb") as f:
            f.write(to_bytes("new content"))

        # Checkout failure with unstaged changes
        failed = False
        try:
            _ = self.environment_controller.checkout(environment_obj.id)
        except UnstagedChanges:
            failed = True
        assert failed

        # Create new environment to checkout to with defaults (no hardware)
        environment_obj_1 = self.environment_controller.create(
            {}, save_hardware_file=False)
        self.environment_ids.append(environment_obj_1.id)
        # Checkout success with there are no unstaged changes
        # (back to the FIRST environment; the hash must match it, not the new one)
        result = self.environment_controller.checkout(environment_obj.id)
        assert result
        current_hash = self.environment_controller._calculate_project_environment_hash(
        )
        assert environment_obj.unique_hash == current_hash
        assert environment_obj_1.unique_hash != current_hash
        # Check the filenames as well because the hash does not take this into account
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "datmoDockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "hardware_info"))
# Example #2
class SnapshotController(BaseController):
    """SnapshotController inherits from BaseController and manages business logic related to snapshots

    Parameters
    ----------
    home : str
        home path of the project

    Attributes
    ----------
    code : datmo.core.controller.code.code.CodeController
    file_collection : datmo.core.controller.file.file_collection.FileCollectionController
    environment : datmo.core.controller.environment.environment.EnvironmentController

    Methods
    -------
    create(dictionary)
        Create a snapshot within the project
    checkout(id)
        Checkout to a specific snapshot within the project
    list(session_id=None)
        List all snapshots present within the project based on given filters
    delete(id)
        Delete the snapshot specified from the project

    """
    def __init__(self):
        """Wire up the code/file/environment sub-controllers.

        Raises
        ------
        ProjectNotInitialized
            if the surrounding project has not been initialized yet
        """
        super(SnapshotController, self).__init__()
        self.code_controller = CodeController()
        self.file_collection_controller = FileCollectionController()
        self.environment_controller = EnvironmentController()
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.snapshot.__init__"))

    def current_snapshot(self):
        """Return the snapshot matching the current project state.

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot or None
            newest snapshot tying together the current code, environment and
            file collection; None only if the DB is out of sync (should not
            happen)

        Raises
        ------
        UnstagedChanges
            raised by the sub-controllers if any component has unstaged
            changes, since no committed snapshot can represent that state
        """
        code_obj = self.code_controller.current_code()
        environment_obj = self.environment_controller.current_environment()
        file_collection_obj = \
            self.file_collection_controller.current_file_collection()

        # Query for the newest snapshot combining these three components
        matches = self.dal.snapshot.query(
            {
                "model_id": self.model.id,
                "code_id": code_obj.id,
                "environment_id": environment_obj.id,
                "file_collection_id": file_collection_obj.id
            },
            sort_key="created_at",
            sort_order="descending")
        if not matches:
            # Should never happen, unless the DB is out of sync
            return None
        return matches[0]

    def create(self, dictionary):
        """Create snapshot object

        Parameters
        ----------
        dictionary : dict

            for each of the 5 key components, this function will search for
            one of the variables below starting from the top. Default functionality
            is described below for each component as well for reference if none
            of the variables are given.

            code :
                code_id : str, optional
                    code reference associated with the snapshot; if not
                    provided will look to inputs below for code creation
                commit_id : str, optional
                    commit id provided by the user if already available

                Default
                -------
                commits will be taken and code created via the CodeController
                and are added to the snapshot at the time of snapshot creation

            environment :
                environment_id : str, optional
                    id for environment used to create snapshot
                workspace : str, optional
                    type of workspace being used
                environment_paths : list, optional
                    list of absolute or relative filepaths and/or dirpaths to collect with destination names
                    (e.g. "/path/to/file>hello", "/path/to/file2", "/path/to/dir>newdir")

                Default
                -------
                default environment files will be searched and environment will
                be created with the EnvironmentController and added to the snapshot
                at the time of snapshot creation

            file_collection :
                file_collection_id : str, optional
                    file collection associated with the snapshot
                paths : list, optional
                    list of absolute or relative filepaths and/or dirpaths to collect with destination names
                    (e.g. "/path/to/file:hello", "/path/to/file2", "/path/to/dir:newdir")
                    NOTE(review): the separator here (":") differs from the
                    ">" used for environment_paths above — confirm which is
                    correct for file paths.

                Default
                -------
                paths will be considered empty ([]), and the FileCollectionController
                will create a blank FileCollection that is empty.

            config :
                config : dict, optional
                    key, value pairs of configurations
                config_filepath : str, optional
                    absolute filepath to configuration parameters file
                config_filename : str, optional
                    name of file with configuration parameters

                Default
                -------
                config will be considered empty ({}) and saved to the snapshot

            stats :
                stats : dict, optional
                    key, value pairs of metrics and statistics
                stats_filepath : str, optional
                    absolute filepath to stats parameters file
                stats_filename : str, optional
                    name of file with metrics and statistics.

                Default
                -------
                stats will be considered empty ({}) and saved to the snapshot

            for the remaining optional arguments it will search for them
            in the input dictionary

                message : str
                    long description of snapshot
                session_id : str, optional
                    session id within which snapshot is created,
                    will overwrite default if given
                task_id : str, optional
                    task id associated with snapshot
                label : str, optional
                    short description of snapshot
                visible : bool, optional
                    True if visible to user via list command else False

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            snapshot object with all relevant parameters; if a snapshot with
            the same required components already exists, that existing
            snapshot is returned instead of creating a duplicate

        Raises
        ------
        RequiredArgumentMissing
            if required arguments are not given by the user
        FileIOError
            if files are not present or there is an error in File IO
        """
        # Validate Inputs
        create_dict = {
            "model_id": self.model.id,
            "session_id": self.current_session.id,
        }

        validate("create_snapshot", dictionary)

        # Message must be present
        if "message" in dictionary:
            create_dict['message'] = dictionary['message']
        else:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.create.arg", "message"))

        # Each _*_setup helper resolves one component id/value into
        # create_dict from the user-supplied dictionary (see helpers below)

        # Code setup
        self._code_setup(dictionary, create_dict)

        # Environment setup
        self._env_setup(dictionary, create_dict)

        # File setup
        self._file_setup(dictionary, create_dict)

        # Config setup
        self._config_setup(dictionary, create_dict)

        # Stats setup
        self._stats_setup(dictionary, create_dict)

        # If snapshot object with required args already exists, return it
        # DO NOT create a new snapshot with the same required arguments
        results = self.dal.snapshot.query({
            "model_id":
            create_dict["model_id"],
            "code_id":
            create_dict['code_id'],
            "environment_id":
            create_dict['environment_id'],
            "file_collection_id":
            create_dict['file_collection_id'],
            "config":
            create_dict['config'],
            "stats":
            create_dict['stats']
        })
        if results: return results[0]

        # Optional args for Snapshot entity
        optional_args = ["task_id", "label", "visible"]
        for optional_arg in optional_args:
            if optional_arg in dictionary:
                create_dict[optional_arg] = dictionary[optional_arg]

        # Create snapshot and return
        return self.dal.snapshot.create(Snapshot(create_dict))

    def create_from_task(self,
                         message,
                         task_id,
                         label=None,
                         config=None,
                         stats=None):
        """Create snapshot from a completed task.
        # TODO: enable create from task DURING a run

        Promotes the task's after-snapshot to a visible snapshot by updating
        it in place (no new snapshot entity is created).

        Parameters
        ----------
        message : str
            long description of snapshot
        task_id : str
            task object to use to create snapshot
        label: str, optional
            short description of snapshot
        config : dict, optional
            key, value pairs of configurations
        stats : dict, optional
            key, value pairs of metrics and statistics; if omitted, the
            after-snapshot's existing stats are merged with the task results

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            snapshot object with all relevant parameters

        Raises
        ------
        TaskNotComplete
            if task specified has not been completed
        """

        validate(
            "create_snapshot_from_task", {
                "message": message,
                "task_id": task_id,
                "label": label,
                "config": config,
                "stats": stats
            })

        task_obj = self.dal.task.get_by_id(task_id)

        # NOTE(review): this only raises when BOTH status and
        # after_snapshot_id are missing; a task with a status but no
        # after-snapshot slips through to get_by_id below — confirm whether
        # "and" vs "or" is intended here.
        if not task_obj.status and not task_obj.after_snapshot_id:
            raise TaskNotComplete(
                __("error", "controller.snapshot.create_from_task",
                   str(task_obj.id)))

        after_snapshot_obj = self.dal.snapshot.get_by_id(
            task_obj.after_snapshot_id)

        snapshot_update_dict = {
            "id": task_obj.after_snapshot_id,
            "message": message,
            "visible": True
        }

        if label:
            snapshot_update_dict["label"] = label

        if config:
            snapshot_update_dict["config"] = config

        if stats:
            snapshot_update_dict["stats"] = stats
        else:
            # Append to any existing stats already present
            snapshot_update_dict["stats"] = {}
            if after_snapshot_obj.stats is not None:
                snapshot_update_dict["stats"].update(after_snapshot_obj.stats)
            if task_obj.results is not None:
                snapshot_update_dict["stats"].update(task_obj.results)
            # Normalize a merged-but-empty dict back to None
            if snapshot_update_dict["stats"] == {}:
                snapshot_update_dict["stats"] = None

        return self.dal.snapshot.update(snapshot_update_dict)

    def check_unstaged_changes(self):
        """Check whether any snapshot component has unstaged changes.

        Returns
        -------
        bool
            False if code, environment and file collection are all staged

        Raises
        ------
        UnstagedChanges
            if any of code, environment or file collection has unstaged
            changes
        """
        # Each sub-controller raises UnstagedChanges itself; let that
        # exception propagate with its original message and traceback
        # instead of catching it and re-raising a fresh, context-free
        # instance (the previous except/raise pair added no information).
        self.code_controller.check_unstaged_changes()
        self.environment_controller.check_unstaged_changes()
        self.file_collection_controller.check_unstaged_changes()
        return False

    def checkout(self, snapshot_id):
        """Check out code, environment and files to the given snapshot.

        Parameters
        ----------
        snapshot_id : str
            id of the snapshot to restore

        Returns
        -------
        bool
            True only if all three component checkouts succeed

        Raises
        ------
        UnstagedChanges
            if any component has unstaged changes before checkout
        """
        # Resolve the snapshot and its three component entities
        snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id)
        code_obj = self.dal.code.get_by_id(snapshot_obj.code_id)
        environment_obj = self.dal.environment.get_by_id(
            snapshot_obj.environment_id)
        file_collection_obj = self.dal.file_collection.get_by_id(
            snapshot_obj.file_collection_id)

        # Refuse to clobber any uncommitted work
        self.check_unstaged_changes()

        # Restore each component; all must succeed for an overall success
        code_ok = self.code_controller.checkout(code_obj.id)
        environment_ok = self.environment_controller.checkout(
            environment_obj.id)
        files_ok = self.file_collection_controller.checkout(
            file_collection_obj.id)
        return code_ok and environment_ok and files_ok

    def list(self,
             session_id=None,
             visible=None,
             sort_key=None,
             sort_order=None):
        """List snapshots for this project, optionally filtered.

        Parameters
        ----------
        session_id : str, optional
            restrict results to this session (must exist)
        visible : bool, optional
            restrict results to snapshots with this visibility flag
        sort_key : str, optional
        sort_order : str, optional

        Returns
        -------
        list
            snapshot entities matching the filters

        Raises
        ------
        SessionDoesNotExist
            if the given session_id does not resolve to a session
        """
        query = {}
        if session_id:
            # Validate the session exists before filtering on it
            try:
                self.dal.session.get_by_id(session_id)
            except EntityNotFound:
                raise SessionDoesNotExist(
                    __("error", "controller.snapshot.list", session_id))
            query["session_id"] = session_id
        # isinstance(None, bool) is False, so this covers the None check too
        if isinstance(visible, bool):
            query["visible"] = visible
        return self.dal.snapshot.query(query, sort_key, sort_order)

    def update(self,
               snapshot_id,
               config=None,
               stats=None,
               message=None,
               label=None,
               visible=None):
        """Update mutable snapshot metadata and return the updated entity.

        Only fields that are explicitly provided (not None) are written.

        Raises
        ------
        RequiredArgumentMissing
            if snapshot_id is falsy
        """
        if not snapshot_id:
            # NOTE(review): the message key says "delete.arg" — looks
            # copy-pasted from delete(); confirm against the messages table
            # before changing it.
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.delete.arg", "snapshot_id"))
        validate(
            "update_snapshot", {
                "config": config,
                "stats": stats,
                "message": message,
                "label": label,
                "visible": visible
            })
        payload = {"id": snapshot_id}
        for field, value in (("config", config), ("stats", stats),
                             ("message", message), ("label", label),
                             ("visible", visible)):
            if value is not None:
                payload[field] = value
        return self.dal.snapshot.update(payload)

    def get(self, snapshot_id):
        """Fetch a snapshot entity by id.

        Parameters
        ----------
        snapshot_id : str
            id for the snapshot to fetch

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            the matching snapshot entity

        Raises
        ------
        DoesNotExist
            if no snapshot with the given id exists
        """
        try:
            snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id)
        except EntityNotFound:
            # Translate the DAL-level error into the controller-level one
            raise DoesNotExist()
        return snapshot_obj

    def get_files(self, snapshot_id, mode="r"):
        """Open and return the files captured by a snapshot.

        Parameters
        ----------
        snapshot_id : str
            id of the snapshot whose files to open
        mode : str
            file open mode passed through to the driver
            (default is "r" to open for read)

        Returns
        -------
        list
            open python file objects from the snapshot's file collection

        Raises
        ------
        DoesNotExist
            if no snapshot with the given id exists
        """
        try:
            snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id)
        except EntityNotFound:
            # Translate the DAL-level error into the controller-level one
            raise DoesNotExist()
        # Resolve the snapshot's file collection, then open its files
        collection_obj = self.dal.file_collection.get_by_id(
            snapshot_obj.file_collection_id)
        return self.file_driver.get_collection_files(
            collection_obj.filehash, mode=mode)

    def delete(self, snapshot_id):
        """Remove the snapshot entity with the given id.

        Parameters
        ----------
        snapshot_id : str
            id for the snapshot to remove

        Returns
        -------
        bool
            True if success

        Raises
        ------
        RequiredArgumentMissing
            if the provided snapshot_id is falsy
        """
        if snapshot_id:
            return self.dal.snapshot.delete(snapshot_id)
        raise RequiredArgumentMissing(
            __("error", "controller.snapshot.delete.arg", "snapshot_id"))

    def _code_setup(self, incoming_dictionary, create_dict):
        """Resolve create_dict['code_id'] from the incoming dictionary.

        Resolution order: an explicit code_id wins, then a commit_id (which
        creates a code record for that commit), else a brand-new code record
        is created from the current state.

        Parameters
        ----------
        incoming_dictionary : dict
            user-supplied arguments from create()
        create_dict : dict
            dictionary being assembled for the Snapshot entity (mutated)
        """
        if "code_id" in incoming_dictionary:
            create_dict["code_id"] = incoming_dictionary["code_id"]
        elif "commit_id" in incoming_dictionary:
            commit_id = incoming_dictionary["commit_id"]
            create_dict["code_id"] = \
                self.code_controller.create(commit_id=commit_id).id
        else:
            create_dict["code_id"] = self.code_controller.create().id

    def _env_setup(self, incoming_dictionary, create_dict):
        """ TODO:

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "environment_id" in incoming_dictionary:
            create_dict['environment_id'] = incoming_dictionary[
                'environment_id']
        elif "environment_paths" in incoming_dictionary:
            create_dict['environment_id'] = self.environment_controller.create(
                {
                    "paths": incoming_dictionary['environment_paths']
                }).id
        else:
            # create some default environment
            create_dict['environment_id'] = self.environment_controller.\
                create({}).id

    def _file_setup(self, incoming_dictionary, create_dict):
        """ Checks for user inputs and uses the file collection controller to
        obtain the file collection id and create the necessary collection

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """

        if "file_collection_id" in incoming_dictionary:

            create_dict['file_collection_id'] = incoming_dictionary[
                'file_collection_id']
        elif "paths" in incoming_dictionary:
            # transform file paths to file_collection_id
            create_dict['file_collection_id'] = self.file_collection_controller.\
                create(incoming_dictionary['paths']).id
        else:
            # create some default file collection
            create_dict['file_collection_id'] = self.file_collection_controller.\
                create([]).id

    def _config_setup(self, incoming_dictionary, create_dict):
        """ Fills in snapshot config by having one of the following:
            1. config = JSON object
            2. config_filepath = some location where a json file exists
            3. config_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOError
        """
        if "config" in incoming_dictionary:
            create_dict['config'] = incoming_dictionary['config']
        elif "config_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['config_filepath']):
                raise FileIOError(
                    __("error", "controller.snapshot.create.file_config"))
            # If path exists transform file to config dict
            config_json_driver = JSONStore(
                incoming_dictionary['config_filepath'])
            create_dict['config'] = config_json_driver.to_dict()
        elif "config_filename" in incoming_dictionary:
            config_filename = incoming_dictionary['config_filename']
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])
        else:
            config_filename = "config.json"
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])

    def _stats_setup(self, incoming_dictionary, create_dict):
        """Fills in snapshot stats by having one of the following:
            1. stats = JSON object
            2. stats_filepath = some location where a json file exists
            3. stats_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOError
        """

        if "stats" in incoming_dictionary:
            create_dict['stats'] = incoming_dictionary['stats']
        elif "stats_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['stats_filepath']):
                raise FileIOError(
                    __("error", "controller.snapshot.create.file_stat"))
            # If path exists transform file to config dict
            stats_json_driver = JSONStore(
                incoming_dictionary['stats_filepath'])
            create_dict['stats'] = stats_json_driver.to_dict()
        elif "stats_filename" in incoming_dictionary:
            stats_filename = incoming_dictionary['stats_filename']
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])
        else:
            stats_filename = "stats.json"
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])

    def _find_in_filecollection(self, file_to_find, file_collection_id):
        """Search home and a file collection's tree for a named JSON file.

        Candidate locations are checked in order: directly under
        ``self.home``, then under each directory of the collection tree.
        The first candidate that exists on disk is parsed as JSON.

        Returns
        -------
        dict
            parsed contents of the first matching file, or an empty
            dict when no candidate path exists
        """
        collection_obj = self.file_collection_controller.dal.file_collection.\
            get_by_id(file_collection_id)
        collection_path = \
            self.file_collection_controller.file_driver.get_collection_path(
                collection_obj.filehash)
        # NOTE(review): when os.walk yields absolute dirpaths, os.path.join
        # discards self.home — presumably intentional, verify against callers
        candidate_paths = [os.path.join(self.home, file_to_find)]
        for dirpath, _dirnames, _filenames in os.walk(collection_path):
            candidate_paths.append(
                os.path.join(self.home, dirpath, file_to_find))
        for candidate in candidate_paths:
            if os.path.isfile(candidate):
                # first existing candidate wins: parse it into a dict
                return JSONStore(candidate).to_dict()
        # TODO: Add some info / warning that no file was found
        # no file found anywhere: default to an empty dict
        return {}