class TestEnvironmentController(object):
    def setup_method(self):
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        Config().set_home(self.temp_dir)
        self.project_controller = ProjectController()
        self.environment_ids = []

    def teardown_method(self):
        if not check_docker_inactive(test_datmo_dir):
            if self.project_controller.is_initialized:
                self.environment_controller = EnvironmentController()
                for env_id in list(set(self.environment_ids)):
                    if not self.environment_controller.delete(env_id):
                        raise Exception(
                            "Failed to delete environment %s during teardown"
                            % env_id)

    def __setup(self):
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()
        with open(os.path.join(self.temp_dir, "test.txt"), "wb") as f:
            f.write(to_bytes("hello"))
        self.random_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "test")
        with open(self.random_filepath, "wb") as f:
            f.write(to_bytes("cool"))
        self.definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        with open(self.definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

    def test_init_fail_project_not_init(self):
        Config().set_home(self.temp_dir)
        failed = False
        try:
            EnvironmentController()
        except ProjectNotInitialized:
            failed = True
        assert failed

    def test_get_environment_type(self):
        self.__setup()
        result = self.environment_controller.get_environment_types()
        assert result

    def test_get_supported_environments(self):
        self.__setup()
        environment_type = "cpu"
        result = self.environment_controller.get_supported_frameworks(
            environment_type)
        assert result

    def test_get_supported_languages(self):
        self.__setup()
        environment_type = "cpu"
        environment_name = "data-analytics"
        result = self.environment_controller.get_supported_languages(
            environment_type, environment_name)
        assert result

    def test_setup(self):
        # TODO: Run all environment options and test if success
        self.project_controller.init("test_setup", "test description")
        self.environment_controller = EnvironmentController()

        # Test successful setup once (no files present)
        options = {
            "environment_framework": "data-analytics",
            "environment_type": "cpu",
            "environment_language": "py27"
        }
        result = self.environment_controller.setup(options=options)
        output_definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        assert isinstance(result, Environment)
        assert result.name == "%s:%s-%s" % (options['environment_framework'],
                                            options['environment_type'],
                                            options['environment_language'])
        assert result.description == "supported environment created by datmo"
        assert os.path.isfile(output_definition_filepath)
        assert "FROM datmo/data-analytics:cpu-py27" in open(
            output_definition_filepath, "r").read()

        # Test successful setup again (files present, but staged)
        result = self.environment_controller.setup(options=options)
        assert isinstance(result, Environment)
        assert result.name == "%s:%s-%s" % (options['environment_framework'],
                                            options['environment_type'],
                                            options['environment_language'])
        assert result.description == "supported environment created by datmo"
        assert os.path.isfile(output_definition_filepath)
        assert "FROM datmo/data-analytics:cpu-py27" in open(
            output_definition_filepath, "r").read()

        # Test failure in downstream function (e.g. bad inputs, no name given)
        failed = False
        try:
            self.environment_controller.setup(options={})
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Change environment file
        with open(output_definition_filepath, "wb") as f:
            f.write(to_bytes("new content"))

        # Test failure setup (unstaged changes)
        failed = False
        try:
            self.environment_controller.setup(options=options)
        except UnstagedChanges:
            failed = True
        assert failed

    def test_current_environment(self):
        self.__setup()
        # Test failure with unstaged changes
        failed = False
        try:
            self.environment_controller.current_environment()
        except UnstagedChanges:
            failed = True
        assert failed
        # Test successful current environment
        input_dict = {"name": "test", "description": "test description"}
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)
        current_environment_obj = \
            self.environment_controller.current_environment()
        assert current_environment_obj == environment_obj
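    # NOTE: a minimal sketch of the input dictionary accepted by
    # EnvironmentController.create, as exercised by the tests below; the
    # exact set of supported keys lives in the controller itself, and the
    # paths here are illustrative:
    #
    #   input_dict = {
    #       "name": "my-env",                      # optional display name
    #       "description": "example environment",  # optional description
    #       # absolute/relative paths, optionally renamed at the
    #       # destination with the "src>dst" syntax:
    #       "paths": ["/path/to/Dockerfile", "/path/to/other>Dockerfile"],
    #   }
    #   environment_obj = environment_controller.create(
    #       input_dict, save_hardware_file=False)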
    def test_create(self):
        # 0) Test SUCCESS create when definition path exists in project
        #    environment directory (no input, no root) -- with hardware file
        # 1) Test SUCCESS create when definition path exists in project
        #    environment directory (no input, no root)
        # 5) Test SUCCESS when definition path exists in project environment
        #    directory and is also passed in via the input dict (takes input)
        # 2) Test SUCCESS create when definition path exists in root project
        #    folder (no input, no project environment dir)
        # 3) Test SUCCESS create when definition path is passed into input
        #    dict (takes input, no project environment dir)
        # 4) Test SUCCESS create when definition path is passed into input
        #    dict along with expected filename to be saved
        # 6) Test FAIL when passing same filepath with same filename into
        #    input dict
        # (options are numbered by scenario; they run in the order below)
        self.__setup()
        input_dict_0 = {"name": "test", "description": "test description"}

        # 0) Test option 0 (cannot test hash because hardware is machine-dependent)
        environment_obj_0 = self.environment_controller.create(input_dict_0)
        self.environment_ids.append(environment_obj_0.id)
        assert environment_obj_0
        assert isinstance(environment_obj_0, Environment)
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info
        assert environment_obj_0.unique_hash
        assert environment_obj_0.name == "test"
        assert environment_obj_0.description == "test description"
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection.\
            get_by_id(environment_obj_0.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver.\
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "test"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert os.path.isfile(
            os.path.join(file_collection_dir, "hardware_info"))
        output = open(os.path.join(file_collection_dir, "hardware_info"),
                      "r").read()
        print(repr(output))

        # 1) Test option 1
        environment_obj_0 = self.environment_controller.create(
            input_dict_0, save_hardware_file=False)
        self.environment_ids.append(environment_obj_0.id)
        assert environment_obj_0
        assert isinstance(environment_obj_0, Environment)
        assert environment_obj_0.id
        assert environment_obj_0.driver_type == "docker"
        assert environment_obj_0.file_collection_id
        assert environment_obj_0.definition_filename
        assert environment_obj_0.hardware_info
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection.\
            get_by_id(environment_obj_0.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver.\
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "test"))
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert environment_obj_0.unique_hash == "1e32ff083520f792cbe4bafdc2de2a01"
        assert environment_obj_0.name == "test"
        assert environment_obj_0.description == "test description"
        # Files: ["test", "Dockerfile"]

        # 5) Test option 5
        input_dict_1 = {
            "name": "test",
            "description": "test description",
            "paths": [self.definition_filepath],
        }
        environment_obj = self.environment_controller.create(
            input_dict_1, save_hardware_file=False)
        self.environment_ids.append(environment_obj.id)
        assert environment_obj
        assert isinstance(environment_obj, Environment)
        assert environment_obj.id
        assert environment_obj.driver_type == "docker"
        assert environment_obj.file_collection_id
        assert environment_obj.definition_filename
        assert environment_obj.hardware_info
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection.\
            get_by_id(environment_obj.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver.\
            get_collection_path(file_collection_obj.filehash)
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        output = open(os.path.join(file_collection_dir, "Dockerfile"),
                      "r").read()
        print(repr(output))
        assert not os.path.isfile(
            os.path.join(file_collection_dir, "datmoDockerfile"))
        assert environment_obj.unique_hash == "fd725be022ce93f870c81e2ee170189c"
        assert environment_obj.name == "test"
        assert environment_obj.description == "test description"
        # Files: ["Dockerfile"]

        # Remove the files from the project environment directory
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))

        # Create environment definition in root directory
        home_definition_filepath = os.path.join(
            self.environment_controller.home, "Dockerfile")
        with open(home_definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        # 2) Test option 2
        environment_obj_1 = self.environment_controller.create(
            input_dict_0, save_hardware_file=False)
        self.environment_ids.append(environment_obj_1.id)
        assert environment_obj_1
        assert isinstance(environment_obj_1, Environment)
        assert environment_obj_1.id
        assert environment_obj_1.driver_type == "docker"
        assert environment_obj_1.file_collection_id
        assert environment_obj_1.definition_filename
        assert environment_obj_1.hardware_info
        assert environment_obj_1.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection.\
            get_by_id(environment_obj_1.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver.\
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_1.name == "test"
        assert environment_obj_1.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert environment_obj_1.unique_hash == "fd725be022ce93f870c81e2ee170189c"

        # 3) Test option 3
        input_dict_2 = {
            "name": "test",
            "description": "test description",
            "paths": [home_definition_filepath],
        }
        # Create environment in the project
        environment_obj_2 = self.environment_controller.create(
            input_dict_2, save_hardware_file=False)
        self.environment_ids.append(environment_obj_2.id)
        assert environment_obj_2
        assert isinstance(environment_obj_2, Environment)
        assert environment_obj_2.id
        assert environment_obj_2.driver_type == "docker"
        assert environment_obj_2.file_collection_id
        assert environment_obj_2.definition_filename
        assert environment_obj_2.hardware_info
        assert environment_obj_2.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection.\
            get_by_id(environment_obj_2.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver.\
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_2.name == "test"
        assert environment_obj_2.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert environment_obj_2.unique_hash == "fd725be022ce93f870c81e2ee170189c"

        # 4) Test option 4
        input_dict_3 = {
            "paths": [home_definition_filepath + ">Dockerfile"],
        }
        # Create environment in the project
        environment_obj_3 = self.environment_controller.create(
            input_dict_3, save_hardware_file=False)
        self.environment_ids.append(environment_obj_3.id)
        assert environment_obj_3
        assert isinstance(environment_obj_3, Environment)
        assert environment_obj_3.id
        assert environment_obj_3.driver_type == "docker"
        assert environment_obj_3.file_collection_id
        assert environment_obj_3.definition_filename
        assert environment_obj_3.hardware_info
        assert environment_obj_3.unique_hash == file_collection_obj.filehash
        # Get file collection path
        file_collection_obj = self.environment_controller.dal.file_collection.\
            get_by_id(environment_obj_3.file_collection_id)
        file_collection_dir = self.environment_controller.file_driver.\
            get_collection_path(file_collection_obj.filehash)
        assert environment_obj_3.name == "test"
        assert environment_obj_3.description == "test description"
        assert os.path.isfile(os.path.join(file_collection_dir, "Dockerfile"))
        assert environment_obj_3.unique_hash == "fd725be022ce93f870c81e2ee170189c"

        # 6) Test option 6
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        input_dict = {
            "paths": [
                definition_filepath + ">Dockerfile",
                definition_filepath + ">Dockerfile"
            ],
        }
        # Create environment in the project
        failed = False
        try:
            _ = self.environment_controller.create(
                input_dict, save_hardware_file=False)
        except FileAlreadyExistsError:
            failed = True
        assert failed
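    # NOTE: a sketch of the definition-resolution behavior test_create
    # exercises, as these tests observe it: explicit "paths" entries are
    # used when given, then the project environment directory, then a
    # Dockerfile in the project root. Identical definition bytes map to the
    # same unique_hash, which is why each scenario above asserts the same
    # hardcoded hash:
    #
    #   env_a = controller.create({}, save_hardware_file=False)
    #   env_b = controller.create(
    #       {"paths": [dockerfile_path]}, save_hardware_file=False)
    #   assert env_a.unique_hash == env_b.unique_hash  # same file contents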
    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_build(self):
        # 1) Test build when no environment is given
        # 2) Test build when definition path exists and is given
        # 3) Test build when NO file exists and definition path exists
        # 4) Test build when file exists and definition path exists
        # 5) Test build when file exists but NO definition path exists
        # 6) Test build with a datmo base image and a workspace
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # 1) Test option 1
        failed = False
        try:
            _ = self.environment_controller.build("does_not_exist")
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))
        input_dict = {
            "paths": [definition_filepath],
        }

        # 2) Test option 2
        # Create environment in the project
        environment_obj_1 = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj_1.id)
        result = self.environment_controller.build(environment_obj_1.id)
        assert result

        # 3) Test option 3
        # Create environment in the project
        environment_obj_2 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_2.id)
        assert result

        # Create script to test
        test_filepath = os.path.join(self.environment_controller.home,
                                     "script.py")
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import numpy\n"))
            f.write(to_bytes("import sklearn\n"))
            f.write(to_bytes("print('hello')\n"))

        # 4) Test option 4
        environment_obj_3 = self.environment_controller.create({})
        result = self.environment_controller.build(environment_obj_3.id)
        assert result

        # Tests 2), 3), and 4) result in the same environment
        assert environment_obj_1.id == environment_obj_2.id
        assert environment_obj_2.id == environment_obj_3.id

        # Test building when the definition no longer exists in the root
        os.remove(definition_filepath)

        # 5) Test option 5
        # Create environment definition in project environment directory
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))
        environment_obj_4 = self.environment_controller.create({})
        self.environment_ids.append(environment_obj_4.id)
        result = self.environment_controller.build(environment_obj_4.id)
        assert result

        # 6) Test option 6
        # Create environment definition in project environment directory
        # with a datmo base image
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(
                to_bytes("FROM datmo/data-analytics:cpu-py27%s" % os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))
        environment_obj_5 = self.environment_controller.create({})
        self.environment_ids.append(environment_obj_5.id)
        result = self.environment_controller.build(
            environment_obj_5.id, workspace="notebook")
        assert result

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_extract_workspace_url(self):
        # Create environment definition
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(
                to_bytes(
                    "FROM datmo/python-base:cpu-py27-notebook" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))
        image_name = "test"
        input_dict = {"name": image_name, "description": "test description"}
        # Create environment in the project
        environment_obj = self.environment_controller.create(
            input_dict, save_hardware_file=False)
        self.environment_controller.build(environment_obj.id)
        # Test when there is no container being run
        workspace_url = self.environment_controller.extract_workspace_url(
            image_name, "notebook")
        assert workspace_url is None

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_run(self):
        # Test running a simple command with a simple Dockerfile
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # 0) Test option 0
        # Create environment definition in project environment directory
        definition_filepath = os.path.join(
            self.environment_controller.file_driver.environment_directory,
            "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))

        random_name = str(uuid.uuid1())
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": random_name,
            "volumes": None,
            "detach": True,
            "stdin_open": False,
            "mem_limit": "4g",
            "tty": False,
            "api": False
        }
        # Create environment in the project
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)
        log_filepath = os.path.join(self.project_controller.home, "task.log")
        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)
        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)
        assert return_code == 0
        assert run_id
        assert logs

        # Remove Dockerfile
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))

        # 1) Test option 1
        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))

        random_name = str(uuid.uuid1())
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8889:8889"],
            "name": random_name,
            "volumes": None,
            "mem_limit": "4g",
            "detach": True,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        input_dict = {
            "paths": [definition_filepath],
        }
        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)
        log_filepath = os.path.join(self.project_controller.home, "task.log")
        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)
        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)
        assert return_code == 0
        assert run_id
        assert logs

        # 2) Test option 2
        os.remove(definition_filepath)
        # Create script to test
        test_filepath = os.path.join(self.environment_controller.home,
                                     "script.py")
        with open(test_filepath, "wb") as f:
            f.write(to_bytes("import os\n"))
            f.write(to_bytes("import sys\n"))
            f.write(to_bytes("print('hello')\n"))
        # Create environment in the project
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)
        self.environment_controller.build(environment_obj.id)

        random_name = str(uuid.uuid1())
        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8899:8899"],
            "name": random_name,
            "volumes": {
                self.environment_controller.home: {
                    'bind': '/home/',
                    'mode': 'rw'
                }
            },
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        # Run environment in the project
        return_code, run_id, logs = \
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)
        assert return_code == 0
        assert run_id
        assert logs
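    # NOTE: a sketch of the run_options dictionary consumed by
    # EnvironmentController.run, as used throughout these tests; the keys
    # mirror the container options the docker driver forwards, and the
    # values here are illustrative:
    #
    #   run_options = {
    #       "command": ["sh", "-c", "echo yo"],  # command run in the container
    #       "ports": ["8888:8888"],              # "host:container" mappings
    #       "name": "some-unique-name",          # container name
    #       "volumes": None,   # or {host_path: {"bind": ..., "mode": ...}}
    #       "mem_limit": "4g",                   # container memory cap
    #       "detach": True,
    #       "stdin_open": False,
    #       "tty": False,
    #       "api": False,
    #   }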
    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_interactive_run(self):
        # 1) Test run interactive terminal in environment
        # 2) Test run jupyter notebook in environment
        # Create environment definition
        self.project_controller.init("test6", "test description")
        self.environment_controller = EnvironmentController()
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(definition_filepath, "wb") as f:
            f.write(
                to_bytes("FROM nbgallery/jupyter-alpine:latest" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))
        input_dict = {
            "paths": [definition_filepath],
        }
        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)

        # 1) Test option 1
        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, timed_run_result):
            run_options = {
                "command": [],
                "ports": ["8889:8889"],
                "name": environment_obj.id + "-" + container_name,
                "volumes": None,
                "mem_limit": "4g",
                "detach": True,
                "stdin_open": True,
                "tty": True,
                "api": False
            }
            log_filepath = os.path.join(self.project_controller.home,
                                        "task.log")
            # Build environment in the project
            _ = self.environment_controller.build(environment_obj.id)
            # Run environment in the project
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)
            return timed_run_result

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            timed_run_result = True
        assert timed_run_result

        # 2) Test option 2
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)

        @timeout_decorator.timeout(10, use_signals=False)
        def timed_run(container_name, timed_run_result):
            run_options = {
                "command": ["jupyter", "notebook", "--allow-root"],
                "ports": ["8888:8888"],
                "name": environment_obj.id + "-" + container_name,
                "volumes": None,
                "mem_limit": "4g",
                "detach": True,
                "stdin_open": False,
                "tty": False,
                "api": False
            }
            log_filepath = os.path.join(self.project_controller.home,
                                        "task.log")
            # Build environment in the project
            _ = self.environment_controller.build(environment_obj.id)
            # Run environment in the project
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)
            return timed_run_result

        container_name = str(uuid.uuid1())
        timed_run_result = False
        try:
            timed_run_result = timed_run(container_name, timed_run_result)
        except timeout_decorator.timeout_decorator.TimeoutError:
            timed_run_result = True
        assert timed_run_result
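    # NOTE: a sketch of the timeout pattern used in test_interactive_run
    # above: the decorated helper blocks on the interactive container, so
    # hitting timeout_decorator's TimeoutError is treated as success (the
    # workspace stayed up for the full window). The shape, with names
    # illustrative:
    #
    #   @timeout_decorator.timeout(10, use_signals=False)
    #   def blocking_run():
    #       controller.run(env_id, run_options, log_filepath)
    #
    #   try:
    #       blocking_run()
    #   except timeout_decorator.timeout_decorator.TimeoutError:
    #       pass  # expected: the container was still running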
    def test_list(self):
        self.project_controller.init("test4", "test description")
        self.environment_controller = EnvironmentController()
        # Create environment definition for object 1
        definition_path_1 = os.path.join(self.environment_controller.home,
                                         "Dockerfile")
        with open(definition_path_1, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))
        input_dict_1 = {
            "paths": [definition_path_1],
        }
        # Create environment in the project
        environment_obj_1 = self.environment_controller.create(input_dict_1)
        self.environment_ids.append(environment_obj_1.id)
        # Create environment definition for object 2
        definition_path_2 = os.path.join(self.environment_controller.home,
                                         "Dockerfile2")
        with open(definition_path_2, "wb") as f:
            f.write(to_bytes("FROM python:3.4-alpine"))
        input_dict_2 = {
            "paths": [definition_path_2 + ">Dockerfile"],
        }
        # Create second environment in the project
        environment_obj_2 = self.environment_controller.create(input_dict_2)
        self.environment_ids.append(environment_obj_2.id)
        # List all environments and ensure they exist
        result = self.environment_controller.list()
        assert len(result) == 2 and \
            environment_obj_1 in result and \
            environment_obj_2 in result

    def test_update(self):
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))
        input_dict = {
            "paths": [definition_filepath],
        }
        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)
        # Test successful update
        new_name = "test name"
        new_description = "test description"
        result = self.environment_controller.update(
            environment_obj.id, name=new_name, description=new_description)
        assert result
        assert isinstance(result, Environment)
        assert result.name == new_name
        assert result.description == new_description
        # Test failed update
        failed = False
        try:
            self.environment_controller.update(
                "random_id", name=new_name, description=new_description)
        except EnvironmentDoesNotExist:
            failed = True
        assert failed

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_delete(self):
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))
        input_dict = {
            "paths": [definition_filepath],
        }
        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        # Delete environment in the project
        result = self.environment_controller.delete(environment_obj.id)
        # Check if environment retrieval throws error
        thrown = False
        try:
            self.environment_controller.dal.environment.get_by_id(
                environment_obj.id)
        except EntityNotFound:
            thrown = True
        assert result and thrown

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_failure(self):
        # 1) Test failure with RequiredArgumentMissing
        # 2) Test failure with TooManyArgumentsFound
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()

        # 1) Test option 1
        failed = False
        try:
            self.environment_controller.stop()
        except RequiredArgumentMissing:
            failed = True
        assert failed

        # 2) Test option 2
        failed = False
        try:
            self.environment_controller.stop(
                run_id="hello", match_string="there")
        except TooManyArgumentsFound:
            failed = True
        assert failed

    @pytest_docker_environment_failed_instantiation(test_datmo_dir)
    def test_stop_success(self):
        # TODO: test more run options
        # 1) Test run_id input to stop
        # 2) Test match_string input to stop
        # 3) Test all input to stop
        # 4) Test if the image was removed by stop
        self.project_controller.init("test5", "test description")
        self.environment_controller = EnvironmentController()
        # Create environment definition
        definition_filepath = os.path.join(self.environment_controller.home,
                                           "Dockerfile")
        with open(definition_filepath, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine"))

        run_options = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8888:8888"],
            "name": "datmo-task-" + self.environment_controller.model.id +
                    "-" + "test",
            "volumes": None,
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }

        # Create environment definition (overwritten with a unique definition)
        env_def_path = os.path.join(self.project_controller.home,
                                    "Dockerfile")
        random_text = str(uuid.uuid1())
        with open(env_def_path, "wb") as f:
            f.write(to_bytes("FROM python:3.5-alpine" + os.linesep))
            f.write(to_bytes(str("RUN echo " + random_text)))

        input_dict = {
            "paths": [definition_filepath],
        }
        # Create environment in the project
        environment_obj = self.environment_controller.create(input_dict)
        self.environment_ids.append(environment_obj.id)
        log_filepath = os.path.join(self.project_controller.home, "task.log")
        # Build environment in the project
        _ = self.environment_controller.build(environment_obj.id)

        # 1) Test option 1
        _, run_id, _ = \
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)
        return_code = self.environment_controller.stop(
            run_id=run_id, environment_id=environment_obj.id)
        assert return_code

        # 2) Test option 2
        # Rebuild environment in the project
        _ = self.environment_controller.build(environment_obj.id)
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)
        return_code = self.environment_controller.stop(
            match_string="datmo-task-" + self.environment_controller.model.id,
            environment_id=environment_obj.id)
        assert return_code

        # 3) Test option 3
        # Rebuild environment in the project
        _ = self.environment_controller.build(environment_obj.id)
        _, _, _ = \
            self.environment_controller.run(environment_obj.id, run_options,
                                            log_filepath)
        run_options_2 = {
            "command": ["sh", "-c", "echo yo"],
            "ports": ["8889:8889"],
            "name": "datmo-task-" + self.environment_controller.model.id +
                    "-" + "test2",
            "volumes": None,
            "mem_limit": "4g",
            "detach": False,
            "stdin_open": False,
            "tty": False,
            "api": False
        }
        _, _, _ = \
            self.environment_controller.run(environment_obj.id,
                                            run_options_2, log_filepath)
        return_code = self.environment_controller.stop(
            all=True, environment_id=environment_obj.id)
        assert return_code

    def test_exists(self):
        # Test failure, not initialized
        failed = False
        try:
            _ = self.environment_controller.create({})
        except Exception:
            failed = True
        assert failed

        # Setup
        self.__setup()
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)

        # Check by environment id
        result = self.environment_controller.exists(
            environment_id=environment_obj.id)
        assert result
        # Check by unique hash
        result = self.environment_controller.exists(
            environment_unique_hash=environment_obj.unique_hash)
        assert result
        # Test with wrong environment id
        result = self.environment_controller.exists(
            environment_id='test_wrong_env_id')
        assert not result

    def test_calculate_project_environment_hash(self):
        # Setup
        self.__setup()
        # Test hashing the default (with hardware info)
        result = \
            self.environment_controller._calculate_project_environment_hash()
        assert result
        # Test hashing the default Dockerfile
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "1e32ff083520f792cbe4bafdc2de2a01"
        # Test if the hash is the same as that of create
        environment_obj = self.environment_controller.create(
            {}, save_hardware_file=False)
        self.environment_ids.append(environment_obj.id)
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "1e32ff083520f792cbe4bafdc2de2a01"
        assert result == environment_obj.unique_hash
        # Test if the hash is the same if the same file is passed in as input
        input_dict = {
            "paths": [self.definition_filepath, self.random_filepath]
        }
        environment_obj_1 = self.environment_controller.create(
            input_dict, save_hardware_file=False)
        self.environment_ids.append(environment_obj_1.id)
        result = self.environment_controller._calculate_project_environment_hash(
            save_hardware_file=False)
        assert result == "1e32ff083520f792cbe4bafdc2de2a01"
        assert result == environment_obj_1.unique_hash
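    # NOTE: a sketch of the hashing behavior verified above: the project
    # environment hash is derived from the environment file contents (plus,
    # when save_hardware_file is True, the generated hardware_info file), so
    # it is stable across repeated create() calls as long as the files do
    # not change. Names below are illustrative:
    #
    #   h1 = controller._calculate_project_environment_hash(
    #       save_hardware_file=False)
    #   env = controller.create({}, save_hardware_file=False)
    #   assert h1 == env.unique_hash  # same files -> same hash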
    def test_has_unstaged_changes(self):
        # Setup
        self.__setup()
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)
        # Check for no unstaged changes
        result = self.environment_controller._has_unstaged_changes()
        assert not result
        # Make a change to the file (update python version)
        with open(
                os.path.join(
                    self.environment_controller.file_driver.
                    environment_directory, "Dockerfile"), "wb") as f:
            f.write(to_bytes("FROM python:3.6-alpine"))
        # Check again, should have unstaged changes
        result = self.environment_controller._has_unstaged_changes()
        assert result

    def test_check_unstaged_changes(self):
        # Setup
        self.__setup()
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)

        # 1) After committing the changes
        # Check for no unstaged changes because already committed
        result = self.environment_controller.check_unstaged_changes()
        assert not result

        # Add a new file
        with open(
                os.path.join(
                    self.environment_controller.file_driver.
                    environment_directory, "test2"), "wb") as f:
            f.write(to_bytes("cool"))

        # 2) Without committing the changes, should raise UnstagedChanges
        failed = False
        try:
            self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        assert failed

        # Remove new file
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test2"))

        # 3) Files are the same as before but no new commit, so there should
        #    be no unstaged changes
        result = self.environment_controller.check_unstaged_changes()
        assert not result

        # 4) Remove another file; now the state is different and should have
        #    unstaged changes
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        failed = False
        try:
            self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        assert failed

        # 5) Remove the rest of the files; now it is empty and should return
        #    as already staged
        os.remove(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        result = self.environment_controller.check_unstaged_changes()
        assert not result

    def test_checkout(self):
        # Setup and create all environment files
        self.__setup()
        # Create environment to checkout to with defaults
        environment_obj = self.environment_controller.create({})
        self.environment_ids.append(environment_obj.id)

        # Checkout success when there are no unstaged changes
        result = self.environment_controller.checkout(environment_obj.id)
        assert result
        current_hash = \
            self.environment_controller._calculate_project_environment_hash()
        assert environment_obj.unique_hash == current_hash
        # Check the filenames as well because the hash does not take
        # them into account
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "datmoDockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "hardware_info"))

        # Change file contents to make it unstaged
        with open(self.definition_filepath, "wb") as f:
            f.write(to_bytes("new content"))

        # Checkout failure with unstaged changes
        failed = False
        try:
            _ = self.environment_controller.checkout(environment_obj.id)
        except UnstagedChanges:
            failed = True
        assert failed

        # Create new environment to checkout to with defaults (no hardware)
        environment_obj_1 = self.environment_controller.create(
            {}, save_hardware_file=False)
        self.environment_ids.append(environment_obj_1.id)

        # Checkout success when there are no unstaged changes
        result = self.environment_controller.checkout(environment_obj.id)
        assert result
        current_hash = \
            self.environment_controller._calculate_project_environment_hash()
        assert environment_obj.unique_hash == current_hash
        assert environment_obj_1.unique_hash != current_hash
        # Check the filenames as well because the hash does not take
        # them into account
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "test"))
        assert os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "Dockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "datmoDockerfile"))
        assert not os.path.isfile(
            os.path.join(
                self.environment_controller.file_driver.environment_directory,
                "hardware_info"))
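

# NOTE: a minimal end-to-end sketch of the controller lifecycle the tests
# above cover, assuming an initialized project; names and paths here are
# illustrative:
#
#   Config().set_home("/path/to/project")
#   ProjectController().init("my_project", "description")
#   controller = EnvironmentController()
#   env = controller.create({})                    # snapshot definition files
#   controller.build(env.id)                       # build the docker image
#   controller.run(env.id, run_options, log_filepath)
#   controller.stop(all=True, environment_id=env.id)
#   controller.delete(env.id)
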
class SnapshotController(BaseController):
    """SnapshotController inherits from BaseController and manages business
    logic related to snapshots

    Parameters
    ----------
    home : str
        home path of the project

    Attributes
    ----------
    code_controller : datmo.core.controller.code.code.CodeController
    file_collection_controller : datmo.core.controller.file.file_collection.FileCollectionController
    environment_controller : datmo.core.controller.environment.environment.EnvironmentController

    Methods
    -------
    create(dictionary)
        Create a snapshot within the project
    checkout(id)
        Checkout to a specific snapshot within the project
    list(session_id=None)
        List all snapshots present within the project based on given filters
    delete(id)
        Delete the snapshot specified from the project
    """

    def __init__(self):
        super(SnapshotController, self).__init__()
        self.code_controller = CodeController()
        self.file_collection_controller = FileCollectionController()
        self.environment_controller = EnvironmentController()
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.snapshot.__init__"))

    def current_snapshot(self):
        """Get the current snapshot if there are no unstaged changes

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot or None
            the current snapshot object if one exists; None if it does not
            (which should never occur unless the DB is out of sync)

        Raises
        ------
        UnstagedChanges
            if there are unstaged changes, error out because there is no
            current snapshot
        """
        current_code_obj = self.code_controller.current_code()
        current_environment_obj = \
            self.environment_controller.current_environment()
        current_file_collection_obj = \
            self.file_collection_controller.current_file_collection()
        # If snapshot object with required args already exists, return it
        query = {
            "model_id": self.model.id,
            "code_id": current_code_obj.id,
            "environment_id": current_environment_obj.id,
            "file_collection_id": current_file_collection_obj.id
        }
        results = self.dal.snapshot.query(
            query, sort_key="created_at", sort_order="descending")
        # Should never return None, unless the DB is out of sync
        return results[0] if results else None

    def create(self, dictionary):
        """Create snapshot object

        Parameters
        ----------
        dictionary : dict
            for each of the 5 key components, this function will search for
            one of the variables below starting from the top. Default
            functionality is described below for each component as well for
            reference if none of the variables are given.

            code :
                code_id : str, optional
                    code reference associated with the snapshot; if not
                    provided will look to inputs below for code creation
                commit_id : str, optional
                    commit id provided by the user if already available

                Default
                -------
                commits will be taken and code created via the
                CodeController and are added to the snapshot at the time
                of snapshot creation

            environment :
                environment_id : str, optional
                    id for environment used to create snapshot
                workspace : str, optional
                    type of workspace being used
                environment_paths : list, optional
                    list of absolute or relative filepaths and/or dirpaths
                    to collect with destination names
                    (e.g. "/path/to/file>hello", "/path/to/file2",
                    "/path/to/dir>newdir")

                Default
                -------
                default environment files will be searched and environment
                will be created with the EnvironmentController and added to
                the snapshot at the time of snapshot creation

            file_collection :
                file_collection_id : str, optional
                    file collection associated with the snapshot
                paths : list, optional
                    list of absolute or relative filepaths and/or dirpaths
                    to collect with destination names
                    (e.g. "/path/to/file>hello", "/path/to/file2",
                    "/path/to/dir>newdir")

                Default
                -------
                paths will be considered empty ([]), and the
                FileCollectionController will create a blank FileCollection
                that is empty.

            config :
                config : dict, optional
                    key, value pairs of configurations
                config_filepath : str, optional
                    absolute filepath to configuration parameters file
                config_filename : str, optional
                    name of file with configuration parameters

                Default
                -------
                config will be considered empty ({}) and saved to the
                snapshot

            stats :
                stats : dict, optional
                    key, value pairs of metrics and statistics
                stats_filepath : str, optional
                    absolute filepath to stats parameters file
                stats_filename : str, optional
                    name of file with metrics and statistics

                Default
                -------
                stats will be considered empty ({}) and saved to the
                snapshot

            for the remaining optional arguments it will search for them in
            the input dictionary

            message : str
                long description of snapshot
            session_id : str, optional
                session id within which snapshot is created, will overwrite
                default if given
            task_id : str, optional
                task id associated with snapshot
            label : str, optional
                short description of snapshot
            visible : bool, optional
                True if visible to user via list command else False

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            snapshot object with all relevant parameters

        Raises
        ------
        RequiredArgumentMissing
            if required arguments are not given by the user
        FileIOError
            if files are not present or there is an error in File IO
        """
        # Validate inputs
        create_dict = {
            "model_id": self.model.id,
            "session_id": self.current_session.id,
        }
        validate("create_snapshot", dictionary)

        # Message must be present
        if "message" in dictionary:
            create_dict['message'] = dictionary['message']
        else:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.create.arg", "message"))

        # Code setup
        self._code_setup(dictionary, create_dict)
        # Environment setup
        self._env_setup(dictionary, create_dict)
        # File setup
        self._file_setup(dictionary, create_dict)
        # Config setup
        self._config_setup(dictionary, create_dict)
        # Stats setup
        self._stats_setup(dictionary, create_dict)

        # If a snapshot object with the required args already exists, return
        # it; DO NOT create a new snapshot with the same required arguments
        results = self.dal.snapshot.query({
            "model_id": create_dict["model_id"],
            "code_id": create_dict['code_id'],
            "environment_id": create_dict['environment_id'],
            "file_collection_id": create_dict['file_collection_id'],
            "config": create_dict['config'],
            "stats": create_dict['stats']
        })
        if results:
            return results[0]

        # Optional args for Snapshot entity
        optional_args = ["task_id", "label", "visible"]
        for optional_arg in optional_args:
            if optional_arg in dictionary:
                create_dict[optional_arg] = dictionary[optional_arg]

        # Create snapshot and return
        return self.dal.snapshot.create(Snapshot(create_dict))
    def create_from_task(self,
                         message,
                         task_id,
                         label=None,
                         config=None,
                         stats=None):
        """Create snapshot from a completed task.
        # TODO: enable create from task DURING a run

        Parameters
        ----------
        message : str
            long description of snapshot
        task_id : str
            task object to use to create snapshot
        label : str, optional
            short description of snapshot
        config : dict, optional
            key, value pairs of configurations
        stats : dict, optional
            key, value pairs of metrics and statistics

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            snapshot object with all relevant parameters

        Raises
        ------
        TaskNotComplete
            if task specified has not been completed
        """
        validate(
            "create_snapshot_from_task", {
                "message": message,
                "task_id": task_id,
                "label": label,
                "config": config,
                "stats": stats
            })
        task_obj = self.dal.task.get_by_id(task_id)
        if not task_obj.status and not task_obj.after_snapshot_id:
            raise TaskNotComplete(
                __("error", "controller.snapshot.create_from_task",
                   str(task_obj.id)))
        after_snapshot_obj = self.dal.snapshot.get_by_id(
            task_obj.after_snapshot_id)
        snapshot_update_dict = {
            "id": task_obj.after_snapshot_id,
            "message": message,
            "visible": True
        }
        if label:
            snapshot_update_dict["label"] = label
        if config:
            snapshot_update_dict["config"] = config
        if stats:
            snapshot_update_dict["stats"] = stats
        else:
            # Append to any existing stats already present
            snapshot_update_dict["stats"] = {}
            if after_snapshot_obj.stats is not None:
                snapshot_update_dict["stats"].update(after_snapshot_obj.stats)
            if task_obj.results is not None:
                snapshot_update_dict["stats"].update(task_obj.results)
            if snapshot_update_dict["stats"] == {}:
                snapshot_update_dict["stats"] = None
        return self.dal.snapshot.update(snapshot_update_dict)

    def check_unstaged_changes(self):
        """Check if there exist any unstaged changes for the snapshot
        in the project

        Returns
        -------
        bool
            False if everything is already staged, else raise an error

        Raises
        ------
        UnstagedChanges
            if there exist unstaged changes in the snapshot
        """
        try:
            # check for unstaged changes in code
            self.code_controller.check_unstaged_changes()
            # check for unstaged changes in environment
            self.environment_controller.check_unstaged_changes()
            # check for unstaged changes in files
            self.file_collection_controller.check_unstaged_changes()
            return False
        except UnstagedChanges:
            raise UnstagedChanges()
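    # NOTE: a sketch of the checkout flow implemented by checkout() below;
    # it fails fast on unstaged changes before touching any component, and
    # the snapshot_id here is illustrative:
    #
    #   controller = SnapshotController()
    #   controller.check_unstaged_changes()  # raises UnstagedChanges if dirty
    #   success = controller.checkout(snapshot_id)
    #   # code, environment, and files are restored together on success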
    def checkout(self, snapshot_id):
        # Get snapshot object
        snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id)
        code_obj = self.dal.code.get_by_id(snapshot_obj.code_id)
        file_collection_obj = self.dal.file_collection.\
            get_by_id(snapshot_obj.file_collection_id)
        environment_obj = self.dal.environment.\
            get_by_id(snapshot_obj.environment_id)

        # Check for unstaged changes; if any, error with UnstagedChanges
        self.check_unstaged_changes()

        # Checkout code to the relevant commit ref
        code_checkout_success = self.code_controller.checkout(code_obj.id)
        # Checkout environment to the relevant environment id
        environment_checkout_success = self.environment_controller.checkout(
            environment_obj.id)
        # Checkout files to the relevant file collection id
        file_checkout_success = self.file_collection_controller.checkout(
            file_collection_obj.id)

        return (code_checkout_success and environment_checkout_success and
                file_checkout_success)

    def list(self,
             session_id=None,
             visible=None,
             sort_key=None,
             sort_order=None):
        query = {}
        if session_id:
            try:
                self.dal.session.get_by_id(session_id)
            except EntityNotFound:
                raise SessionDoesNotExist(
                    __("error", "controller.snapshot.list", session_id))
            query['session_id'] = session_id
        if visible is not None and isinstance(visible, bool):
            query['visible'] = visible
        return self.dal.snapshot.query(query, sort_key, sort_order)

    def update(self,
               snapshot_id,
               config=None,
               stats=None,
               message=None,
               label=None,
               visible=None):
        """Update the snapshot metadata"""
        if not snapshot_id:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.delete.arg", "snapshot_id"))
        update_snapshot_input_dict = {'id': snapshot_id}
        validate(
            "update_snapshot", {
                "config": config,
                "stats": stats,
                "message": message,
                "label": label,
                "visible": visible
            })
        if config is not None:
            update_snapshot_input_dict['config'] = config
        if stats is not None:
            update_snapshot_input_dict['stats'] = stats
        if message is not None:
            update_snapshot_input_dict['message'] = message
        if label is not None:
            update_snapshot_input_dict['label'] = label
        if visible is not None:
            update_snapshot_input_dict['visible'] = visible
        return self.dal.snapshot.update(update_snapshot_input_dict)

    def get(self, snapshot_id):
        """Get snapshot object and return

        Parameters
        ----------
        snapshot_id : str
            id for the snapshot you would like to get

        Returns
        -------
        datmo.core.entity.snapshot.Snapshot
            core snapshot object

        Raises
        ------
        DoesNotExist
            snapshot does not exist
        """
        try:
            return self.dal.snapshot.get_by_id(snapshot_id)
        except EntityNotFound:
            raise DoesNotExist()

    def get_files(self, snapshot_id, mode="r"):
        """Get a list of file objects for snapshot id

        Parameters
        ----------
        snapshot_id : str
            id for the snapshot you would like to get file objects for
        mode : str
            file open mode (default is "r" to open file for read)

        Returns
        -------
        list
            list of python file objects

        Raises
        ------
        DoesNotExist
            snapshot object does not exist
        """
        try:
            snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id)
        except EntityNotFound:
            raise DoesNotExist()
        file_collection_obj = self.dal.file_collection.get_by_id(
            snapshot_obj.file_collection_id)
        return self.file_driver.get_collection_files(
            file_collection_obj.filehash, mode=mode)

    def delete(self, snapshot_id):
        """Delete all traces of a snapshot

        Parameters
        ----------
        snapshot_id : str
            id for the snapshot to remove

        Returns
        -------
        bool
            True if success

        Raises
        ------
        RequiredArgumentMissing
            if the provided snapshot_id is None
        """
        if not snapshot_id:
            raise RequiredArgumentMissing(
                __("error", "controller.snapshot.delete.arg", "snapshot_id"))
        return self.dal.snapshot.delete(snapshot_id)
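    # NOTE: a minimal sketch of a create() call; every key except "message"
    # is optional and all values here are illustrative:
    #
    #   snapshot_obj = SnapshotController().create({
    #       "message": "trained baseline model",     # required
    #       "config": {"learning_rate": 0.01},       # else config.json is searched
    #       "stats": {"accuracy": 0.98},             # else stats.json is searched
    #       "paths": ["results/weights.h5>weights"], # files to collect
    #   })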
    def _code_setup(self, incoming_dictionary, create_dict):
        """Set the code_id by using one of the following:
        1. code_id
        2. commit_id string, which creates a new code_id
        3. a new code id created from the current code state

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "code_id" in incoming_dictionary:
            create_dict['code_id'] = incoming_dictionary['code_id']
        elif "commit_id" in incoming_dictionary:
            create_dict['code_id'] = self.code_controller.\
                create(commit_id=incoming_dictionary['commit_id']).id
        else:
            create_dict['code_id'] = self.code_controller.create().id

    def _env_setup(self, incoming_dictionary, create_dict):
        """Set the environment_id by using one of the following:
        1. environment_id
        2. environment_paths, which creates a new environment
        3. a new default environment

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "environment_id" in incoming_dictionary:
            create_dict['environment_id'] = incoming_dictionary[
                'environment_id']
        elif "environment_paths" in incoming_dictionary:
            create_dict['environment_id'] = self.environment_controller.create(
                {
                    "paths": incoming_dictionary['environment_paths']
                }).id
        else:
            # create some default environment
            create_dict['environment_id'] = self.environment_controller.\
                create({}).id

    def _file_setup(self, incoming_dictionary, create_dict):
        """Check for user inputs and use the file collection controller to
        obtain the file collection id and create the necessary collection

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "file_collection_id" in incoming_dictionary:
            create_dict['file_collection_id'] = incoming_dictionary[
                'file_collection_id']
        elif "paths" in incoming_dictionary:
            # transform file paths to file_collection_id
            create_dict['file_collection_id'] = self.file_collection_controller.\
                create(incoming_dictionary['paths']).id
        else:
            # create some default file collection
            create_dict['file_collection_id'] = self.file_collection_controller.\
                create([]).id

    def _config_setup(self, incoming_dictionary, create_dict):
        """Fill in the snapshot config from one of the following:
        1. config = JSON object
        2. config_filepath = some location where a json file exists
        3. config_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOError
        """
        if "config" in incoming_dictionary:
            create_dict['config'] = incoming_dictionary['config']
        elif "config_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['config_filepath']):
                raise FileIOError(
                    __("error", "controller.snapshot.create.file_config"))
            # If path exists, transform the file to a config dict
            config_json_driver = JSONStore(
                incoming_dictionary['config_filepath'])
            create_dict['config'] = config_json_driver.to_dict()
        elif "config_filename" in incoming_dictionary:
            config_filename = incoming_dictionary['config_filename']
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])
        else:
            config_filename = "config.json"
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])

    def _stats_setup(self, incoming_dictionary, create_dict):
        """Fill in the snapshot stats from one of the following:
        1. stats = JSON object
        2. stats_filepath = some location where a json file exists
        3. stats_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOError
        """
        if "stats" in incoming_dictionary:
            create_dict['stats'] = incoming_dictionary['stats']
        elif "stats_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['stats_filepath']):
                raise FileIOError(
                    __("error", "controller.snapshot.create.file_stat"))
            # If path exists, transform the file to a stats dict
            stats_json_driver = JSONStore(
                incoming_dictionary['stats_filepath'])
            create_dict['stats'] = stats_json_driver.to_dict()
        elif "stats_filename" in incoming_dictionary:
            stats_filename = incoming_dictionary['stats_filename']
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])
        else:
            stats_filename = "stats.json"
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])

    def _find_in_filecollection(self, file_to_find, file_collection_id):
        """Attempt to find a file within the file collection

        Returns
        -------
        dict
            output dictionary of the JSON file (empty if not found)
        """
        file_collection_obj = self.file_collection_controller.dal.file_collection.\
            get_by_id(file_collection_id)
        file_collection_path = \
            self.file_collection_controller.file_driver.get_collection_path(
                file_collection_obj.filehash)
        # find all of the possible paths in which the file could exist
        possible_paths = [os.path.join(self.home, file_to_find)] + \
            [os.path.join(self.home, item[0], file_to_find)
             for item in os.walk(file_collection_path)]
        existing_possible_paths = [
            possible_path for possible_path in possible_paths
            if os.path.isfile(possible_path)
        ]
        if not existing_possible_paths:
            # TODO: Add some info / warning that no file was found
            # return a default empty dict
            return {}
        # If any such path exists, transform the file to a dict
        json_file = JSONStore(existing_possible_paths[0])
        return json_file.to_dict()
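

# NOTE: a sketch of how the private setup helpers compose inside create();
# each documented input key routes to one helper, and all values here are
# illustrative:
#
#   controller = SnapshotController()
#   snapshot_obj = controller.create({
#       "message": "end of epoch 3",
#       "commit_id": "<existing commit id>",  # -> _code_setup
#       "environment_paths": ["Dockerfile"],  # -> _env_setup
#       "paths": ["results/"],                # -> _file_setup
#       "config_filename": "config.json",     # -> _config_setup
#       "stats_filename": "stats.json",       # -> _stats_setup
#   })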