def delete_image(cls, labbook: LabBook, username: str,
                 override_image_tag: Optional[str] = None) -> Tuple[LabBook, bool]:
    """ Delete the Docker image for the given LabBook

    Args:
        labbook: Subject LabBook.
        override_image_tag: Tag of docker image (optional)
        username: The current logged in username

    Returns:
        A tuple containing the labbook and a boolean indicating whether the
        image was deleted successfully.
    """
    image_name = override_image_tag or cls.labbook_image_name(labbook, username)

    # We need to remove any images pertaining to this labbook before triggering a build.
    logger.info(f"Deleting docker image for {str(labbook)}")
    try:
        get_docker_client().images.get(name=image_name)
        get_docker_client().images.remove(image_name)
        logger.info(f"Deleted docker image for {str(labbook)}: {image_name}")
    except docker.errors.ImageNotFound:
        logger.warning(f"Could not delete docker image for {str(labbook)}: "
                       f"{image_name} not found")
    except Exception as e:
        logger.error(f"Error deleting docker images for {str(labbook)}: {e}")
        return labbook, False
    return labbook, True
def delete_image(cls, labbook: LabBook, override_image_tag: Optional[str] = None,
                 username: Optional[str] = None) -> Tuple[LabBook, bool]:
    """ Delete the Docker image for the given LabBook

    Args:
        labbook: Subject LabBook.
        override_image_tag: Tag of docker image (optional)
        username: The current logged in username

    Returns:
        A tuple containing the labbook and a boolean indicating whether the
        image was deleted successfully.
    """
    owner = InventoryManager().query_owner(labbook)
    image_name = override_image_tag or infer_docker_image_name(
        labbook_name=labbook.name, owner=owner, username=username)

    # We need to remove any images pertaining to this labbook before triggering a build.
    try:
        get_docker_client().images.get(name=image_name)
        get_docker_client().images.remove(image_name)
    except docker.errors.ImageNotFound:
        pass
    except Exception as e:
        logger.error(f"Error deleting docker images for {str(labbook)}: {e}")
        return labbook, False
    return labbook, True
def _remove_docker_image(image_name: str) -> None: try: get_docker_client().images.get(name=image_name) get_docker_client().images.remove(image_name) except docker.errors.ImageNotFound: logger.warning( f"Attempted to delete Docker image {image_name}, but not found")
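# A minimal usage sketch (an assumption, not from the source): combine the
# image-name inference used throughout these snippets with _remove_docker_image
# to clean up a project image. The owner/username values are placeholders.
from gtmcore.container.utils import infer_docker_image_name

def remove_project_image(labbook_name: str, owner: str, username: str) -> None:
    # Infer the conventional image name, then delete it if present; a missing
    # image is only logged as a warning by _remove_docker_image.
    image_name = infer_docker_image_name(labbook_name=labbook_name,
                                         owner=owner, username=username)
    _remove_docker_image(image_name)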
def test_build_and_start_and_stop_labbook_container(self, mock_config_file):
    erm = RepositoryManager(mock_config_file[0])
    erm.update_repositories()
    erm.index_repositories()

    # Create a labbook
    lb = InventoryManager(mock_config_file[0]).create_labbook(
        'unittester', 'unittester', 'unittest-start-stop-job',
        description="Testing docker building.")
    cm = ComponentManager(lb)
    cm.add_base(gtmcore.fixtures.ENV_UNIT_TEST_REPO, 'quickstart-jupyterlab', 2)

    ib = ImageBuilder(lb)
    ib.assemble_dockerfile(write=True)
    client = get_docker_client()
    img_list = client.images.list()

    try:
        from gtmcore.container.utils import infer_docker_image_name
        owner = InventoryManager().query_owner(lb)
        client.images.remove(
            infer_docker_image_name(labbook_name=lb.name, owner=owner,
                                    username='******'))
    except Exception:
        # Best-effort cleanup of a pre-existing image; ignore if absent
        pass

    docker_kwargs = {
        'path': lb.root_dir,
        'nocache': True,
        'username': '******'
    }
    image_id = jobs.build_labbook_image(**docker_kwargs)

    startc_kwargs = {
        'root': lb.root_dir,
        'config_path': lb.client_config.config_file,
        'username': '******'
    }
    # Start the docker container, and then wait till it's done.
    container_id = jobs.start_labbook_container(**startc_kwargs)
    assert get_docker_client().containers.get(container_id).status == 'running'

    # Stop the docker container, and wait until that is done.
    jobs.stop_labbook_container(container_id)
    with pytest.raises(Exception):
        # Should not be found because the stop job cleans up
        get_docker_client().containers.get(container_id)
def stop_mitm_proxy(cls, labbook_container_name: str) -> Optional[str]: """Stop the MITM proxy. Destroy container. Delete volume. Args: labbook_container_name: the specific target running a dev tool Returns: ip address of the mitm_proxy for removing the route (if configured) else None """ container_id = MITMProxyOperations.get_mitmcontainerid( labbook_container_name) # stop the mitm container docker_client = get_docker_client() mitm_container = docker_client.containers.get(container_id) mitm_container.stop() mitm_container.remove() mitm_endpoint = cls.get_mitmendpoint(labbook_container_name) # unregister the proxy in KV store redis_conn = redis.Redis(db=1) hkey = cls.get_mitm_redis_key(labbook_container_name) redis_conn.delete(hkey) return mitm_endpoint
def start_rserver(labbook: LabBook, username: str, tag: Optional[str] = None,
                  check_reachable: bool = True) -> None:
    """ Main entrypoint to launch rstudio-server. Note, the caller must
    determine for themselves the host and port.

    Raises an exception if there's a problem.

    Returns:
        None
    """
    owner = InventoryManager().query_owner(labbook)
    lb_key = tag or infer_docker_image_name(labbook_name=labbook.name,
                                            owner=owner, username=username)
    docker_client = get_docker_client()
    lb_container = docker_client.containers.get(lb_key)
    if lb_container.status != 'running':
        raise GigantumException(f"{str(labbook)} container is not running")

    rserver_ps = ps_search(lb_container, 'rserver')

    if len(rserver_ps) == 1:
        # we have an existing rstudio-server instance
        return
    elif len(rserver_ps) == 0:
        _start_rserver_process(lb_container)
    else:
        # "ps aux" returning multiple rserver hits should never happen.
        for n, l in enumerate(rserver_ps):
            logger.error(f'Multiple RStudio-Server instances - '
                         f'({n + 1} of {len(rserver_ps)}) - {l}')
        raise ValueError(f'Multiple ({len(rserver_ps)}) RStudio Server '
                         f'instances detected')
def build_lb_image_for_env(mock_config_with_repo):
    # Create a labbook
    im = InventoryManager(mock_config_with_repo[0])
    lb = im.create_labbook('unittester', 'unittester',
                           "containerunittestbookenv",
                           description="Testing environment functions.")

    # Create Component Manager
    cm = ComponentManager(lb)
    # Add a component
    cm.add_base(ENV_UNIT_TEST_REPO, ENV_UNIT_TEST_BASE, ENV_UNIT_TEST_REV)

    ib = ImageBuilder(lb)
    ib.assemble_dockerfile(write=True)
    client = get_docker_client()
    client.containers.prune()

    try:
        lb, docker_image_id = ContainerOperations.build_image(
            labbook=lb, username="******")

        yield lb, 'unittester'
    finally:
        shutil.rmtree(lb.root_dir)

        # Remove image if it's still there
        try:
            client.images.remove(docker_image_id, force=True, noprune=False)
        except Exception:
            # Best-effort cleanup; the image may already be gone
            pass
def start_bundled_app(labbook: LabBook, username: str, command: str, tag: Optional[str] = None) -> None: """ Method to start a bundled app by running the user specified command inside the running Project container Args: labbook: labbook instance username: current logged in user command: user specified command to run tag: optional tag for the container override id Returns: """ if len(command) == 0: return owner = InventoryManager().query_owner(labbook) lb_key = tag or infer_docker_image_name(labbook_name=labbook.name, owner=owner, username=username) docker_client = get_docker_client() lb_container = docker_client.containers.get(lb_key) if lb_container.status != 'running': raise GigantumException(f"{str(labbook)} container is not running. Start it before starting a bundled app.") lb_container.exec_run(f'sh -c "{command}"', detach=True, user='******')
def stop_mitm_proxy(cls, lb_endpoint: str) -> str: """Stop the MITM proxy. Destroy container. Delete volume. Args: lb_endpoint: the specific target running a dev tool Returns: ip address of the mitm_proxy for removing the route """ container_id = MITMProxyOperations.get_mitmcontainerid(lb_endpoint) # stop the mitm container docker_client = get_docker_client() mitm_container = docker_client.containers.get(container_id) mitm_container.stop() mitm_container.remove() # unregister the proxy in KV store redis_conn = redis.Redis(db=1) mitm_endpoint = redis_conn.get(f"{lb_endpoint}-mitm-endpoint").decode() redis_conn.delete(f"{lb_endpoint}-mitm-endpoint") redis_conn.delete(f"{lb_endpoint}-mitm-container_id") redis_conn.delete(f"{lb_endpoint}-mitm-key") return mitm_endpoint
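# A hedged usage sketch (an assumption, not from the source): tear down the MITM
# proxy for a monitored dev tool container and log the endpoint whose route still
# needs removal. `lb_endpoint` is a placeholder for the monitored container name.
def teardown_dev_tool_proxy(lb_endpoint: str) -> None:
    # stop_mitm_proxy destroys the proxy container and clears its Redis keys,
    # returning the endpoint that any external routing still points at.
    mitm_endpoint = MITMProxyOperations.stop_mitm_proxy(lb_endpoint)
    logger.info(f"Stopped MITM proxy for {lb_endpoint}; "
                f"stale route target was {mitm_endpoint}")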
def build_lb_image_for_env_conda(mock_config_with_repo):
    """A fixture that installs a pinned version of python-coveralls via conda
    to increase code coverage"""
    im = InventoryManager(mock_config_with_repo[0])
    lb = im.create_labbook('unittester', 'unittester',
                           "containerunittestbookenvconda",
                           description="Testing environment functions.")
    cm = ComponentManager(lb)
    cm.add_base(ENV_UNIT_TEST_REPO, ENV_UNIT_TEST_BASE, ENV_UNIT_TEST_REV)
    cm.add_packages('conda3', [{'package': 'python-coveralls',
                                'version': '2.7.0'}])

    ib = ImageBuilder(lb)
    ib.assemble_dockerfile(write=True)
    client = get_docker_client()
    client.containers.prune()

    try:
        lb, docker_image_id = ContainerOperations.build_image(
            labbook=lb, username="******")

        yield lb, 'unittester'
    finally:
        shutil.rmtree(lb.root_dir)
        try:
            client.images.remove(docker_image_id, force=True, noprune=False)
        except Exception:
            # Best-effort cleanup; the image may already be gone
            pass
def start_mitm_proxy(cls, lb_endpoint: str, key: str) -> str:
    """Launch a proxy container between client and labbook.

    Args:
        lb_endpoint: the specific target running a dev tool
        key: a unique key for this instance (related to the monitored
            Project container - e.g., RStudio)

    Returns:
        str that contains the proxy endpoint as http://{ip}:{port}
    """
    # setup the environment - note that UID is obtained inside the container
    # based on labmanager_share_vol (mounted at /mnt/share)
    env_var = [f"LBENDPOINT={lb_endpoint}", f"PROXYID={key}"]
    nametag = f"gmitmproxy.{key}"
    volumes_dict = {
        'labmanager_share_vol': {'bind': '/mnt/share', 'mode': 'rw'}
    }

    docker_client = get_docker_client()

    container = docker_client.containers.run(
        "gigantum/mitmproxy_proxy:" + CURRENT_MITMPROXY_TAG, detach=True,
        init=True, name=nametag, volumes=volumes_dict, environment=env_var)

    # For now, we hammer repeatedly for 5 seconds
    # Plan for a better solution is mentioned in #434
    for _ in range(50):
        time.sleep(.1)
        # Hope that our container is actually up and reload
        container.reload()
        container_ip = container.attrs['NetworkSettings']['Networks']['bridge']['IPAddress']
        if container_ip:
            break

    if not container_ip:
        raise GigantumException("Unable to get mitmproxy_proxy IP address.")

    mitm_endpoint = f'http://{container_ip}:8079'

    # register the proxy in KV store
    redis_conn = redis.Redis(db=1)
    redis_conn.set(f"{lb_endpoint}-mitm-endpoint", mitm_endpoint)
    redis_conn.set(f"{lb_endpoint}-mitm-container_id", container.id)
    redis_conn.set(f"{lb_endpoint}-mitm-key", key)

    # make sure proxy is up.
    for timeout in range(10):
        time.sleep(1)
        ec, new_ps_list = container.exec_run(
            f'sh -c "ps aux | grep nginx | grep -v \' grep \'"')
        new_ps_list = new_ps_list.decode().split('\n')
        if any('nginx' in l for l in new_ps_list):
            logger.info(f"Proxy to rserver started within {timeout + 1} seconds")
            break
    else:
        raise ValueError('mitmproxy failed to start after 10 seconds')

    return mitm_endpoint
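# Both loops above follow the same poll-until-ready pattern. A small generic
# helper (an illustrative sketch, not part of the source) makes that pattern
# reusable and keeps the timeout arithmetic in one place.
import time
from typing import Callable

def wait_for(predicate: Callable[[], bool], attempts: int, delay: float) -> bool:
    """Poll `predicate` up to `attempts` times, sleeping `delay` seconds
    between tries. Returns True as soon as the predicate holds, else False."""
    for _ in range(attempts):
        if predicate():
            return True
        time.sleep(delay)
    return False

# e.g., the IP wait above becomes roughly (container_has_ip is hypothetical):
#   if not wait_for(container_has_ip, attempts=50, delay=0.1):
#       raise GigantumException("Unable to get mitmproxy_proxy IP address.")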
def test_list_versions_from_fallback(self, mock_config_with_repo):
    """Test that package queries still work from the fallback image after
    the labbook image is deleted"""
    username = "******"
    im = InventoryManager(mock_config_with_repo[0])
    lb = im.create_labbook('unittest', 'unittest', 'labbook-unittest-01',
                           description="From mock_config_from_repo fixture")

    # Create Component Manager
    cm = ComponentManager(lb)
    # Add a component
    cm.add_base(ENV_UNIT_TEST_REPO, ENV_UNIT_TEST_BASE, ENV_UNIT_TEST_REV)

    ib = ImageBuilder(lb)
    ib.assemble_dockerfile(write=True)
    client = get_docker_client()

    try:
        lb, docker_image_id = ContainerOperations.build_image(
            labbook=lb, username=username)

        # Test lookup
        mrg = PipPackageManager()
        result = mrg.search("peppercorn", lb, username)
        assert len(result) == 2

        result = mrg.search("gigantum", lb, username)
        assert len(result) == 4
        assert result[0] == "gigantum"

        # Delete image
        client.images.remove(docker_image_id, force=True, noprune=False)

        # Test lookup still works
        mrg = PipPackageManager()
        result = mrg.search("peppercorn", lb, username)
        assert len(result) == 2

        result = mrg.search("gigantum", lb, username)
        assert len(result) == 4
        assert result[0] == "gigantum"
    finally:
        shutil.rmtree(lb.root_dir)

        # Remove image if it's still there
        try:
            client.images.remove(docker_image_id, force=True, noprune=False)
        except Exception:
            # Best-effort cleanup; the image may already be gone
            pass
def run_command(cls, cmd_text: str, labbook: LabBook, username: str,
                override_image_tag: Optional[str] = None,
                fallback_image: Optional[str] = None) -> bytes:
    """Run a command executed in the context of the LabBook's docker image.

    Args:
        cmd_text: Content of command to be executed.
        labbook: Subject labbook
        username: Active username
        override_image_tag: If set, does not automatically infer container name.
        fallback_image: If LabBook image can't be found, use this one instead.

    Returns:
        The stdout of the command as bytes.
    """
    image_name = override_image_tag or cls.labbook_image_name(labbook, username)

    # Get a docker client instance
    client = get_docker_client()

    # Verify image name exists. If it doesn't, fall back and use the base image
    try:
        client.images.get(image_name)
    except docker.errors.ImageNotFound:
        # Image not found...assume build has failed and fall back to base
        if not fallback_image:
            raise
        logger.warning(f"LabBook image not available for package query. "
                       f"Falling back to base image `{fallback_image}`.")
        image_name = fallback_image

    t0 = time.time()
    try:
        # Note, for container docs see: http://docker-py.readthedocs.io/en/stable/containers.html
        container = client.containers.run(image_name, cmd_text, entrypoint=[],
                                          remove=False, detach=True, stdout=True)
        while container.status != "exited":
            time.sleep(.25)
            container.reload()
        result = container.logs(stdout=True, stderr=False)
        container.remove(v=True)
    except docker.errors.ContainerError as e:
        tfail = time.time()
        logger.error(f'Command ({cmd_text}) failed after {tfail - t0:.2f}s - '
                     f'output: {e.exit_status}, {e.stderr}')
        raise ContainerException(e)

    ts = time.time()
    if ts - t0 > 5.0:
        logger.warning(f'Command ({cmd_text}) in {str(labbook)} took {ts - t0:.2f} sec')

    return result
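# A hedged usage sketch (assumptions: run_command is exposed as a classmethod on
# ContainerOperations like build_image above, and the fallback tag shown here is
# a hypothetical placeholder): query the Python version inside a project image.
def query_python_version(labbook, username: str) -> str:
    output = ContainerOperations.run_command(
        cmd_text="python3 --version",
        labbook=labbook,
        username=username,
        fallback_image="gigantum/python3-minimal")  # hypothetical fallback tag
    return output.decode().strip()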
def build_lb_image_for_jupyterlab(mock_config_with_repo):
    with patch.object(Configuration, 'find_default_config',
                      lambda self: mock_config_with_repo[0]):
        im = InventoryManager(mock_config_with_repo[0])
        lb = im.create_labbook('unittester', 'unittester',
                               "containerunittestbook")

        # Create Component Manager
        cm = ComponentManager(lb)
        # Add a component
        cm.add_base(ENV_UNIT_TEST_REPO, ENV_UNIT_TEST_BASE, ENV_UNIT_TEST_REV)
        cm.add_packages("pip", [{"manager": "pip", "package": "requests",
                                 "version": "2.18.4"}])

        ib = ImageBuilder(lb)
        docker_lines = ib.assemble_dockerfile(write=True)
        assert 'RUN pip install requests==2.18.4' in docker_lines
        assert all(['==None' not in l for l in docker_lines.split()])
        assert all(['=None' not in l for l in docker_lines.split()])
        client = get_docker_client()
        client.containers.prune()

        assert os.path.exists(os.path.join(lb.root_dir, '.gigantum', 'env',
                                           'entrypoint.sh'))

        try:
            lb, docker_image_id = ContainerOperations.build_image(
                labbook=lb, username="******")
            lb, container_id = ContainerOperations.start_container(
                lb, username="******")

            assert isinstance(container_id, str)
            yield lb, ib, client, docker_image_id, container_id, None, 'unittester'

            try:
                _, s = ContainerOperations.stop_container(
                    labbook=lb, username="******")
            except docker.errors.APIError:
                client.containers.get(container_id=container_id).stop(timeout=2)
                s = False
        finally:
            shutil.rmtree(lb.root_dir)

            # Stop and remove container if it's still there
            try:
                client.containers.get(container_id=container_id).stop(timeout=2)
                client.containers.get(container_id=container_id).remove()
            except Exception:
                # Best-effort cleanup; the container may already be gone
                pass

            # Remove image if it's still there
            try:
                ContainerOperations.delete_image(labbook=lb, username='******')
                client.images.remove(docker_image_id, force=True, noprune=False)
            except Exception:
                # Best-effort cleanup; the image may already be gone
                pass
def get_container_ip(container_name: str) -> str: """Method to get a container IP address Args: container_name(str): Name of the container to query Returns: str """ client = get_docker_client() container = client.containers.get(container_name) return container.attrs['NetworkSettings']['Networks']['bridge'][ 'IPAddress']
def _get_cached_image(env_dir: str, image_name: str) -> Optional[str]:
    """ Get Docker image id for the given environment specification (if it exists).

    This helps to determine if we can avoid having to rebuild the Docker image
    by hashing the environment specification and determining if it changed.
    Any change in content or version will cause the checksum to be different,
    necessitating a rebuild. If there's no change, however, we can avoid
    potentially costly rebuilds of the image.

    Args:
        env_dir: Environment directory for a LabBook
        image_name: Name of the LabBook Docker image

    Returns:
        docker image id (Optional)
    """
    # Determine if we need to rebuild by testing if the environment changed
    cache_dir = '/mnt/gigantum/.labmanager/image-cache'
    if not os.path.exists(cache_dir):
        logger.info(f"Making environment cache at {cache_dir}")
        os.makedirs(cache_dir, exist_ok=True)
    env_cache_path = os.path.join(cache_dir, f"{image_name}.cache")

    m = hashlib.sha256()
    for root, dirs, files in os.walk(env_dir):
        for f in [n for n in files if '.yaml' in n]:
            m.update(os.path.join(root, f).encode())
            m.update(open(os.path.join(root, f)).read().encode())
    env_cksum = m.hexdigest()

    if os.path.exists(env_cache_path):
        old_env_cksum = open(env_cache_path).read()
    else:
        with open(env_cache_path, 'w') as cfile:
            cfile.write(env_cksum)
        return None

    if env_cksum == old_env_cksum:
        try:
            i = get_docker_client().images.get(name=image_name)
            return i.id
        except docker.errors.ImageNotFound:
            pass
    else:
        # Env checksum hash is outdated. Remove it.
        os.remove(env_cache_path)
        with open(env_cache_path, 'w') as cfile:
            cfile.write(env_cksum)

    return None
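# A self-contained sketch of the checksum idea above (illustrative only, with
# deterministic ordering added that the original does not guarantee): hash every
# .yaml file under a directory, mixing in the file path so renames also
# invalidate the cache, exactly as _get_cached_image does.
import hashlib
import os

def environment_checksum(env_dir: str) -> str:
    m = hashlib.sha256()
    for root, _, files in sorted(os.walk(env_dir)):
        for name in sorted(f for f in files if f.endswith('.yaml')):
            path = os.path.join(root, name)
            m.update(path.encode())     # path contributes to the hash
            with open(path, 'rb') as fh:
                m.update(fh.read())     # so does the file content
    return m.hexdigest()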
def get_container_status(labbook_name: str, owner: str, username: str) -> bool:
    labbook_key = infer_docker_image_name(labbook_name=labbook_name,
                                          owner=owner, username=username)
    try:
        client = get_docker_client()
        container = client.containers.get(labbook_key)
        return container.status == "running"
    except Exception:
        # Treat a missing container (or unreachable Docker daemon) as not running
        pass

    return False
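# A hedged usage sketch (an assumption, not from the source): gate an operation
# on whether the project container is already running. Owner/username values
# are placeholders.
def ensure_stopped(labbook_name: str, owner: str, username: str) -> None:
    if get_container_status(labbook_name, owner, username):
        raise RuntimeError(
            f"Container for {owner}/{labbook_name} is still running; "
            f"stop it before continuing")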
def get_labmanager_ip() -> Optional[str]:
    """Method to get the labmanager container's IP address on the Docker
    bridge network

    Returns:
        str of IP address
    """
    client = get_docker_client()
    container = [c for c in client.containers.list()
                 if 'gigantum.labmanager' in c.name
                 and 'gmlb-' not in c.name][0]
    ip = container.attrs['NetworkSettings']['Networks']['bridge']['IPAddress']
    logger.info("container {} IP: {}".format(container.name, ip))
    return ip
def test_old_dockerfile_removed_when_new_build_fails( self, build_lb_image_for_jupyterlab): # Test that when a new build fails, old images are removed so they cannot be launched. my_lb = build_lb_image_for_jupyterlab[0] docker_image_id = build_lb_image_for_jupyterlab[3] my_lb, stopped = ContainerOperations.stop_container( my_lb, username="******") assert stopped olines = open(os.path.join(my_lb.root_dir, '.gigantum/env/Dockerfile')).readlines()[:6] with open(os.path.join(my_lb.root_dir, '.gigantum/env/Dockerfile'), 'w') as dockerfile: dockerfile.write('\n'.join(olines)) dockerfile.write('\nRUN /bin/false') # We need to remove cache data otherwise the following tests won't work remove_image_cache_data() with pytest.raises(ContainerBuildException): ContainerOperations.build_image(labbook=my_lb, username="******") with pytest.raises(docker.errors.ImageNotFound): owner = InventoryManager().query_owner(my_lb) get_docker_client().images.get( infer_docker_image_name(labbook_name=my_lb.name, owner=owner, username="******")) with pytest.raises(requests.exceptions.HTTPError): # Image not found so container cannot be started ContainerOperations.start_container(labbook=my_lb, username="******")
def get_container_ip(self) -> Optional[str]: """Method to get the monitored lab book container's IP address on the Docker bridge network Returns: str """ client = get_docker_client() lb_key = infer_docker_image_name(self.labbook_name, self.owner, self.user) container = client.containers.get(lb_key) ip = container.attrs['NetworkSettings']['Networks']['bridge'][ 'IPAddress'] logger.info("container {} IP: {}".format(container.name, ip)) return ip
def get_running_proxies(self) -> List[str]: """Return a list of the running gmitmproxy Returns: List of strs with proxy id. """ client = get_docker_client() clist = client.containers.list( filters={ 'ancestor': 'gigantum/mitmproxy_proxy:' + CURRENT_MITMPROXY_TAG }) retlist = [] for cont in clist: # container name is gmitmproxy.<uuid-style key> _, container_key = cont.name.split('.') retlist.append(container_key) return retlist
def get_running_proxies(cls) -> List[str]: """Return a list of the running gmitmproxy Returns: List of strs with image names for proxied dev tool containers. """ client = get_docker_client() clist = client.containers.list( filters={ 'ancestor': 'gigantum/mitmproxy_proxy:' + CURRENT_MITMPROXY_TAG }) retlist = [] for cont in clist: # container name is gmitmproxy.<mitm key> - currently the monitored container image name _, container_key = cont.name.split('.') retlist.append(container_key) return retlist
def stop_labbook_container(container_id: str) -> bool: """ Stop a running docker container. Args: container_id: ID of container to stop. Returns True if stopped, False if it was never running. """ try: client = get_docker_client() build_container = client.containers.get(container_id) build_container.stop(timeout=10) build_container.remove() return True except docker.errors.NotFound: # No container to stop, but no reason to throw an exception return False
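# A hedged usage sketch (assumption): in these snippets the project container is
# run with name=tag equal to the inferred image name (see start_labbook_container
# below), so the inferred name can serve as the container_id argument here.
from gtmcore.container.utils import infer_docker_image_name

def stop_project(labbook_name: str, owner: str, username: str) -> bool:
    container_id = infer_docker_image_name(labbook_name=labbook_name,
                                           owner=owner, username=username)
    # Returns False if the container was never running
    return stop_labbook_container(container_id)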
def resolve_container_status(self, info):
    """Resolve the container_status field"""
    # Check if the container is running by looking up the container
    labbook_key = infer_docker_image_name(labbook_name=self.name,
                                          owner=self.owner,
                                          username=get_logged_in_username())

    try:
        client = get_docker_client()
        container = client.containers.get(labbook_key)
        if container.status == "running":
            container_status = ContainerStatus.RUNNING
        else:
            container_status = ContainerStatus.NOT_RUNNING
    except (NotFound, requests.exceptions.ConnectionError):
        container_status = ContainerStatus.NOT_RUNNING

    return container_status.value
def docker_socket_fixture():
    """Fixture that lets the `responses` library pass through real Docker API
    calls for the test images and containers used in this suite"""
    client = get_docker_client()
    version = client.version()['ApiVersion']

    if "CIRCLECI" in os.environ:
        docker_host = os.environ['DOCKER_HOST']
        docker_host = docker_host.replace("tcp", "https")
        responses.add_passthru(
            f"{docker_host}/v{version}/images/default-default-labbook1/json")
        responses.add_passthru(
            f"{docker_host}/v{version}/containers/default-default-labbook1/json")
        responses.add_passthru(
            f"{docker_host}/v{version}/images/default-test-sample-repo-lb/json")
        responses.add_passthru(
            f"{docker_host}/v{version}/containers/default-test-sample-repo-lb/json")
    else:
        responses.add_passthru(
            f"http+docker://localunixsocket/v{version}/images/default-default-labbook1/json")
        responses.add_passthru(
            f"http+docker://localunixsocket/v{version}/containers/default-default-labbook1/json")
        responses.add_passthru(
            f"http+docker://localunixsocket/v{version}/images/default-test-sample-repo-lb/json")
        responses.add_passthru(
            f"http+docker://localunixsocket/v{version}/containers/default-test-sample-repo-lb/json")

    yield
def reset_images(request):
    """A pytest fixture that checks if the test images exist and deletes them"""
    # Clean up images
    client = get_docker_client()

    # image should never exist before the test starts
    image_name = "gmlb-{}-{}".format(get_logged_in_username(),
                                     f'default-{request.param}')
    try:
        client.images.get(image_name)
        client.images.remove(image_name)
        raise ValueError("Test image existed before the test started. "
                         "It has been removed automatically; run the test again.")
    except ImageNotFound:
        pass

    yield None

    try:
        client.images.get(image_name)
        client.images.remove(image_name)
    except ImageNotFound:
        pass
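# A hedged usage sketch (assumption): reset_images reads its image suffix from
# request.param, so tests opt in via pytest's indirect parametrization.
# 'labbook1' is a placeholder parameter value.
import pytest

@pytest.mark.parametrize('reset_images', ['labbook1'], indirect=True)
def test_something_with_clean_images(reset_images):
    # The fixture guarantees the gmlb-<user>-default-labbook1 image does not
    # exist when this test body runs, and removes it again afterwards.
    pass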
def helper_resolve_image_status(self, labbook): """Helper to resolve the image status of a labbook""" labbook_image_key = infer_docker_image_name( labbook_name=self.name, owner=self.owner, username=get_logged_in_username()) dispatcher = Dispatcher() lb_jobs = [ dispatcher.query_task(j.job_key) for j in dispatcher.get_jobs_for_labbook(labbook.key) ] for j in lb_jobs: logger.debug("Current job for labbook: status {}, meta {}".format( j.status, j.meta)) # First, check if image exists or not -- The first step of building an image untags any existing ones. # Therefore, we know that if one exists, there most likely is not one being built. try: client = get_docker_client() client.images.get(labbook_image_key) image_status = ImageStatus.EXISTS except (ImageNotFound, requests.exceptions.ConnectionError): image_status = ImageStatus.DOES_NOT_EXIST if any([ j.status == 'failed' and j.meta.get('method') == 'build_image' for j in lb_jobs ]): logger.debug("Image status for {} is BUILD_FAILED".format( labbook.key)) if image_status == ImageStatus.EXISTS: # The indication that there's a failed job is probably lingering from a while back, so don't # change the status to FAILED. Only do that if there is no Docker image. logger.debug( f'Got failed build_image for labbook {labbook.key}, but image exists.' ) else: image_status = ImageStatus.BUILD_FAILED if any([ j.status in ['started'] and j.meta.get('method') == 'build_image' for j in lb_jobs ]): logger.debug( f"Image status for {labbook.key} is BUILD_IN_PROGRESS") # build_image being in progress takes precedence over if image already exists (unlikely event). if image_status == ImageStatus.EXISTS: logger.warning( f'Got build_image for labbook {labbook.key}, but image exists.' ) image_status = ImageStatus.BUILD_IN_PROGRESS if any([ j.status in ['queued'] and j.meta.get('method') == 'build_image' for j in lb_jobs ]): logger.warning( f"build_image for {labbook.key} stuck in queued state") image_status = ImageStatus.BUILD_QUEUED return image_status.value
def test_success_import_export_zip(self, mock_config_with_repo): # Create new LabBook to be exported im = InventoryManager(mock_config_with_repo[0]) lb = im.create_labbook('unittester', 'unittester', "unittest-lb-for-export-import-test", description="Testing import-export.") cm = ComponentManager(lb) cm.add_base(gtmcore.fixtures.ENV_UNIT_TEST_REPO, gtmcore.fixtures.ENV_UNIT_TEST_BASE, gtmcore.fixtures.ENV_UNIT_TEST_REV) ib = ImageBuilder(lb) ib.assemble_dockerfile() # Make sure the destination user exists locally working_dir = lb.client_config.config['git']['working_directory'] os.makedirs(os.path.join(working_dir, 'unittester2', 'unittester2', 'labbooks'), exist_ok=True) lb_root = lb.root_dir with tempfile.TemporaryDirectory() as temp_dir_path: # Export the labbook export_dir = os.path.join(mock_config_with_repo[1], "export") exported_archive_path = jobs.export_labbook_as_zip( lb.root_dir, export_dir) tmp_archive_path = shutil.copy(exported_archive_path, '/tmp') # Delete the labbook shutil.rmtree(lb.root_dir) assert not os.path.exists( lb_root), f"LabBook at {lb_root} should not exist." assert os.path.exists(tmp_archive_path) # Now import the labbook as a new user, validating that the change of namespace works properly. imported_lb_path = jobs.import_labboook_from_zip( archive_path=tmp_archive_path, username='******', owner='unittester2', config_file=mock_config_with_repo[0]) assert not os.path.exists(tmp_archive_path) tmp_archive_path = shutil.copy(exported_archive_path, '/tmp') assert os.path.exists(tmp_archive_path) # New path should reflect username of new owner and user. assert imported_lb_path == lb_root.replace( '/unittester/unittester/', '/unittester2/unittester2/') import_lb = InventoryManager( mock_config_with_repo[0]).load_labbook_from_directory( imported_lb_path) ib = ImageBuilder(import_lb) ib.assemble_dockerfile(write=True) assert os.path.exists( os.path.join(imported_lb_path, '.gigantum', 'env', 'Dockerfile')) assert not import_lb.has_remote # Repeat the above, except with the original user (e.g., re-importing their own labbook) user_import_lb = jobs.import_labboook_from_zip( archive_path=tmp_archive_path, username="******", owner="unittester", config_file=mock_config_with_repo[0]) assert not os.path.exists(tmp_archive_path) # New path should reflect username of new owner and user. assert user_import_lb import_lb2 = InventoryManager( mock_config_with_repo[0]).load_labbook_from_directory( user_import_lb) # After importing, the new user (in this case "cat") should be the current, active workspace. # And be created, if necessary. assert not import_lb2.has_remote build_kwargs = { 'path': lb.root_dir, 'username': '******', 'nocache': True } docker_image_id = jobs.build_labbook_image(**build_kwargs) try: client = get_docker_client() client.images.remove(docker_image_id) except Exception as e: pprint.pprint(e) raise
def start_labbook_container(labbook_root: str, config_path: str, username: str,
                            override_image_id: Optional[str] = None) -> str:
    """ Start a Docker container from a given image name.

    Args:
        labbook_root: Root dir of labbook
        config_path: Path to LabBook configuration file.
        override_image_id: Optional explicit docker image id (do not infer).
        username: Username of active user. Do not use with override_image_id.

    Returns:
        Docker container id of the started container.
    """
    if username and override_image_id:
        raise ValueError('Argument username and override_image_id cannot both be set')

    lb = InventoryManager(config_file=config_path).load_labbook_from_directory(labbook_root)
    if not override_image_id:
        owner = InventoryManager().query_owner(lb)
        tag = infer_docker_image_name(lb.name, owner, username)
    else:
        tag = override_image_id

    mnt_point = labbook_root.replace('/mnt/gigantum', os.environ['HOST_WORK_DIR'])
    volumes_dict = {
        mnt_point: {'bind': '/mnt/labbook', 'mode': 'cached'},
        'labmanager_share_vol': {'bind': '/mnt/share', 'mode': 'rw'}
    }

    # Set up additional bind mounts for datasets if needed.
    submodules = lb.git.list_submodules()
    for submodule in submodules:
        try:
            namespace, dataset_name = submodule['name'].split("&")
            submodule_dir = os.path.join(lb.root_dir, '.gigantum', 'datasets',
                                         namespace, dataset_name)
            ds = InventoryManager().load_dataset_from_directory(submodule_dir)
            ds.namespace = namespace

            cm_class = get_cache_manager_class(ds.client_config)
            cm = cm_class(ds, username)
            ds_cache_dir = cm.current_revision_dir.replace(
                '/mnt/gigantum', os.environ['HOST_WORK_DIR'])
            volumes_dict[ds_cache_dir] = {'bind': f'/mnt/labbook/input/{ds.name}',
                                          'mode': 'ro'}
        except InventoryException:
            continue

    # If re-mapping permissions, be sure to configure the container
    if 'LOCAL_USER_ID' in os.environ:
        env_var = [f"LOCAL_USER_ID={os.environ['LOCAL_USER_ID']}"]
    else:
        env_var = ["WINDOWS_HOST=1"]

    # Get resource limits
    resource_args = dict()
    memory_limit = lb.client_config.config['container']['memory']
    cpu_limit = lb.client_config.config['container']['cpu']
    gpu_shared_mem = lb.client_config.config['container']['gpu_shared_mem']
    if memory_limit:
        # If memory_limit not None, pass to Docker to limit memory allocation to container
        resource_args["mem_limit"] = memory_limit
    if cpu_limit:
        # If cpu_limit not None, pass to Docker to limit CPU allocation to container
        # "nano_cpus" is an integer in fractional parts of a CPU
        resource_args["nano_cpus"] = round(cpu_limit * 1e9)

    docker_client = get_docker_client()

    # run with nvidia-docker if we have GPU support on the Host compatible with the project
    should_run_nvidia, reason = should_launch_with_cuda_support(lb.cuda_version)
    if should_run_nvidia:
        logger.info(f"Launching container with GPU support: {reason}")
        if gpu_shared_mem:
            resource_args["shm_size"] = gpu_shared_mem

        container_id = docker_client.containers.run(
            tag, detach=True, init=True, name=tag, environment=env_var,
            volumes=volumes_dict, runtime='nvidia', **resource_args).id
    else:
        logger.info(f"Launching container without GPU support. {reason}")
        container_id = docker_client.containers.run(
            tag, detach=True, init=True, name=tag, environment=env_var,
            volumes=volumes_dict, **resource_args).id

    labmanager_ip = ""
    try:
        labmanager_ip = get_labmanager_ip() or ""
    except IndexError:
        logger.warning("Cannot find labmanager IP")

    labmanager_ip = labmanager_ip.strip()
    cmd = f"echo {labmanager_ip} > /home/giguser/labmanager_ip"
    for timeout in range(20):
        time.sleep(0.5)
        if docker_client.containers.get(container_id).status == 'running':
            r = docker_client.containers.get(container_id).exec_run(f'sh -c "{cmd}"')
            logger.info(f"Response to write labmanager_ip in {tag}: {r}")
            break
    else:
        logger.error("After 10 seconds could not write labmanager IP to container."
                     f" Container status = {docker_client.containers.get(container_id).status}")
    return container_id
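# A hedged usage sketch (an assumption, not from the source): start a project
# container and confirm Docker reports it running, mirroring the assertion used
# in the test earlier in this section. Paths and username are placeholders.
def launch_project(labbook_root: str, config_path: str, username: str) -> str:
    container_id = start_labbook_container(labbook_root=labbook_root,
                                           config_path=config_path,
                                           username=username)
    status = get_docker_client().containers.get(container_id).status
    assert status == 'running', f"Expected running container, got {status}"
    return container_id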
def get_container_ip(lb_key: str) -> str: """Return the IP address of the given labbook container""" client = get_docker_client() container = client.containers.get(lb_key) return container.attrs['NetworkSettings']['Networks']['bridge'][ 'IPAddress']
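# A hedged usage sketch (assumption): turn the bridge-network IP into a dev-tool
# URL. Port 8888 is a placeholder for whatever port the tool actually exposes.
def dev_tool_endpoint(lb_key: str, port: int = 8888) -> str:
    return f"http://{get_container_ip(lb_key)}:{port}"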