示例#1
0
    def build_container_context(self, context: ModelSnapshot) -> ModelSnapshot:
        """Build the container image for ``context`` and record the build log.

        A context that has a ``parent_id`` is returned unchanged, because its
        container is re-used instead of being rebuilt.

        Returns:
            A copy of ``context`` carrying ``container_image_id`` and
            ``container_build_logs``, or ``context`` itself when no build is
            performed.
        """
        # Container must be re-used: nothing to build.
        if context.parent_id:
            return context

        context = update_instance_status_rest(instance=context, new_status='Building Container')

        # The start timestamp is taken before the build is triggered.
        fragments = [f"Started: {get_timestamp()}\n"]
        image, log_stream = self._d.images.build(
            path=str(context.code_directory),
            rm=False,
            tag=context.container_image_name,
        )

        # Flatten the stream of log dicts into text. Consecutive values that
        # share a key are appended tab-separated; a new key starts a
        # "key<TAB>value" line.
        previous_key = ""
        for entry in log_stream:
            for key, value in entry.items():
                if key == previous_key:
                    fragments.append(f"\t{value}")
                else:
                    previous_key = key
                    fragments.append(f"{key}\t{value}\n")

        return dataclasses.replace(
            context,
            container_image_id=image.id,
            container_build_logs="".join(fragments),
        )
示例#2
0
    def push_container_trained_image_context(self, context: ModelSnapshot) -> ModelSnapshot:
        """Tag the trained image for the registry, push it and record the push log.

        Does nothing (returns ``context`` unchanged) unless
        ``self._config.push_trained_images_to_registry`` is truthy.

        Returns:
            A copy of ``context`` whose ``new_container_image_name`` is the
            registry-qualified tag and whose ``container_push_logs`` has the
            new push output appended, or ``context`` itself when pushing is
            disabled.
        """
        if not self._config.push_trained_images_to_registry:
            return context

        context = update_instance_status_rest(
            instance=context,
            new_status='Push Trained ModelSnapshot to Registry',
        )

        image = self._d.images.get(context.new_container_image_name)
        registry_tag = "{}{}".format(self._config.docker_registry_address,
                                     context.new_container_image_name)

        # Create the registry tag, refresh the image object, then push.
        image.tag(registry_tag)
        image.reload()
        push_output = self._d_low.push(registry_tag)

        combined_logs = f'{context.container_push_logs}\n\nPush new trained model Image:\n{push_output}'

        # The registry tag becomes the default image name from here on.
        # TODO: include in email
        context = dataclasses.replace(
            context,
            new_container_image_name=registry_tag,
            container_push_logs=combined_logs,
        )

        # Persist only the output of this push to its own log file.
        log_name = f"Container_Push_Trained_Model_{get_timestamp(date_format='filename')}.log"
        write_to_file(Path(context.storage_path) / log_name, push_output)

        return context
示例#3
0
    def perform_pre_processing_context(self, context: Union[ModelSnapshot, Result]) -> Union[Exception,
                                                                                             ModelSnapshot,
                                                                                             Result]:
        """Run the optional pre-processing container and collect its report.

        When ``context.pre_processing`` has no truthy
        ``'pre_processing_application'`` entry, ``context`` is returned
        unchanged. Otherwise the pre-processing image is pulled and run, the
        method polls until the container leaves the running statuses, and a
        textual report (pull log, result, timestamps, container output) is
        written to disk and stored on the returned context.

        Returns:
            ``context`` itself, or a copy with ``container_pre_processing_logs`` set.
        """
        if not context.pre_processing.get('pre_processing_application', False):
            return context

        context = update_instance_status_rest(instance=context, new_status='Pre-Processing Input Data')
        settings = context.pre_processing

        # Pull the pre-processing image; its pull log opens the report.
        report = [
            "Preprocessing Container Pull Log: \n"
            + self.pull_container(container_image_registry="",
                                  container_image_name=settings['pre_processing_container_image_name'])
        ]

        # Start the container; it is kept after exit so that its state and
        # logs can still be read below.
        pre_container = self.run_container(
            container_image_name=settings['pre_processing_container_image_name'],
            container_name=settings['pre_processing_container_name'],
            container_autoremove=False,
            mount_volumes=settings['container_mount_volumes'],
        )

        # Poll every 10 seconds until the container is no longer running.
        while pre_container.status in CONTAINER_RUNNING_STATUSES:
            time.sleep(10)
            pre_container.reload()

        # Container exited: gather the final state.
        state = pre_container.attrs["State"]
        container_info = {
            "start_time": state["StartedAt"],
            "end_time": state["FinishedAt"],
            "exit_code": state["ExitCode"],
            "exit_message": state["Error"],
            "OOMKilled": state["OOMKilled"],
            "Dead": state["Dead"],
        }

        succeeded = container_info["exit_code"] == 0
        report.append("\nResult: {}\n".format(succeeded))
        if not succeeded and container_info["exit_message"]:
            report.append("Errormessage: {}\n".format(container_info["exit_message"]))
        report.append("Started: {}\n".format(container_info["start_time"]))
        report.append("Finished: {}\n\nOutput:\n\n".format(container_info["end_time"]))
        report.append(pre_container.logs(timestamps=True).decode("utf-8"))
        container_pre_processing_logs = "".join(report)

        # The file on disk gets the ANSI-stripped text; the context keeps the raw text.
        log_path = Path(context.storage_path) / f"Pre_processing_Output_{get_timestamp(date_format='filename')}.log"
        write_to_file(log_path, remove_ansi_escape_tags(container_pre_processing_logs))

        context = dataclasses.replace(context, container_pre_processing_logs=container_pre_processing_logs)

        # Remove the finished pre-processing container.
        self.remove_container(container_name=context.pre_processing['pre_processing_container_name'])
        return context
示例#4
0
    def pull_container_context(self, context: Union[ModelSnapshot, Result]) -> Union[ModelSnapshot, Result]:
        """Try to pull the context's container image and record the pull log.

        Any exception raised by the pull is swallowed and replaced by the
        text ``'image not present'`` in the log.

        Returns:
            A copy of ``context`` with ``container_pull_logs`` set.
        """
        context = update_instance_status_rest(instance=context, new_status='Pull Model-Container from Registry')

        # The timestamp is taken before the pull is attempted.
        log_header = "Started: {}\n".format(get_timestamp())

        def _pull(image):
            return self._d_low.pull(image)

        def _on_failure(_error):
            return 'image not present'

        # For debugging, ignore every error from the registry and continue;
        # a missing image is expected here (the original notes it will be
        # built later).
        # TODO: Apply error handling
        guarded_pull = excepts(Exception, _pull, _on_failure)
        pull_output = guarded_pull(context.container_image_name)

        return dataclasses.replace(context, container_pull_logs=log_header + pull_output)
示例#5
0
    def run_container_context(self, context: Union[ModelSnapshot, Result]) -> Union[ModelSnapshot, Result]:
        """Start the container described by ``context`` in detached mode.

        If ``self.get_container`` already finds a container with the
        context's name, nothing is started: a warning is logged and
        ``context`` is returned unchanged.

        Returns:
            ``context`` itself when the container already exists, otherwise a
            copy with ``container_id`` set to the id of the started container.
        """
        # Function-scope import keeps this block self-contained.
        import logging

        # Check whether the container is already running.
        if self.get_container(context.container_name):
            # The original built an Exception object here without raising it,
            # so the message was silently lost. Log it instead; the method
            # still returns normally, so callers see no change in behaviour.
            logging.getLogger(__name__).warning(
                "Container '%s' is already running. Nothing more to do.",
                context.container_name,
            )
            return context

        context = update_instance_status_rest(instance=context, new_status='Running')

        # auto_remove is off, so the exited container stays available afterwards.
        container = self._con.run(image=context.container_image_name,
                                  name=context.container_name,
                                  environment=context.container_environment_variables,
                                  auto_remove=False,
                                  volumes=context.container_mount_volumes,
                                  ports=context.container_ports,
                                  shm_size="2G",
                                  detach=True)

        return dataclasses.replace(context, container_id=container.id)
示例#6
0
    def save_container_state(self, context: Union[ModelSnapshot, Result]):
        """Commit the container's state to a new image when the run succeeded.

        On success the container is committed as ``snap_<context.id>:latest``
        and that repository name is stored in ``new_container_image_name``.
        Otherwise ``new_container_image_name`` is set to the existing
        ``container_image_name``.

        Returns:
            A copy of ``context`` with ``new_container_image_name`` set.
        """
        if not context.success:
            # Prevent methods with broken snapshots from producing errors (debugging).
            return dataclasses.replace(context, new_container_image_name=context.container_image_name)

        context = update_instance_status_rest(instance=context, new_status='Export Container State')

        # Commit the container state as a new image.
        container = self._d.containers.get(context.container_id)
        snapshot_repository = 'snap_{}'.format(context.id)
        container.commit(
            repository=snapshot_repository,
            tag='latest',
            message="MMLP: trained model snapshot",
            author="Matthias Greiner",
        )

        # Remember the new image name on the context.
        return dataclasses.replace(context, new_container_image_name=snapshot_repository)
示例#7
0
    def archive_and_remove_container(self, context: Union[ModelSnapshot, Result]):
        """Export the container's ``/data`` path to an archive file, then remove the container.

        The archive stream returned by the container's ``get_archive('/data')``
        call is written chunk by chunk to ``context.container_archive_path``.
        Afterwards ``self.remove_container`` is called with ``str(context.id)``.

        Returns:
            The context as returned by the status update.
        """
        context = update_instance_status_rest(instance=context, new_status='Export Container Filesystem')
        container = self._d.containers.get(context.container_id)

        # Only the '/data' path is exported (not the full filesystem);
        # the first element of the result is the data stream.
        data_stream = container.get_archive('/data')[0]

        # Write the archive file. The context manager guarantees the handle is
        # flushed and closed even if the stream fails midway; the original
        # never closed it.
        with open(str(context.container_archive_path), "wb") as archive_file:
            for chunk in data_stream:
                archive_file.write(chunk)

        # Cleanup: remove the stopped container.
        self.remove_container(str(context.id))

        return context
示例#8
0
    def push_container_context(self, context: ModelSnapshot) -> ModelSnapshot:
        """Push the context's container image and store the push output.

        Returns:
            A copy of the status-updated context with ``container_push_logs``
            set to whatever the low-level client's ``push`` call returned.
        """
        updated = update_instance_status_rest(instance=context,
                                              new_status='Push Model-Container to Registry')
        push_output = self._d_low.push(updated.container_image_name)

        return dataclasses.replace(updated, container_push_logs=push_output)