def _save_image(self, image_id, tar_file):
    """
    Streams the image identified by ``image_id`` out of Docker into the
    ``tar_file`` archive on disk.

    Up to three attempts are made. True is returned as soon as one attempt
    succeeds; when all of them fail a SquashError is raised.
    """
    attempt = 0

    while attempt < 3:
        attempt += 1

        self.log.info("Saving image %s to %s file..." % (image_id, tar_file))
        self.log.debug("Try #%s..." % attempt)

        try:
            stream = self.docker.get_image(image_id)

            # Opening with 'wb' truncates whatever a previous, failed
            # attempt may have left behind
            with open(tar_file, 'wb') as archive:
                # Copy the archive in chunks of 1024000 bytes (roughly 1 MB)
                chunk = stream.read(1024000)

                while chunk:
                    archive.write(chunk)
                    chunk = stream.read(1024000)

            self.log.info("Image saved!")
            return True
        except Exception as e:
            self.log.exception(e)
            self.log.warn(
                "An error occured while saving the %s image, retrying..." % image_id)

    # Every attempt failed
    raise SquashError("Couldn't save %s image!" % image_id)
def _validate_number_of_layers(self, number_of_layers):
    """
    Checks that ``number_of_layers`` is a usable amount of layers to squash.

    A SquashError is raised when the value is not positive or when it is
    bigger than the number of layers found in the image
    (``self.old_image_layers``). Nothing is returned for a valid value.
    """

    # Zero and negative values can never be squashed
    if number_of_layers <= 0:
        message = "Number of layers to squash cannot be less or equal 0, provided: %s" % number_of_layers
        raise SquashError(message)

    # It is not possible to squash more layers than the image actually has
    available = len(self.old_image_layers)

    if number_of_layers > available:
        details = (number_of_layers, self.image, available)
        raise SquashError(
            "Cannot squash %s layers, the %s image contains only %s layers" % details)
def _path_hierarchy(self, path):
    """
    Lists all parent directories of the given path.

    The returned list starts with the directory that contains ``path``
    and walks up, one level at a time, until the root directory is
    reached.

    Example:

    Path '/opt/testing/some/dir/structure/file'

    will return:

    ['/opt/testing/some/dir/structure', '/opt/testing/some/dir',
     '/opt/testing/some', '/opt/testing', '/opt', '/']

    A SquashError is raised when an empty path is provided.
    """
    if not path:
        raise SquashError("No path provided to create the hierarchy for")

    parent = os.path.dirname(path)

    # The root directory closes the hierarchy, there is nothing above it
    if parent == '/':
        return [parent]

    # Otherwise continue with the parents of the parent
    return [parent] + self._path_hierarchy(parent)
def _squash_id(self, layer):
    """
    Resolves the ``layer`` argument into the ID of the layer to squash from.

    Returns None (after logging a warning) when ``layer`` is the
    "<missing>" placeholder, otherwise the 'Id' value reported by
    ``self.docker.inspect_image`` for that layer.

    A SquashError is raised when the layer cannot be inspected or when the
    resolved ID is not one of ``self.old_image_layers``.
    """
    if layer == "<missing>":
        self.log.warn(
            "You try to squash from layer that does not have it's own ID, we'll try to find it later")
        return None

    try:
        squash_id = self.docker.inspect_image(layer)['Id']
    except Exception:
        # Only real errors are translated; KeyboardInterrupt and SystemExit
        # must not be reported as a wrong 'layer' argument
        raise SquashError(
            "Could not get the layer ID to squash, please check provided 'layer' argument: %s" % layer)

    if squash_id not in self.old_image_layers:
        raise SquashError(
            "Couldn't find the provided layer (%s) in the %s image" % (layer, self.image))

    self.log.debug("Layer ID to squash from: %s" % squash_id)

    return squash_id
def _prepare_tmp_directory(self, tmp_dir):
    """
    Makes sure there is a fresh directory to work on layers in and returns
    its path.

    Without ``tmp_dir`` a new directory prefixed with "docker-squash-" is
    created with ``tempfile.mkdtemp``. A provided ``tmp_dir`` is created,
    but only if it does not exist yet - otherwise a SquashError is raised.
    """
    if not tmp_dir:
        # Nothing requested, pick a unique location ourselves
        tmp_dir = tempfile.mkdtemp(prefix="docker-squash-")
    elif os.path.exists(tmp_dir):
        raise SquashError(
            "The '%s' directory already exists, please remove it before you proceed" % tmp_dir)
    else:
        os.makedirs(tmp_dir)

    self.log.debug("Using %s as the temporary directory" % tmp_dir)

    return tmp_dir
def _save_image(self, image_id, directory):
    """
    Fetches the image identified by ``image_id`` from Docker and hands the
    tar stream over to ``self._extract_tar`` together with ``directory``.

    Up to three attempts are made. True is returned after the first
    successful one, a SquashError is raised when all attempts failed.
    A failure reported by the extraction counts as a failed attempt.
    """
    for x in [0, 1, 2]:
        self.log.info("Saving image %s to %s directory..." %
                      (image_id, directory))
        self.log.debug("Try #%s..." % (x + 1))

        try:
            image = self.docker.get_image(image_id)

            if docker.version_info[0] < 3:
                # Docker library prior to 3.0.0 returned the requests
                # object directly which could be used to read from
                self.log.debug(
                    "Extracting image using HTTPResponse object directly")
                self._extract_tar(image, directory)
            else:
                # Docker library >=3.0.0 returns iterator over raw data
                self.log.debug(
                    "Extracting image using iterator over raw data")

                # The chunks are pushed through a pipe so that the
                # extraction, running in a separate thread, can read them
                # like a regular file object
                fd_r, fd_w = os.pipe()

                r = os.fdopen(fd_r, 'rb')
                w = os.fdopen(fd_w, 'wb')

                # Errors raised in the thread would be lost otherwise
                failures = []

                def extract_and_drain():
                    try:
                        self._extract_tar(r, directory)
                    except Exception as exc:
                        failures.append(exc)
                    finally:
                        # Consume whatever was not read, otherwise the
                        # writer blocks forever once the pipe is full
                        while not r.closed and r.read(65536):
                            pass

                extracter = threading.Thread(target=extract_and_drain)
                extracter.start()

                try:
                    for chunk in image:
                        w.write(chunk)
                        w.flush()
                finally:
                    # Closing the write end is what signals the end of
                    # data to the reader; it must happen on failures too,
                    # otherwise the thread never finishes
                    try:
                        w.close()
                    finally:
                        extracter.join()
                        r.close()

                if failures:
                    # Make failed extraction fail this attempt
                    raise failures[0]

            self.log.info("Image saved!")
            return True
        except Exception as e:
            self.log.exception(e)
            self.log.warn(
                "An error occured while saving the %s image, retrying..." % image_id)

    raise SquashError("Couldn't save %s image!" % image_id)
def _initialize_directories(self):
    """
    Prepares the directory layout used while squashing.

    Sets ``self.tmp_dir`` to the value returned by
    ``self._prepare_tmp_directory`` and derives ``self.old_image_dir``,
    ``self.new_image_dir`` and ``self.squashed_dir`` from it. The "old"
    and "new" directories are created on disk, the "squashed" one is not.

    A SquashError is raised when preparing the temporary directory fails.
    """

    # Prepare temporary directory where all the work will be executed
    try:
        self.tmp_dir = self._prepare_tmp_directory(self.tmp_dir)
    except Exception:
        # KeyboardInterrupt and SystemExit are deliberately not converted
        raise SquashError("Preparing temporary directory failed")

    # Temporary location on the disk of the old, unpacked *image*
    self.old_image_dir = os.path.join(self.tmp_dir, "old")
    # Temporary location on the disk of the new, unpacked, squashed *image*
    self.new_image_dir = os.path.join(self.tmp_dir, "new")
    # Temporary location on the disk of the squashed *layer*
    self.squashed_dir = os.path.join(self.new_image_dir, "squashed")

    for d in self.old_image_dir, self.new_image_dir:
        os.makedirs(d)
def _generate_repositories_json(self, repositories_file, image_id, name, tag):
    """
    Writes the mapping of image name and tag to the image ID, in compact
    JSON form followed by a newline, to ``repositories_file``.

    Nothing is written when both ``name`` and ``tag`` are None.
    A SquashError is raised when ``image_id`` is empty.
    """
    if not image_id:
        raise SquashError("Provided image id cannot be null")

    if name is None and tag is None:
        self.log.debug(
            "No name and tag provided for the image, skipping generating repositories file")
        return

    repos = {name: {tag: image_id}}

    # No whitespace between the separators
    data = json.dumps(repos, separators=(',', ':'))

    with open(repositories_file, 'w') as f:
        f.write(data)
        f.write("\n")
def run(self):
    """
    Entry point: validates the configuration, selects the image format
    based on the API version reported by Docker and squashes the image.

    Returns whatever ``self.squash`` returns, or None when neither an
    output path nor loading into Docker was requested. A SquashError is
    raised when no image was provided.
    """
    daemon = self.docker.version()
    self.log.info("docker-squash version %s, Docker %s, API %s..." %
                  (version, daemon['GitCommit'], daemon['ApiVersion']))

    if self.image is None:
        raise SquashError("Image is not provided")

    # Without any destination the result would be thrown away
    if not self.output_path and not self.load_image:
        self.log.warn(
            "No output path specified and loading into Docker is not selected either; squashed image would not accessible, proceeding with squashing doesn't make sense"
        )
        return

    if self.output_path and os.path.exists(self.output_path):
        self.log.warn(
            "Path '%s' specified as output path where the squashed image should be saved already exists, it'll be overriden" % self.output_path)

    # API 1.22 is the first one handled with the V2 image class
    api_version = StrictVersion(daemon['ApiVersion'])

    if api_version >= StrictVersion("1.22"):
        image_class = V2Image
    else:
        image_class = V1Image

    image = image_class(self.log, self.docker, self.image,
                        self.from_layer, self.tmp_dir, self.tag)

    self.log.info("Using %s image format" % image.FORMAT)

    try:
        return self.squash(image)
    except:
        # https://github.com/goldmann/docker-scripts/issues/44
        # Unless development mode is enabled, the temporary directory
        # is cleaned up before the error is passed on to the caller
        if not self.development:
            image.cleanup()
        raise
def _save_image(self, image_id, directory):
    """
    Fetches the image identified by ``image_id`` from Docker and unpacks
    its tar stream, while it is being read, into ``directory``.

    Up to three attempts are made. True is returned after the first
    successful one, a SquashError is raised when all attempts failed.
    """
    for attempt in (1, 2, 3):
        self.log.info("Saving image %s to %s directory..." %
                      (image_id, directory))
        self.log.debug("Try #%s..." % attempt)

        try:
            stream = self.docker.get_image(image_id)

            # The 'r|' mode reads the archive as a non-seekable stream
            with tarfile.open(fileobj=stream, mode='r|') as archive:
                archive.extractall(path=directory)
        except Exception as e:
            self.log.exception(e)
            self.log.warn(
                "An error occured while saving the %s image, retrying..." % image_id)
        else:
            self.log.info("Image saved!")
            return True

    # Every attempt failed
    raise SquashError("Couldn't save %s image!" % image_id)
def _before_squashing(self):
    """
    Collects everything that is needed before the actual squashing starts.

    Initializes the working directories, resolves the ID of the image,
    reads its layers (stored oldest first in ``self.old_image_layers``)
    and splits them into ``self.layers_to_squash`` and
    ``self.layers_to_move`` according to ``self.from_layer``, which can
    be a number of layers, a layer reference, or None to squash all
    layers. Finally the image is saved to ``self.old_image_dir`` and the
    result of ``self._dir_size`` for it is stored in ``self.size_before``.

    Raises SquashError when the image or the layer cannot be resolved or
    the number of layers is invalid, and SquashUnnecessaryError when only
    a single layer would be squashed.
    """
    self._initialize_directories()

    # Location of the tar archive with squashed layers
    self.squashed_tar = os.path.join(self.squashed_dir, "layer.tar")

    if self.tag:
        self.image_name, self.image_tag = self._parse_image_name(self.tag)

    # The image id or name of the image to be squashed
    try:
        self.old_image_id = self.docker.inspect_image(self.image)['Id']
    except Exception:
        # Any failure to inspect the image is reported the same way as
        # _squash_id reports a failure to inspect the layer
        raise SquashError(
            "Could not get the image ID to squash, please check provided 'image' argument: %s" % self.image)

    self.old_image_layers = []

    # Read all layers in the image
    self._read_layers(self.old_image_layers, self.old_image_id)

    # Flip the order in which the layers were collected
    self.old_image_layers.reverse()

    self.log.info("Old image has %s layers", len(self.old_image_layers))
    self.log.debug("Old layers: %s", self.old_image_layers)

    # By default - squash all layers.
    if self.from_layer is None:
        self.from_layer = len(self.old_image_layers)

    try:
        number_of_layers = int(self.from_layer)

        self.log.debug(
            "We detected number of layers as the argument to squash")
    except ValueError:
        self.log.debug("We detected layer as the argument to squash")

        squash_id = self._squash_id(self.from_layer)

        if not squash_id:
            raise SquashError(
                "The %s layer could not be found in the %s image" % (self.from_layer, self.image))

        # Everything that comes after the provided layer gets squashed,
        # the layer itself is not included
        number_of_layers = len(self.old_image_layers) - \
            self.old_image_layers.index(squash_id) - 1

    self._validate_number_of_layers(number_of_layers)

    # Index of the first layer to squash
    marker = len(self.old_image_layers) - number_of_layers

    self.layers_to_squash = self.old_image_layers[marker:]
    self.layers_to_move = self.old_image_layers[:marker]

    self.log.info("Checking if squashing is necessary...")

    if len(self.layers_to_squash) < 1:
        raise SquashError("Invalid number of layers to squash: %s" % len(self.layers_to_squash))

    if len(self.layers_to_squash) == 1:
        raise SquashUnnecessaryError(
            "Single layer marked to squash, no squashing is required")

    self.log.info("Attempting to squash last %s layers...", number_of_layers)
    self.log.debug("Layers to squash: %s", self.layers_to_squash)
    self.log.debug("Layers to move: %s", self.layers_to_move)

    # Fetch the image and unpack it on the fly to the old image directory
    self._save_image(self.old_image_id, self.old_image_dir)

    self.size_before = self._dir_size(self.old_image_dir)

    self.log.info("Squashing image '%s'..." % self.image)