def test_extract_file_function(self):
    """
    Check that extract.extract_file yields a start and a done event and
    extracts all the expected files from a simple non-nested archive.
    """
    archive = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
    parent = fileutils.parent_directory(archive)
    expected_files = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']
    # Strip the machine-specific base directory so events compare stably.
    relative_archive = archive.replace(parent, '')
    relative_target = extractcode.get_extraction_path(relative_archive)
    expected_events = [
        extract.ExtractEvent(
            source=relative_archive,
            target=relative_target,
            done=is_done,
            warnings=[],
            errors=[],
        )
        for is_done in (False, True)
    ]
    extraction_target = extractcode.get_extraction_path(archive)
    # Normalize the real events to the relative paths before comparing.
    events = [
        event._replace(source=relative_archive, target=relative_target)
        for event in extract.extract_file(archive, extraction_target)
    ]
    assert expected_events == events
    check_files(extraction_target, expected_files)
def test_extract_file_function(self):
    """
    Verify that extract.extract_file emits a not-done then a done event,
    both free of warnings and errors, and extracts the expected files.
    """
    test_loc = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
    base_dir = fileutils.parent_directory(test_loc)
    expected_paths = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']
    # Paths are made relative to the test location for a stable comparison.
    rel_source = test_loc.replace(base_dir, '')
    rel_target = extractcode.get_extraction_path(rel_source)

    def make_event(done):
        # Both expected events differ only in their `done` flag.
        return extract.ExtractEvent(
            source=rel_source,
            target=rel_target,
            done=done,
            warnings=[],
            errors=[],
        )

    expected_events = [make_event(False), make_event(True)]
    target = extractcode.get_extraction_path(test_loc)
    actual_events = []
    for event in extract.extract_file(test_loc, target):
        actual_events.append(
            event._replace(source=rel_source, target=rel_target))
    assert expected_events == actual_events
    check_files(target, expected_paths)
def extract(location, target):
    """
    Wraps the `extractcode.extract_file` to execute the extraction and return
    errors.
    """
    # Only completed ("done") events carry the final error list.
    return [
        error
        for event in extract_file(location, target)
        if event.done
        for error in event.errors
    ]
def extract_input_files_to_codebase_directory(self):
    """
    Extract root filesystem input archives with extractcode.
    """
    codebase_dir = str(self.project.codebase_path)
    errors = []
    for archive in self.project.inputs("*"):
        # Only "done" events carry the final error list for an archive.
        errors.extend(
            error
            for event in extract_file(archive, codebase_dir)
            if event.done
            for error in event.errors
        )
    if errors:
        self.add_error("\n".join(errors))
def rebuild_rootfs(image, target_dir, layerid_len=DEFAULT_ID_LEN):
    """
    Extract and merge all layers to target_dir. Extraction is done in
    sequence from bottom (root) to top (latest layer).

    Return a two-tuple of (extract_errors, whiteouts) where extract_errors
    is a list of extraction events that had warnings or errors and whiteouts
    is a list of paths deleted by AUFS whiteout markers.

    The extraction process consists of these steps:
     - extract the layer in a temp directory
     - move layer to the target directory, overwriting existing files
     - if any, remove AUFS special files/dirs in the target directory
     - if any, remove whiteouts file/directory pairs in the target directory
    """
    from extractcode.extract import extract_file

    assert filetype.is_dir(target_dir)
    assert os.path.exists(target_dir)
    extract_errors = []
    # log whiteouts deletions
    whiteouts = []

    for layer_id in image.layers:
        layer_tarball = join(
            image.repo_dir, layer_id[:layerid_len], LAYER_TAR_FILE)
        logger.debug('Extracting layer tarball: %(layer_tarball)r' % locals())
        temp_target = fileutils.get_temp_dir('conan-docker')
        xevents = list(extract_file(layer_tarball, temp_target))
        # Collect the events once if any event reported problems.
        # (The previous code extended with the full list once PER problematic
        # event, duplicating entries.)
        if any(x.warnings or x.errors for x in xevents):
            extract_errors.extend(xevents)

        # FIXME: the order of ops is WRONG: we are getting whiteouts incorrectly
        # it should be:
        # 1. extract a layer to temp.
        # 2. find whiteouts in that layer.
        # 3. remove whiteouts in the previous layer stack (e.g. the WIP rootfs)
        # 4. finall copy the extracted layer over the WIP rootfs

        # move extracted layer to target_dir
        # BUG FIX: the message was previously logged without `% locals()`,
        # so the placeholders were never interpolated.
        logger.debug(
            'Moving extracted layer from: %(temp_target)r to: %(target_dir)r'
            % locals())
        fileutils.copytree(temp_target, target_dir)
        fileutils.delete(temp_target)

        logger.debug(
            'Merging extracted layers and applying AUFS whiteouts/deletes')
        for top, dirs, files in fileutils.walk(target_dir):
            # delete AUFS dirs and apply whiteout deletions
            # Iterate over a copy since we mutate `dirs` in place to prune
            # the walk.
            for dr in dirs[:]:
                whiteable_dir = join(top, dr)
                if dr.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. dir...
                    dirs.remove(dr)
                    logger.debug('Deleting whiteout dir: %(whiteable_dir)r'
                                 % locals())
                    fileutils.delete(whiteable_dir)
                    # ... and delete the corresponding dir it does "whiteout"
                    base_dir = dr[len(WHITEOUT_PREFIX):]
                    try:
                        dirs.remove(base_dir)
                    except ValueError:
                        # FIXME: should we really raise an exception here?
                        msg = ('Inconsistent layers: '
                               'missing directory to whiteout: %(base_dir)r'
                               % locals())
                        raise InconsistentLayersError(msg)
                    wdo = join(top, base_dir)
                    logger.debug('Deleting real dir: %(wdo)r' % locals())
                    fileutils.delete(wdo)
                    whiteouts.append(wdo)

                # delete AUFS special dirs
                elif dr.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    dirs.remove(dr)
                    logger.debug(
                        'Deleting AUFS special dir: %(whiteable_dir)r'
                        % locals())
                    fileutils.delete(whiteable_dir)

            # delete AUFS files and apply whiteout deletions
            all_files = set(files)
            for fl in all_files:
                whiteable_file = join(top, fl)
                if fl.startswith(WHITEOUT_PREFIX):
                    # delete the .wh. marker file...
                    logger.debug('Deleting whiteout file: %(whiteable_file)r'
                                 % locals())
                    fileutils.delete(whiteable_file)
                    # ... and delete the corresponding file it does "whiteout"
                    # e.g. logically delete
                    base_file = fl[len(WHITEOUT_PREFIX):]
                    wfo = join(top, base_file)
                    whiteouts.append(wfo)
                    if base_file in all_files:
                        logger.debug('Deleting real file: %(wfo)r' % locals())
                        fileutils.delete(wfo)

                # delete AUFS special files
                elif fl.startswith(WHITEOUT_SPECIAL_DIR_PREFIX):
                    logger.debug(
                        'Deleting AUFS special file: %(whiteable_file)r'
                        % locals())
                    fileutils.delete(whiteable_file)
                    whiteouts.append(whiteable_file)

    return extract_errors, whiteouts