def test_file_base_name_on_dir_path(self):
    test_dir = self.get_test_loc('fileutils/basename')
    test_file = 'a/b/'
    expected_name = 'b'
    result = fileutils.file_base_name(test_file)
    assert expected_name == result
    result = fileutils.file_base_name(join(test_dir, test_file))
    assert expected_name == result

def test_file_base_name_on_path_for_plain_dir_with_extension(self):
    test_dir = self.get_test_loc('fileutils/basename')
    test_file = 'f.a/'
    expected_name = 'f.a'
    result = fileutils.file_base_name(test_file)
    assert expected_name == result
    result = fileutils.file_base_name(join(test_dir, test_file))
    assert expected_name == result

def test_file_base_name_on_path_for_plain_file(self):
    test_dir = self.get_test_loc('fileutils/basename')
    test_file = 'tst'
    expected_name = 'tst'
    result = fileutils.file_base_name(test_file)
    assert expected_name == result
    result = fileutils.file_base_name(join(test_dir, test_file))
    assert expected_name == result

def test_file_base_name_on_file_path_for_file_with_known_composed_extension(self):
    test_dir = self.get_test_loc('fileutils/basename')
    test_file = 'a/b/a.tar.gz'
    expected_name = 'a'
    result = fileutils.file_base_name(test_file)
    assert expected_name == result
    result = fileutils.file_base_name(join(test_dir, test_file))
    assert expected_name == result

def test_file_base_name_on_path_and_location_1(self):
    test_dir = self.get_test_loc('fileutils/basename')
    test_file = 'a/.a/file'
    expected_name = 'file'
    result = fileutils.file_base_name(test_file)
    assert expected_name == result
    result = fileutils.file_base_name(join(test_dir, test_file))
    assert expected_name == result
def is_special_legal_file(location):
    """
    Return an indication that a file may be a "special" legal-like file.
    """
    file_base_name = fileutils.file_base_name(location)
    file_base_name_lower = file_base_name.lower()
    file_extension = fileutils.file_extension(location)
    file_extension_lower = file_extension.lower()

    name_contains_special = (
        special_name in file_base_name or special_name in file_extension
        for special_name in special_names)

    name_lower_is_special = (
        special_name_lower in (file_base_name_lower, file_extension_lower)
        for special_name_lower in special_names_lower)

    name_lower_contains_special = (
        special_name_lower in file_base_name_lower
        or special_name_lower in file_extension_lower
        for special_name_lower in special_names_lower)

    if any(name_contains_special) or any(name_lower_is_special):
        return 'yes'
    elif any(name_lower_contains_special):
        return 'maybe'
    else:
        # return False for now?
        pass
def uncompress_file(location, decompressor):
    """
    Uncompress a compressed file at location and return a temporary location of
    the uncompressed file and a list of warning messages. Raise Exceptions on
    errors. Use the `decompressor` object for decompression.
    """
    # FIXME: do not create a sub-directory and instead strip the "compression"
    # extension such as gz, etc. or introspect the archive header to get the
    # file name when present.
    assert location
    assert decompressor

    warnings = []
    base_name = fileutils.file_base_name(location)
    target_location = os.path.join(fileutils.get_temp_dir(base_dir='extract'), base_name)
    with decompressor(location, 'rb') as compressed:
        with open(target_location, 'wb') as uncompressed:
            buffer_size = 32 * 1024 * 1024
            while True:
                chunk = compressed.read(buffer_size)
                if not chunk:
                    break
                uncompressed.write(chunk)

    if getattr(decompressor, 'has_trailing_garbage', False):
        warnings.append(location + ': Trailing garbage found and ignored.')

    return target_location, warnings
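# A minimal usage sketch for the function above, assuming the standard-library
# gzip and bz2 openers as the `decompressor` callables: the function only needs
# an object callable as decompressor(location, 'rb') that may optionally expose
# a `has_trailing_garbage` attribute. Paths are illustrative only.
import bz2
import gzip

unpacked, warnings = uncompress_file('/tmp/archive.tar.gz', decompressor=gzip.open)
unpacked, warnings = uncompress_file('/tmp/archive.tar.bz2', decompressor=bz2.BZ2File)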
def load_rules(rule_dir=rules_data_dir):
    """
    Return a list of rules, loaded from rules files.
    FIXME: return an iterable instead
    """
    rules = []
    seen_files = set()
    processed_files = set()
    for top, _, files in os.walk(rule_dir):
        for yfile in files:
            if yfile.endswith('.yml'):
                data_file = join(top, yfile)
                base_name = fileutils.file_base_name(yfile)
                text_file = join(top, base_name + '.RULE')
                rule = Rule(data_file=data_file, text_file=text_file)
                rules.append(rule)
                processed_files.add(data_file)
                processed_files.add(text_file)
            seen_file = join(top, yfile)
            seen_files.add(seen_file)

    unknown_files = seen_files - processed_files
    if unknown_files:
        print(unknown_files)
        files = '\n'.join(sorted(unknown_files))
        msg = 'Unknown files in rule directory: %(rule_dir)r\n%(files)s'
        raise Exception(msg % locals())
    return rules
def load_license_tests(test_dir=TEST_DATA_DIR):
    """
    Yield an iterable of LicenseTest loaded from test data files in test_dir.
    """
    # first collect files with .yml extension and files with other extensions
    # in two maps keyed by file base_name
    data_files = {}
    test_files = {}
    for top, _, files in os.walk(test_dir):
        for yfile in files:
            if yfile.endswith('~'):
                continue
            base_name = fileutils.file_base_name(yfile)
            file_path = abspath(join(top, yfile))
            if yfile.endswith('.yml'):
                assert base_name not in data_files
                data_files[base_name] = file_path
            else:
                assert base_name not in test_files
                test_files[base_name] = file_path

    # ensure that each data file has a corresponding test file
    diff = set(data_files.keys()).symmetric_difference(set(test_files.keys()))
    assert not diff, ('Orphaned license test file(s) found: '
                      'test file without its YAML test descriptor '
                      'or YAML test descriptor without its test file.')

    # second, create pairs of corresponding (data_file, test file) for files
    # that have the same base_name
    for base_name, data_file in data_files.items():
        test_file = test_files[base_name]
        yield LicenseTest(data_file, test_file)
def load_licenses(licenses_data_dir=licenses_data_dir, with_deprecated=False):
    """
    Return a mapping of key -> license objects, loaded from license files.
    Raise Exceptions if there are dangling orphaned files.
    """
    licenses = {}
    used_files = set()
    all_files = set(resource_iter(
        licenses_data_dir, ignored=ignore_editor_tmp_files, with_dirs=False))
    for data_file in sorted(all_files):
        if data_file.endswith('.yml'):
            key = file_base_name(data_file)
            lic = License(key, licenses_data_dir)
            used_files.add(data_file)
            if exists(lic.text_file):
                used_files.add(lic.text_file)
            if not with_deprecated and lic.is_deprecated:
                continue
            licenses[key] = lic

    dangling = all_files.difference(used_files)
    if dangling:
        msg = 'Some License data or text files are orphaned in "{}".\n'.format(licenses_data_dir)
        msg += '\n'.join('file://{}'.format(f) for f in sorted(dangling))
        raise Exception(msg)
    return licenses
def load_license_tests(test_dir=TEST_DATA_DIR):
    """
    Yield an iterable of LicenseTest loaded from test data files in test_dir.
    """
    # first collect files with .yml extension and files with other extensions
    # in two maps keyed by file base_name
    data_files = {}
    test_files = {}
    for top, _, files in os.walk(test_dir):
        for yfile in files:
            base_name = fileutils.file_base_name(yfile)
            file_path = abspath(join(top, yfile))
            if yfile.endswith(".yml"):
                assert base_name not in data_files
                data_files[base_name] = file_path
            else:
                assert base_name not in test_files
                test_files[base_name] = file_path

    # ensure that each data file has a corresponding test file
    diff = set(data_files.keys()).symmetric_difference(set(test_files.keys()))
    assert not diff

    # second, create pairs of a data_file and the corresponding test file
    # that have the same base_name
    for base_name, data_file in data_files.items():
        test_file = test_files[base_name]
        yield LicenseTest(data_file, test_file)
def create_html_app(output_file, results, version, scanned_path):  # NOQA
    """
    Given an html-app output_file, generate that file, create the data.js data
    file from the results and create the corresponding `_files` directory and
    copy the data and assets to this directory. The target directory is deleted
    if it exists. Raise HtmlAppAssetCopyWarning if the output_file is <stdout>
    or HtmlAppAssetCopyError if the copy was not possible.
    """
    try:
        if is_stdout(output_file):
            raise HtmlAppAssetCopyWarning()

        source_assets_dir = join(TEMPLATES_DIR, 'html-app', 'assets')

        # the assets directory is named after the output file base name
        # (stripped of its extension) with a `_files` suffix, and lives next to
        # the output file
        output_location = output_file.name
        tgt_root_path = dirname(output_location)
        tgt_assets_dir = file_base_name(output_location) + '_files'

        # delete old assets
        target_assets_dir = join(tgt_root_path, tgt_assets_dir)
        if exists(target_assets_dir):
            delete(target_assets_dir)

        # copy assets
        copytree(source_assets_dir, target_assets_dir)

        template = get_template(join(TEMPLATES_DIR, 'html-app', 'template.html'))
        rendered_html = template.render(
            assets_dir=target_assets_dir,
            scanned_path=scanned_path,
            version=version)
        output_file.write(rendered_html)

        # create help file
        help_template = get_template(join(TEMPLATES_DIR, 'html-app', 'help_template.html'))
        rendered_help = help_template.render(main_app=output_location)
        with io.open(join(target_assets_dir, 'help.html'), 'w', encoding='utf-8') as f:
            f.write(rendered_help)

        # FIXME: this should be a regular JSON scan format
        with io.open(join(target_assets_dir, 'data.js'), 'w') as f:
            f.write('data=')
            json.dump(list(results), f)

    except HtmlAppAssetCopyWarning as w:
        raise w

    except Exception as e:  # NOQA
        import traceback
        msg = 'ERROR: cannot create HTML application.\n' + traceback.format_exc()
        raise HtmlAppAssetCopyError(msg)
def load_license_tests(test_dir=TEST_DATA_DIR):
    """
    Yield an iterable of LicenseTest loaded from test data files in test_dir.
    """
    # first collect files with .yml extension and files with other extensions
    # in two maps keyed by file base_name
    data_files = {}
    test_files = {}
    for top, _, files in os.walk(test_dir):
        for yfile in files:
            base_name = fileutils.file_base_name(yfile)
            file_path = abspath(join(top, yfile))
            if yfile.endswith('.yml'):
                assert base_name not in data_files
                data_files[base_name] = file_path
            else:
                assert base_name not in test_files
                test_files[base_name] = file_path

    # ensure that each data file has a corresponding test file
    diff = set(data_files.keys()).symmetric_difference(set(test_files.keys()))
    assert not diff

    # second, create pairs of a data_file and the corresponding test file
    # that have the same base_name
    for base_name, data_file in data_files.items():
        test_file = test_files[base_name]
        yield LicenseTest(data_file, test_file)
def get_dockerfile(location, echo=print):
    """
    Return a Dockerfile data dictionary if the location is a Dockerfile,
    otherwise return an empty dictionary.
    """
    fn = fileutils.file_base_name(location)
    if 'Dockerfile' not in fn:
        return {}

    echo('Found Dockerfile at: %(location)r' % locals())
    try:
        # TODO: keep comments instead of ignoring them:
        # assign the comments before an instruction line to a line "comment" attribute
        # assign end of line comment to the line
        # assign top of file and end of file comments to file level comment attribute
        df = dockerfile_parse.DockerfileParser(location)
        df_data = OrderedDict()
        df_data['location'] = location
        df_data['base_image'] = df.baseimage
        df_data['instructions'] = []

        for entry in df.structure:
            entry = OrderedDict(
                [(k, v) for k, v in sorted(entry.items())
                 if k in ('instruction', 'startline', 'value',)])
            df_data['instructions'].append(entry)
        return {location: df_data}
    except:
        echo('Error parsing Dockerfile at: %(location)r' % locals())
        return {}
def load_rules(rules_data_dir=rules_data_dir):
    """
    Return an iterable of rules loaded from rule files.
    """
    # TODO: OPTIMIZE: create a graph of rules to account for containment and
    # similarity clusters?
    seen_files = set()
    processed_files = set()
    lower_case_files = set()
    case_problems = set()
    model_errors = []

    for data_file in resource_iter(rules_data_dir, with_dirs=False):
        if data_file.endswith('.yml'):
            base_name = file_base_name(data_file)
            rule_file = join(rules_data_dir, base_name + '.RULE')
            try:
                rule = Rule(data_file=data_file, text_file=rule_file)
                yield rule
            except Exception as re:
                model_errors.append(str(re))

            # accumulate sets to ensure we do not have illegal names or extra
            # orphaned files
            data_lower = data_file.lower()
            if data_lower in lower_case_files:
                case_problems.add(data_lower)
            else:
                lower_case_files.add(data_lower)

            rule_lower = rule_file.lower()
            if rule_lower in lower_case_files:
                case_problems.add(rule_lower)
            else:
                lower_case_files.add(rule_lower)

            processed_files.update([data_file, rule_file])

        if not data_file.endswith('~'):
            seen_files.add(data_file)

    if model_errors:
        errors = '\n'.join(model_errors)
        msg = 'Invalid rules in rule directory: %(rules_data_dir)r\n%(errors)s'
        raise Exception(msg % locals())

    unknown_files = seen_files - processed_files
    if unknown_files or case_problems:
        msg = ''
        if unknown_files:
            files = '\n'.join(sorted('file://' + f for f in unknown_files))
            msg = 'Orphaned files in rule directory: %(rules_data_dir)r\n%(files)s'
        if case_problems:
            files = '\n'.join(sorted('file://' + f for f in case_problems))
            msg += '\nRule files with non-unique name ignoring case in rule directory: %(rules_data_dir)r\n%(files)s'
        raise Exception(msg % locals())
def test_load_image_config(self):
    test_dir = self.get_test_loc('images/config')
    expected_dir = self.get_test_loc('images/config_expected')
    for config_file in os.listdir(test_dir):
        base_name = fileutils.file_base_name(config_file)
        config_file = os.path.join(test_dir, config_file)
        image = Image.load_image_config(config_file)
        expected = os.path.join(expected_dir, base_name + '.expected.json')
        result = image.as_dict()
        check_expected(result, expected, regen=True)
def test_file_base_name_on_path_and_location(self):
    test_dir = self.get_test_loc("fileutils/basename", copy=True)
    tests = [
        ("a/.a/file", "file"),
        ("a/.a/", ".a"),
        ("a/b/.a.b", ".a"),
        ("a/b/a.tag.gz", "a.tag"),
        ("a/b/", "b"),
        ("a/f.a", "f"),
        ("a/", "a"),
        ("f.a/a.c", "a"),
        ("f.a/", "f.a"),
        ("tst", "tst"),
    ]
    for test_file, name in tests:
        result = fileutils.file_base_name(test_file)
        assert name == result
        # also test on location
        result = fileutils.file_base_name(os.path.join(test_dir, test_file))
        assert name == result
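# The expectations above pin down the behavior fairly precisely: the base name
# is the last path segment, a trailing-slash (directory-like) path keeps its
# whole segment name including any dots, and only the last extension is
# stripped for files, except for known composed extensions such as .tar.gz
# (see the a.tar.gz test earlier). The sketch below is a minimal illustration
# consistent with these tests; it is NOT the actual fileutils implementation,
# and the COMPOSED_EXTENSIONS list is an assumption shown only for .tar.*.
from os.path import splitext

COMPOSED_EXTENSIONS = ('.tar.gz', '.tar.bz2', '.tar.xz')

def file_base_name_sketch(path):
    # a directory-like path (trailing slash) keeps its name untouched
    is_dir_path = path.endswith(('/', '\\'))
    # last non-empty path segment
    name = path.replace('\\', '/').rstrip('/').rpartition('/')[-1]
    if is_dir_path:
        return name
    for ext in COMPOSED_EXTENSIONS:
        if name.endswith(ext):
            return name[:-len(ext)]
    # otherwise strip only the last extension; a leading dot is not an extension
    return splitext(name)[0]

assert file_base_name_sketch('a/b/.a.b') == '.a'
assert file_base_name_sketch('f.a/') == 'f.a'
assert file_base_name_sketch('a/b/a.tar.gz') == 'a'
assert file_base_name_sketch('a/b/a.tag.gz') == 'a.tag'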
def get_html_app_files_dirs(output_file):
    """
    Return a tuple of (parent_dir, dir_name) directory named after the
    `output_file` file object file_base_name (stripped from extension) and a
    `_files` suffix. Return None if output is to stdout.
    """
    file_name = output_file.name
    if file_name == "<stdout>":
        return
    parent_dir = dirname(file_name)
    dir_name = fileutils.file_base_name(file_name) + "_files"
    return parent_dir, dir_name
def get_resource_info(location):
    """
    Return a mapping suitable for the creation of a new CodebaseResource.
    """
    file_info = {}

    location_path = Path(location)
    is_symlink = location_path.is_symlink()
    is_file = location_path.is_file()

    if is_symlink:
        resource_type = CodebaseResource.Type.SYMLINK
        file_info["status"] = "symlink"
    elif is_file:
        resource_type = CodebaseResource.Type.FILE
    else:
        resource_type = CodebaseResource.Type.DIRECTORY

    file_info.update(
        {
            "type": resource_type,
            "name": fileutils.file_base_name(location),
            "extension": fileutils.file_extension(location),
        }
    )

    if is_symlink:
        return file_info

    # Missing fields on CodebaseResource model returned by `get_file_info`.
    unsupported_fields = [
        "is_binary",
        "is_text",
        "is_archive",
        "is_media",
        "is_source",
        "is_script",
        "date",
    ]

    other_info = scancode_api.get_file_info(location)

    # Skip unsupported_fields
    # Skip empty values to avoid null vs. '' conflicts
    other_info = {
        field_name: value
        for field_name, value in other_info.items()
        if field_name not in unsupported_fields and value
    }

    file_info.update(other_info)

    return file_info
def test_file_base_name_on_path_and_location(self):
    test_dir = self.get_test_loc('fileutils/basename', copy=True)
    tests = [
        ('a/.a/file', 'file'),
        ('a/.a/', '.a'),
        ('a/b/.a.b', '.a'),
        ('a/b/a.tag.gz', 'a.tag'),
        ('a/b/', 'b'),
        ('a/f.a', 'f'),
        ('a/', 'a'),
        ('f.a/a.c', 'a'),
        ('f.a/', 'f.a'),
        ('tst', 'tst'),
    ]
    for test_file, name in tests:
        result = fileutils.file_base_name(test_file)
        assert name == result
        # also test on location
        result = fileutils.file_base_name(os.path.join(test_dir, test_file))
        assert name == result
def get_html_app_files_dirs(output_file):
    """
    Return a tuple of (parent_dir, dir_name) directory named after the
    `output_file` file object file_base_name (stripped from extension) and a
    `_files` suffix. Return None if output is to stdout.
    """
    file_name = output_file.name
    if file_name == '<stdout>':
        return
    parent_dir = dirname(file_name)
    dir_name = fileutils.file_base_name(file_name) + '_files'
    return parent_dir, dir_name

def get_html_app_files_dirs(output_file):
    """
    Return a tuple of (parent_dir, dir_name) directory named after the
    `output_file` file object file_base_name (stripped from extension) and a
    `_files` suffix. Return empty strings if output is to stdout.
    """
    if is_stdout(output_file):
        return '', ''
    file_name = output_file.name
    parent_dir = dirname(file_name)
    dir_name = fileutils.file_base_name(file_name) + '_files'
    return parent_dir, dir_name

def get_html_app_files_dirs(output_file):
    """
    Return a tuple of (parent_dir, dir_name) directory named after the
    `output_file` file object file_base_name (stripped from extension) and a
    `_files` suffix. Return empty strings if output is to stdout.
    """
    if is_stdout(output_file):
        return '', ''
    file_name = output_file.name
    parent_dir = os.path.dirname(file_name)
    dir_name = fileutils.file_base_name(file_name) + '_files'
    return parent_dir, dir_name
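# A small illustration of the naming convention shared by the variants above.
# This is a hypothetical example using a stand-in object, since only the `name`
# attribute of the output file is consulted by the simplest variant (the one
# that compares output_file.name to '<stdout>').
from types import SimpleNamespace

output_file = SimpleNamespace(name='/scans/report.html')
parent_dir, dir_name = get_html_app_files_dirs(output_file)
# expected result: ('/scans', 'report_files')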
def load_licenses(licenses_data_dir=licenses_data_dir, with_deprecated=False):
    """
    Return a mapping of key -> license objects, loaded from license files.
    """
    licenses = {}
    for data_file in file_iter(licenses_data_dir):
        if not data_file.endswith('.yml'):
            continue
        key = file_base_name(data_file)
        lic = License(key, licenses_data_dir)
        if not with_deprecated and lic.is_deprecated:
            continue
        licenses[key] = lic
    return licenses
def parse(cls, location):
    with open(location, encoding='utf-8') as loc:
        readme_manifest = loc.read()

    package_data = build_package(readme_manifest)

    if not package_data.name:
        # If no name was detected for the Package, then we use the basename
        # of the parent directory as the Package name
        parent_dir = fileutils.parent_directory(location)
        parent_dir_basename = fileutils.file_base_name(parent_dir)
        package_data.name = parent_dir_basename

    yield package_data
def parse(location):
    """
    Return a Package object from a package.json file or None.
    """
    if not is_package_json(location):
        return

    with codecs.open(location, encoding='utf-8') as loc:
        package_data = json.load(loc, object_pairs_hook=OrderedDict)

    # a package.json is at the root of an NPM package
    base_dir = fileutils.parent_directory(location)
    metafile_name = fileutils.file_base_name(location)
    return build_package(package_data, base_dir, metafile_name)
def load_rules(rules_data_dir=rules_data_dir, load_notes=False):
    """
    Return an iterable of rules loaded from rule files.
    """
    # TODO: OPTIMIZE: create a graph of rules to account for containment and similarity clusters?
    # TODO: we should assign the rule id at that stage
    seen_files = set()
    processed_files = set()
    lower_case_files = set()
    case_problems = set()
    for data_file in file_iter(rules_data_dir):
        if data_file.endswith('.yml'):
            base_name = file_base_name(data_file)
            rule_file = join(rules_data_dir, base_name + '.RULE')
            yield Rule(data_file=data_file, text_file=rule_file, load_notes=load_notes)

            # accumulate sets to ensure we do not have illegal names or extra
            # orphaned files
            data_lower = data_file.lower()
            if data_lower in lower_case_files:
                case_problems.add(data_lower)
            else:
                lower_case_files.add(data_lower)

            rule_lower = rule_file.lower()
            if rule_lower in lower_case_files:
                case_problems.add(rule_lower)
            else:
                lower_case_files.add(rule_lower)

            processed_files.update([data_file, rule_file])

        if not data_file.endswith('~'):
            seen_files.add(data_file)

    unknown_files = seen_files - processed_files
    if unknown_files or case_problems:
        msg = ''
        if unknown_files:
            files = '\n'.join(sorted(unknown_files))
            msg = 'Orphaned files in rule directory: %(rules_data_dir)r\n%(files)s'
        if case_problems:
            files = '\n'.join(sorted(case_problems))
            msg += '\nRule files with non-unique name ignoring case in rule directory: %(rules_data_dir)r\n%(files)s'
        raise Exception(msg % locals())
def is_special_legal_file(location):
    file_base_name = fileutils.file_base_name(location).lower()
    file_extension = fileutils.file_extension(location).lower()

    if (any(special_name == file_base_name or special_name == file_extension
            for special_name in special_names_lower)
        or any(special_name in file_base_name or special_name in file_extension
               for special_name in special_names)):
        return 'yes'
    elif any(special_name in file_base_name or special_name in file_extension
             for special_name in special_names_lower):
        return 'maybe'
    else:
        # return False for now?
        pass
def recognize(cls, location):
    """
    Yield one or more Package manifest objects given a file ``location``
    pointing to a package archive, manifest or similar.
    """
    with open(location, encoding='utf-8') as loc:
        readme_manifest = loc.read()

    package = build_package(cls, readme_manifest)

    if not package.name:
        # If no name was detected for the Package, then we use the basename of
        # the parent directory as the Package name
        parent_dir = fileutils.parent_directory(location)
        parent_dir_basename = fileutils.file_base_name(parent_dir)
        package.name = parent_dir_basename

    yield package
def new_name(location, is_dir=False):
    """
    Return a new non-existing location usable to write a file or create
    directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the name.
    The case of the name is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique name:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()
    if (not name or name == '.'
        # windows bare drive path as in c: or z:
        or (name and len(name) == 2 and name.endswith(':'))):
        name = 'file'

    parent = fileutils.parent_directory(location)
    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    if name.lower() not in siblings_lower:
        return posixpath.join(parent, name)

    ext = fileutils.file_extension(name)
    base_name = fileutils.file_base_name(name)
    if is_dir:
        # directories have no extension
        ext = ''
        base_name = name

    counter = 1
    while True:
        new_name = base_name + '_' + str(counter) + ext
        if new_name.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, new_name)
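# A hedged usage sketch for new_name: when the requested name collides with an
# existing sibling, the base name is padded with an incremented counter and the
# extension is kept. The temporary directory and file names are hypothetical.
import os
import tempfile

out_dir = tempfile.mkdtemp()
with open(os.path.join(out_dir, 'report.txt'), 'w') as f:
    f.write('existing')

fresh = new_name(os.path.join(out_dir, 'report.txt'))
# expected: .../report_1.txt -- extension unchanged, base name padded with _1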
def parse_debian_files_list(location, datasource_id, package_type):
    """
    Yield PackageData from a list of file paths at ``location`` such as a
    Debian installed .list or .md5sums file.
    """
    qualifiers = {}
    filename = fileutils.file_base_name(location)
    if ':' in filename:
        name, _, arch = filename.partition(':')
        qualifiers['arch'] = arch
    else:
        name = filename

    file_references = []
    with open(location) as info_file:
        for line in info_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # for a plain file list, the md5sum will be empty
            md5sum, _, path = line.partition(' ')
            path = path.strip()
            md5sum = md5sum and md5sum.strip() or None
            # we ignore dirs in general, and we ignore those that would be
            # created as a plain dir when we can
            if path in ignored_root_dirs:
                continue
            ref = models.FileReference(path=path, md5=md5sum)
            file_references.append(ref)

    if not file_references:
        return

    yield models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=name,
        qualifiers=qualifiers,
        file_references=file_references,
    )
def is_special_legal_file(location):
    """
    Return an indication that a file may be a "special" legal-like file.
    """
    file_base_name = fileutils.file_base_name(location).lower()
    file_extension = fileutils.file_extension(location).lower()

    if (any(special_name == file_base_name or special_name == file_extension
            for special_name in special_names_lower)
        or any(special_name in file_base_name or special_name in file_extension
               for special_name in special_names)):
        return 'yes'
    elif any(special_name in file_base_name or special_name in file_extension
             for special_name in special_names_lower):
        return 'maybe'
    else:
        # return False for now?
        pass
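# A hedged example of how the classification above reads in practice, assuming
# that special_names_lower contains entries such as 'license' and 'notice'
# (the actual name lists are defined elsewhere in the module, so the exact
# results depend on their content):
is_special_legal_file('/src/LICENSE')      # exact base-name match      -> 'yes'
is_special_legal_file('/src/licenses.md')  # name only contains 'license' -> 'maybe'
is_special_legal_file('/src/main.c')       # no special name involved   -> None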
def parse(location):
    """
    Return a Package object from a README manifest file or None.
    """
    if not is_readme_manifest(location):
        return

    with open(location, encoding='utf-8') as loc:
        readme_manifest = loc.read()

    package = build_package(readme_manifest)

    if not package.name:
        # If no name was detected for the Package, then we use the basename of
        # the parent directory as the Package name
        parent_dir = fileutils.parent_directory(location)
        parent_dir_basename = fileutils.file_base_name(parent_dir)
        package.name = parent_dir_basename

    return package
def rules(rule_dir=rules_data_dir):
    """
    Return an iterable of rules loaded from rules files.
    """
    # TODO: OPTIMIZE: break RULES with gaps in multiple sub-rules??
    # TODO: OPTIMIZE: create a graph of rules to account for containment and similarity clusters?
    seen_files = set()
    processed_files = set()
    for top, _, files in walk(rule_dir):
        for yfile in (f for f in files if f.endswith('.yml')):
            data_file = join(top, yfile)
            base_name = file_base_name(yfile)
            text_file = join(top, base_name + '.RULE')
            processed_files.add(data_file)
            processed_files.add(text_file)
            yield Rule(data_file=data_file, text_file=text_file)
            seen_files.add(join(top, yfile))

    unknown_files = seen_files - processed_files
    if unknown_files:
        print(unknown_files)
        files = '\n'.join(sorted(unknown_files))
        msg = 'Unknown files in rule directory: %(rule_dir)r\n%(files)s'
        raise Exception(msg % locals())
def get_test_method_name(self):
    dfn = fileutils.file_base_name(self.data_file.lower())
    test_name = f'test_alpine_license_detection_{dfn}'
    return text.python_safe_name(test_name)
def load_manifest(self, repo_dir):
    """
    Load this repository from a "manifest.json" JSON file for format v1.1/1.2.
    This file is a mapping with this shape:
    - The `Config` field references another JSON file in the tar or repo which
      includes the image data for this image.
    - The `RepoTags` field lists references pointing to this image.
    - The `Layers` field points to the filesystem changeset tars, e.g. the path
      to the layer.tar files as a list ordered from bottom to top layer.
    - An optional `Parent` field references the imageID (as a sha256-prefixed
      digest?) of the parent image. This parent must be part of the same
      `manifest.json` file.

    For example:

    [
        {'Config': '7043867122e704683c9eaccd7e26abcd5bc9fea413ddfeae66166697bdcbde1f.json',
         'Layers': [
             '768d4f50f65f00831244703e57f64134771289e3de919a576441c9140e037ea2/layer.tar',
             '6a630e46a580e8b2327fc45d9d1f4734ccaeb0afaa094e0f45722a5f1c91e009/layer.tar',
         ],
         'RepoTags': ['user/image:version'],
         "Parent": "sha256:5a00e6ccb81ef304e1bb9995ea9605f199aa96659a44237d58ca96982daf9af8"
        },

        {'Config': '7043867122e704683c9eaccd7e26abcd5bc9fea413ddfeae66166697bdcbde1f.json',
         'Layers': [
             '768d4f50f65f00831244703e57f64134771289e3de919a576441c9140e037ea2/layer.tar',
             '6a630e46a580e8b2327fc45d9d1f4734ccaeb0afaa094e0f45722a5f1c91e009/layer.tar',
         ],
         'RepoTags': ['user/image:version']
        },
    ]
    """
    manifest_file = join(repo_dir, MANIFEST_JSON_FILE)
    manifest = load_json(manifest_file)

    for image_config in manifest:
        config_file = image_config.get('Config')
        config_file = join(repo_dir, config_file)
        if not exists(config_file):
            # FIXME: orphaned manifest entry
            image_id = file_base_name(config_file)
            image = Image(image_id=image_id)
            assert image.image_id not in self.images_by_id
            self.images_by_id[image.image_id] = image
            continue

        image = Image.load_image_config(config_file)
        assert image.image_id not in self.images_by_id
        self.images_by_id[image.image_id] = image

        image.parent_digest = image_config.get('Parent')

        image.tags = image_config.get('RepoTags') or []
        for tag in image.tags:
            self.image_id_by_tags[tag] = image.image_id

        layer_paths = image_config.get('Layers') or []
        layers = OrderedDict()
        for lp in layer_paths:
            layer_dir = fileutils.parent_directory(lp).strip('/')
            layer_id = layer_dir
            layer_dir = join(repo_dir, layer_dir)
            layer = Layer.load_layer(layer_dir)
            layer_digest = sha256_digest(join(repo_dir, lp))
            if layer.layer_digest:
                assert layer.layer_digest == layer_digest
            layers[layer_id] = layer
            self.layers_by_id[layer_id] = layer

        # the last one is the top one
        image.top_layer_id = layer_id
        image.top_layer_digest = layer_digest
def load_image_config(cls, config_file, verbose=True):
    """
    Return an Image object built from the image_config JSON file at location.

    Each Config JSON file for each image has this shape:
    {
        'docker_version': '1.8.2',
        'os': 'linux',
        'architecture': 'amd64',
        'author': '<author name>',
        'created': '2016-09-30T10:16:27.109917034Z',
        'container': '1ee508bc7a35150c9e5924097a31dfb4b6b2ca1260deb6fd14cb03c53764e40b',
        # these two mappings are essentially similar: config is the runtime
        # config and container_config is the config as it existed when the
        # container was created.
        'config': { <some config k/v pairs> },
        'container_config': { <some config k/v pairs> },

        # array of objects describing the history of each layer.
        # The array is ordered from bottom-most layer to top-most layer.
        'history': [
            {'author': 'The CentOS Project <*****@*****.**> - ami_creator',
             'created': '2015-04-22T05:12:47.171582029Z',
             'created_by': '/bin/sh -c #(nop) MAINTAINER The CentOS Project <*****@*****.**> - ami_creator',
             'comment': 'some comment (eg a commit message)',
             'empty_layer': True or False (if not present, defaults to False.
              True for empty, no-op layers with no rootfs content.)
            },
            {'author': 'The CentOS Project <*****@*****.**> - ami_creator',
             'created': '2015-04-22T05:13:47.072498418Z',
             'created_by': '/bin/sh -c #(nop) ADD file:eab3c29917290b056db08167d3a9f769c4b4ce46403be2fad083bc2535fb4d03 in /'
            },
        ],

        # this is in order from bottom-most to top-most
        # each id is the sha256 of a layer.tar
        # NOTE: Empty layers may NOT have their digest listed here, so this list
        # may not align exactly with the history list:
        # e.g. this list only has entries if "empty_layer" is not set to True for that layer.
        'rootfs': {
            'diff_ids': ['sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef',
                         'sha256:2436bc321ced91d2f3052a98ff886a2feed0788eb524b2afeb48099d084c33f5',
                         'sha256:cd141a5beb0ec83004893dfea6ea8508c6d09a0634593c3f35c0d433898c9322'],
            'type': 'layers'
        }
    }
    """
    image_id = fileutils.file_base_name(config_file)
    config_digest = sha256_digest(config_file)
    if image_id != as_bare_id(config_digest):
        print('WARNING: image config digest is not consistent.')
        config_digest = 'sha256:' + image_id

    image_config = load_json(config_file)

    # merge "configs"
    ccnf = image_config.pop('container_config', {})
    cnf = image_config.pop('config', {})
    config, warns = merge_configs(ccnf, cnf)
    if warns and verbose:
        print('Warning when loading: %(config_file)r' % locals())
        for w in warns:
            print(w)

    rootfs = image_config.pop('rootfs')
    # we only support this for now
    assert rootfs['type'] == 'layers'
    digests = rootfs['diff_ids']
    digests_it = iter(digests)

    # FIXME: this may not work if there is a diff for an empty layer with a
    # digest for some EMPTY content e.g. e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855

    # update layer data with digests (e.g. align history and diff digests,
    # skipping empty layers that have no digest)
    layers = image_config.pop('history')
    for lay in layers:
        if lay.get('empty_layer'):
            continue
        lay['layer_digest'] = next(digests_it)

    remaining = list(digests_it)
    assert not remaining

    layers = [Layer(**l) for l in layers]

    image_data = dict(
        image_id=image_id,
        layers=layers,
        config_digest=config_digest,
        top_layer_digest=layers[-1].layer_digest,
        top_layer_id=layers[-1].layer_id,
        config=config,
    )
    image_data.update(image_config)

    image = Image(**image_data)
    return image