def test_new_name_with_extensions(self):
    """Check that new_name() suffixes clashing names while keeping extensions."""
    test_dir = self.get_test_loc("new_name/ext", copy=True)
    cases = [
        ("test.txt", False, "test_3.txt"),
        ("TEST.txt", False, "TEST_3.txt"),
        ("TEST.tXt", False, "TEST_3.tXt"),
        ("test.txt", True, "test.txt_2"),
        ("teST.txt", True, "teST.txt_2"),
    ]
    for name, is_dir, expected in cases:
        renamed = new_name(join(test_dir, name), is_dir=is_dir)
        assert not exists(renamed)
        assert expected == fileutils.file_name(renamed)
def test_new_name_with_empties(self):
    """Check new_name() behavior on empty, slash-terminated and dot paths."""
    base_dir = self.get_temp_dir()
    self.assertRaises(AssertionError, new_name, '', is_dir=False)

    renamed = new_name(base_dir + '/', is_dir=False)
    assert renamed
    assert not exists(renamed)

    renamed = new_name(join(base_dir, '.'), is_dir=False)
    assert not exists(renamed)
    assert '_' == fileutils.file_name(renamed)

    renamed = new_name(base_dir + '/', is_dir=True)
    assert not exists(renamed)
    assert fileutils.file_name(renamed)

    renamed = new_name(join(base_dir, '.'), is_dir=True)
    assert not exists(renamed)
    assert '_' == fileutils.file_name(renamed)
def test_new_name_with_extensions(self):
    """new_name() must return unique names, preserving the file extension."""
    test_dir = self.get_test_loc('new_name/ext', copy=True)

    def check(filename, is_dir, expected):
        renamed = new_name(join(test_dir, filename), is_dir=is_dir)
        assert not exists(renamed)
        assert expected == fileutils.file_name(renamed)

    check('test.txt', False, 'test_3.txt')
    check('TEST.txt', False, 'TEST_3.txt')
    check('TEST.tXt', False, 'TEST_3.tXt')
    check('test.txt', True, 'test.txt_2')
    check('teST.txt', True, 'teST.txt_2')
def test_file_name_on_path_and_location_10(self):
    """file_name() returns the bare name for a plain path and a real location."""
    test_dir = self.get_test_loc('fileutils/basename')
    expected_name = 'tst'
    assert expected_name == fileutils.file_name('tst')
    # also test on an actual on-disk location
    assert expected_name == fileutils.file_name(os.path.join(test_dir, 'tst'))
def is_metadata_json(location):
    """
    Return True if `location` path is for a Chef metadata.json file that is
    not inside a 'dist-info' directory (metadata.json is also used by Python
    installed packages there).
    """
    if not filetype.is_file(location):
        return False
    if fileutils.file_name(location).lower() != 'metadata.json':
        return False
    parent = fileutils.file_name(fileutils.parent_directory(location)).lower()
    return not parent.endswith('dist-info')
def make_locations_relative(self, package_dict):
    """
    Replace absolute location values in `package_dict` with bare file names,
    in place, for keys ending in 'location' (single value) and 'locations'
    (list of values). Return the updated mapping.
    """
    for key, value in package_dict.items():
        if not value:
            continue
        if key.endswith('location'):
            package_dict[key] = fileutils.file_name(value) or None
        if key.endswith('locations'):
            # Falsy entries become None; others keep only the file name.
            package_dict[key] = [
                (fileutils.file_name(v) or None) if v else None
                for v in value
            ]
    return package_dict
def make_locations_relative(self, package_dict):
    """
    Convert absolute location values of `package_dict` to plain file names
    in place and return the mapping.
    """
    def to_name(loc):
        # Falsy locations map to None; otherwise keep only the file name.
        return (fileutils.file_name(loc) or None) if loc else None

    for key, value in package_dict.items():
        if not value:
            continue
        if key.endswith('location'):
            package_dict[key] = to_name(value)
        if key.endswith('locations'):
            package_dict[key] = [to_name(v) for v in value]
    return package_dict
def convert_to_utf8(location):
    """
    Convert the file at location to UTF-8 text. Return the location of the
    converted file, or the original `location` for non-text files and files
    whose encoding cannot be detected.
    """
    if not get_type(location).is_text:
        return location
    # Sniff only the head of the file to guess the encoding.
    # BUG FIX: the original leaked the file handle opened for sniffing.
    with open(location, 'rb') as sniff:
        start = sniff.read(4096)
    encoding = chardet.detect(start)
    if encoding:
        encoding = encoding.get('encoding', None)
        if encoding:
            target = os.path.join(
                fileutils.get_temp_dir('markup'),
                fileutils.file_name(location),
            )
            # Decode with the detected encoding (replacing bad bytes) and
            # re-encode as UTF-8 into a temp location.
            with codecs.open(location, 'rb', encoding=encoding,
                             errors='replace', buffering=16384) as inf:
                with codecs.open(target, 'wb', encoding='utf-8') as outf:
                    outf.write(inf.read())
            return target
        else:
            # chardet failed somehow to detect an encoding
            return location
def is_manifest(cls, location):
    """
    Return True if the file at ``location`` is likely a manifest of this
    type, i.e. a YAML file.
    """
    if not filetype.is_file(location):
        return False
    lowered = fileutils.file_name(location).lower()
    return lowered.endswith(('.yaml', '.yml'))
def fixed_width_file_name(path, max_length=25):
    """
    Return a fixed width file name of at most `max_length` characters
    extracted from the `path` string and usable for fixed width display. If
    the file_name is longer than `max_length`, it is truncated in the middle
    with using three dots "..." as an ellipsis and the extension is kept.

    For example:
    >>> short = fixed_width_file_name('0123456789012345678901234.c')
    >>> assert '0123456789...5678901234.c' == short
    """
    if not path:
        return ''
    # get the path as unicode for display!
    filename = fileutils.file_name(path_to_unicode(path))
    if len(filename) <= max_length:
        return filename
    base_name, extension = fileutils.splitext(filename)
    dots = '...'
    # Room left for the truncated base name once extension and dots are kept.
    keep = max_length - len(extension) - len(dots)
    if keep < (len(extension) + len(dots)) or keep < 5:
        return ''
    half = abs(keep // 2)
    return base_name[:half] + dots + base_name[-half:] + extension
def test_new_name_without_extensions(self):
    """new_name() suffixes names with a counter when there is no extension."""
    test_dir = self.get_test_loc('new_name/noext', copy=True)
    for name, is_dir, expected in (
        ('test', False, 'test_4'),
        ('TEST', False, 'TEST_4'),
        ('test_1', True, 'test_1_1'),
    ):
        renamed = new_name(join(test_dir, name), is_dir=is_dir)
        assert not exists(renamed)
        assert expected == fileutils.file_name(renamed)
def parse_with_dparse(location):
    """
    Return a list of DependentPackage parsed with dparse from the dependency
    file at `location`, or None for directories, unsupported file names and
    empty dependency sets.
    """
    if filetype.is_dir(location):
        return
    file_name = fileutils.file_name(location)
    supported = (
        filetypes.requirements_txt,
        filetypes.conda_yml,
        filetypes.tox_ini,
        filetypes.pipfile,
        filetypes.pipfile_lock,
    )
    if file_name not in supported:
        return
    # py2 requires binary mode for dparse input.
    mode = 'rb' if py2 else 'r'
    with open(location, mode) as f:
        content = f.read()
    df_dependencies = dparse.parse(content, file_type=file_name).dependencies
    if not df_dependencies:
        return
    package_dependencies = []
    for df_dependency in df_dependencies:
        specs = df_dependency.specs
        requirement = str(specs) if specs else None
        package_dependencies.append(
            models.DependentPackage(
                purl=PackageURL(
                    type='pypi', name=df_dependency.name).to_string(),
                scope='dependencies',
                is_runtime=True,
                is_optional=False,
                requirement=requirement,
            )
        )
    return package_dependencies
def get_file_infos(location):
    """
    Return a list of dictionaries of informations collected from the file or
    directory at location.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    T = contenttype.get_type(location)
    is_file = T.is_file
    is_dir = T.is_dir
    # Each file-only (or dir-only) value maps falsy results to the default.
    infos = OrderedDict([
        ('type', filetype.get_type(location, short=False)),
        ('name', fileutils.file_name(location)),
        ('extension', (fileutils.file_extension(location) or '') if is_file else ''),
        ('date', (filetype.get_last_modified_date(location) or None) if is_file else None),
        ('size', T.size),
        ('sha1', (sha1(location) or None) if is_file else None),
        ('md5', (md5(location) or None) if is_file else None),
        ('files_count', (filetype.get_file_count(location) or None) if is_dir else None),
        ('mime_type', (T.mimetype_file or None) if is_file else None),
        ('file_type', (T.filetype_file or None) if is_file else None),
        ('programming_language', (T.programming_language or None) if is_file else None),
        ('is_binary', (T.is_binary or None) if is_file else None),
        ('is_text', (T.is_text or None) if is_file else None),
        ('is_archive', (T.is_archive or None) if is_file else None),
        ('is_media', (T.is_media or None) if is_file else None),
        ('is_source', (T.is_source or None) if is_file else None),
        ('is_script', (T.is_script or None) if is_file else None),
    ])
    return [infos]
def fixed_width_file_name(path, max_length=25):
    """
    Return a fixed width file name of at most `max_length` characters
    extracted from the `path` string and usable for fixed width display. If
    the file_name is longer than `max_length`, it is truncated in the middle
    with using three dots "..." as an ellipsis and the extension is kept.

    For example:
    >>> short = fixed_width_file_name('0123456789012345678901234.c')
    >>> assert '0123456789...5678901234.c' == short
    """
    if not path:
        return ''
    filename = fileutils.file_name(path)
    if len(filename) <= max_length:
        return filename
    base_name, extension = fileutils.splitext(filename)
    ellipsis = '...'
    # Characters available for the truncated base name.
    available = max_length - len(extension) - len(ellipsis)
    if available < (len(extension) + len(ellipsis)) or available < 5:
        return ''
    side = abs(available // 2)
    prefix, suffix = base_name[:side], base_name[-side:]
    return '{}{}{}{}'.format(prefix, ellipsis, suffix, extension)
def test_new_name_without_extensions(self):
    """new_name() appends a numeric suffix when names have no extension."""
    test_dir = self.get_test_loc("new_name/noext", copy=True)

    def check(filename, is_dir, expected):
        renamed = new_name(join(test_dir, filename), is_dir=is_dir)
        assert not exists(renamed)
        assert expected == fileutils.file_name(renamed)

    check("test", False, "test_4")
    check("TEST", False, "TEST_4")
    check("test_1", True, "test_1_1")
def parse(location):
    """
    Return a Package built from parsing a file or directory at 'location'
    """
    if filetype.is_dir(location):
        package = parse_unpackaged_source(location)
        if package:
            parse_dependencies(location, package)
        return package

    file_name = fileutils.file_name(location)
    # Map file-name suffixes to their dedicated parser; order matters as the
    # first matching suffix wins.
    parsers = {
        'setup.py': parse_setup_py,
        'requirements.txt': parse_requirements_txt,
        'requirements.in': parse_requirements_txt,
        'Pipfile.lock': parse_pipfile_lock,
        'metadata.json': parse_metadata,
        'PKG-INFO': parse_unpackaged_source,
        '.whl': parse_wheel,
        '.egg': parse_egg_binary,
        '.tar.gz': parse_source_distribution,
        '.zip': parse_source_distribution,
    }
    for name, parser in parsers.items():
        if not file_name.endswith(name):
            continue
        package = parser(location)
        if package:
            parse_dependencies(fileutils.parent_directory(location), package)
            return package
def parse2(location):
    """
    Parse using the pkginfo library according the file types and return
    package.
    """
    if filetype.is_dir(location):
        package = parse_unpackaged_source(location)
        if package:
            parse_dependencies(location, package)
        return package

    file_name = fileutils.file_name(location)
    # Suffix-to-parser dispatch table.
    parsers = {
        'setup.py': parse_unpackaged_source,
        '.whl': parse_wheel,
        '.egg': parse_egg_binary,
        '.tar.gz': parse_source_distribution,
        '.zip': parse_source_distribution,
    }
    for suffix, parser in parsers.items():
        if not file_name.endswith(suffix):
            continue
        package = parser(location)
        if package:
            parse_dependencies(fileutils.parent_directory(location), package)
            return package
def get_file_infos(location):
    """
    Return a list of dictionaries of informations collected from the file or
    directory at location.
    """
    # Imports are function-local, presumably to keep module import cheap --
    # TODO confirm against the rest of the module.
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    T = contenttype.get_type(location)
    is_file = T.is_file
    is_dir = T.is_dir
    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    # The `cond and value or default` pattern maps falsy values to the
    # default ('' or None), not just the condition being false.
    infos['extension'] = is_file and fileutils.file_extension(location) or ''
    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = T.size
    # Checksums and most attributes only apply to regular files.
    infos['sha1'] = is_file and sha1(location) or None
    infos['md5'] = is_file and md5(location) or None
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    # Note: these flags end up as True or None (never False) because of the
    # and/or pattern above.
    infos['is_binary'] = is_file and T.is_binary or None
    infos['is_text'] = is_file and T.is_text or None
    infos['is_archive'] = is_file and T.is_archive or None
    infos['is_media'] = is_file and T.is_media or None
    infos['is_source'] = is_file and T.is_source or None
    infos['is_script'] = is_file and T.is_script or None
    return [infos]
def download_url(url, file_name=None, verify=True, timeout=10):
    """
    Fetch `url` and return the temporary location where the fetched content
    was saved. Use `file_name` if provided or create a new `file_name` base on
    the last url segment. If `verify` is True, SSL certification is performed.
    Otherwise, no verification is done but a warning will be printed.
    `timeout` is the timeout in seconds.
    """
    file_name = file_name or fileutils.file_name(url)
    try:
        response = requests.get(url, timeout=timeout, verify=verify)
    except (ConnectionError, InvalidSchema):
        logger.error('download_url: Download failed for %(url)r' % locals())
        raise
    status = response.status_code
    if status != 200:
        msg = ('download_url: Download failed for %(url)r with %(status)r'
               % locals())
        logger.error(msg)
        raise Exception(msg)
    output_file = os.path.join(
        fileutils.get_temp_dir(prefix='fetch-'), file_name)
    with open(output_file, 'wb') as out:
        out.write(response.content)
    return output_file
def download_url(url, file_name=None, verify=True):
    """
    Return the temporary location of the file fetched at the remote url. Use
    file_name if provided or create a file name base on the last url segment.
    If verify is True, SSL certification is performed. Otherwise, no
    verification is done but a warning will be printed.
    """
    file_name = file_name or fileutils.file_name(url)
    try:
        response = requests.get(url, timeout=10, verify=verify)
    except (ConnectionError, InvalidSchema):
        logger.error('fetch: Download failed for %(url)r' % locals())
        raise
    status = response.status_code
    if status != 200:
        msg = 'fetch: Download failed for %(url)r with %(status)r' % locals()
        logger.error(msg)
        raise Exception(msg)
    tmp_dir = fileutils.get_temp_dir(base_dir='fetch')
    output_file = os.path.join(tmp_dir, file_name)
    with open(output_file, 'wb') as out:
        out.write(response.content)
    return output_file
def get_source_file_path_references(location):
    """
    Yield unique references to source file paths extracted from DWARF debug
    symbols from the Elf file at `location`. If there are errors when
    processing Elfs, these are returned as well as paths prefixed with
    'ERROR: '.
    """
    if not os.path.exists(location):
        return
    T = contenttype.get_type(location)
    if not T.is_elf:
        return
    unique_files = set()
    unique_paths = set()
    errors = []

    def collect(path):
        # Bare file names (no slash) are tracked separately from full paths.
        if '/' not in path:
            unique_files.add(path)
        else:
            unique_paths.add(path)

    try:
        with_libdwarf = dwarf.Dwarf(location)
        for path in with_libdwarf.included_source_files:
            collect(path)
        for path in with_libdwarf.original_source_files:
            collect(path)
    except Exception as lde:
        msg = str(lde)
        _, m1, m2 = msg.partition('dwarfdump')
        errors.append(''.join([m1, m2]))
    try:
        for entry in dwarf2.get_dwarfs(location):
            collect(entry.path)
    except Exception as lde:
        # BUG FIX: the original appended the `str` builtin itself instead of
        # the error message text.
        errors.append(str(lde))
    # Promote bare file names that never appear as the basename of a full
    # path (and are not ignored) to the result set.
    seen_file_names = set(file_name(p) for p in unique_paths)
    for fn in unique_files:
        if fn not in seen_file_names and fn not in ignores:
            unique_paths.add(fn)
    for error in errors:
        yield 'ERROR: ' + error
    for path in sorted(unique_paths):
        yield path
def is_requirements_file(location):
    """
    Return True if the ``location`` is likely for a pip requirements file.

    For example::
    >>> is_requirements_file('dev-requirements.txt')
    True
    >>> is_requirements_file('requirements.txt')
    True
    >>> is_requirements_file('requirements.in')
    True
    >>> is_requirements_file('requirements.pip')
    True
    >>> is_requirements_file('requirements-dev.txt')
    True
    >>> is_requirements_file('some-requirements-dev.txt')
    True
    >>> is_requirements_file('reqs.txt')
    False
    >>> is_requirements_file('requires.txt')
    True
    """
    filename = fileutils.file_name(location)
    patterns = (
        '*requirements*.txt',
        '*requirements*.pip',
        '*requirements*.in',
        'requires.txt',
    )
    return any(fnmatch.fnmatchcase(filename, pattern) for pattern in patterns)
def dwarf_source_path(location):
    """
    Collect unique paths to compiled source code found in Elf binaries DWARF
    sections for D2D.
    """
    # Dead `location = location` self-assignment removed.
    T = contenttype.get_type(location)
    if not (T.is_elf or T.is_stripped_elf):
        return
    seen_paths = set()
    path_file_names = set()
    bare_file_names = set()
    for dpath in chain(get_dwarf1(location), get_dwarf2(location)):
        if dpath in seen_paths:
            continue
        fn = fileutils.file_name(dpath)
        if fn == dpath:
            # A bare file name with no directory component.
            bare_file_names.add(fn)
            continue
        path_file_names.add(fn)
        seen_paths.add(dpath)
        yield dpath
    # only yield filename that do not exist as full paths
    for bfn in sorted(bare_file_names):
        if bfn not in path_file_names and bfn not in seen_paths:
            yield bfn
            seen_paths.add(bfn)
def parse_with_dparse(location):
    """
    Return a list of DependentPackage parsed with dparse from the dependency
    file at `location`, or None for directories, unsupported files and empty
    dependency sets.
    """
    if filetype.is_dir(location):
        return
    file_name = fileutils.file_name(location)
    dependency_type = get_dependency_type(file_name)
    supported = (
        filetypes.requirements_txt,
        filetypes.conda_yml,
        filetypes.tox_ini,
        filetypes.pipfile,
        filetypes.pipfile_lock,
    )
    if dependency_type not in supported:
        return
    # py2 requires binary mode for dparse input.
    mode = 'rb' if py2 else 'r'
    with open(location, mode) as f:
        content = f.read()
    df_dependencies = dparse.parse(content, file_type=dependency_type).dependencies
    if not df_dependencies:
        return
    package_dependencies = []
    for df_dependency in df_dependencies:
        dep_name = df_dependency.name
        specs = list(df_dependency.specs._specs)
        requirement = str(df_dependency.specs) if specs else None
        # A pinned (== or ===) spec means the dependency is fully resolved;
        # the last pinned spec wins, matching the original loop.
        pinned_version = None
        for spec in specs:
            if spec.operator in ('==', '==='):
                pinned_version = spec.version
        is_resolved = pinned_version is not None
        if is_resolved:
            purl = PackageURL(
                type='pypi', name=dep_name, version=pinned_version).to_string()
        else:
            purl = PackageURL(type='pypi', name=dep_name).to_string()
        package_dependencies.append(
            models.DependentPackage(
                purl=purl,
                scope='dependencies',
                is_runtime=True,
                is_optional=False,
                is_resolved=is_resolved,
                requirement=requirement,
            )
        )
    return package_dependencies
def is_manifest(cls, location):
    """
    Return True if `location` path is for a Chef metadata.rb file.

    NOTE(review): the original docstring described metadata.json (and its use
    in Python 'dist-info' directories), but the code matches 'metadata.rb';
    documented from the code -- confirm which was intended.
    """
    return (filetype.is_file(location)
        and fileutils.file_name(location).lower() == 'metadata.rb')
def recognize(cls, location):
    """
    Yield a Package built from the go.mod or go.sum file at `location`.
    """
    filename = fileutils.file_name(location).lower()
    if filename == 'go.mod':
        yield build_gomod_package(go_mod.parse_gomod(location))
    elif filename == 'go.sum':
        yield build_gosum_package(go_mod.parse_gosum(location))
def get_file_infos(location):
    """
    Return a mapping of file information collected from the file or directory
    at `location`.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)
    T = contenttype.get_type(location)
    infos['type'] = filetype.get_type(location, short=False)
    name = fileutils.file_name(location)
    # Reuse `name`: the original called fileutils.file_name() a second time.
    infos['name'] = name
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        # Directories have no extension; the base name is the full name.
        base_name = name
        extension = ''
    infos['base_name'] = base_name
    infos['extension'] = extension
    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = T.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)
    return infos
def is_manifest(cls, location):
    """
    Return True if the file at ``location`` has one of the known third-party
    README manifest file names.
    """
    known_names = {
        'readme.android',
        'readme.chromium',
        'readme.facebook',
        'readme.google',
        'readme.thirdparty',
    }
    return (filetype.is_file(location)
            and fileutils.file_name(location).lower() in known_names)
def is_datafile(cls, location, filetypes=tuple()):
    """
    Return True if `location` path is for a Chef metadata.json file. The
    metadata.json is/was also used in Python legacy wheels in a 'dist-info'
    directory.
    """
    if not super().is_datafile(location, filetypes=filetypes):
        return
    parent_name = fileutils.file_name(fileutils.parent_directory(location))
    return not parent_name.endswith('dist-info')
def parse(location):
    """
    Return a Package object from a Cargo.toml/Cargo.lock file, or None for
    other locations.
    """
    handlers = {
        'cargo.toml': build_cargo_toml_package,
        'cargo.lock': build_cargo_lock_package,
    }
    # `filename` is False for non-files, which never matches a handler key.
    filename = filetype.is_file(location) and fileutils.file_name(location).lower()
    handler = handlers.get(filename)
    if handler:
        # Redundant `handler and` guard removed: handler is known truthy here.
        return handler(toml.load(location, _dict=dict))
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a
    file or create directory without overwriting existing files or
    directories in the same parent directory, ignoring the case of the
    filename.

    The case of the filename is ignored to ensure that similar results are
    returned across case sensitive (*nix) and case insensitive file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    if on_linux:
        location = path_to_bytes(location)
    location = location.rstrip(PATHS_SEPS)
    assert location
    parent = fileutils.parent_directory(location)
    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))
    filename = fileutils.file_name(location)
    # corner case: "." and ".." are not usable names.
    # BUG FIX: the tuple was (DOT, DOT) so ".." was never remapped; assuming
    # DOT is the single-dot constant, DOT + DOT covers "..".
    if filename in (DOT, DOT + DOT):
        filename = UNDERSCORE
    # if unique, return this
    if filename.lower() not in siblings_lower:
        return os.path.join(parent, filename)
    # otherwise seek a unique name
    if is_dir:
        # directories do not have an "extension"
        base_name = filename
        ext = EMPTY_STRING
    else:
        base_name, dot, ext = filename.partition(DOT)
        if dot:
            ext = dot + ext
        else:
            base_name = filename
            ext = EMPTY_STRING
    # find a unique filename, adding a counter int to the base_name
    counter = 1
    while 1:
        filename = base_name + UNDERSCORE + str(counter) + ext
        if filename.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, filename)
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a
    file or create directory without overwriting existing files or
    directories in the same parent directory, ignoring the case of the
    filename.

    The case of the filename is ignored to ensure that similar results are
    returned across case sensitive (*nix) and case insensitive file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    if on_linux:
        location = fsencode(location)
    location = location.rstrip(PATHS_SEPS)
    assert location
    parent = parent_directory(location)
    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))
    filename = file_name(location)
    # corner case: "." and ".." are not usable names.
    # BUG FIX: the tuple was (DOT, DOT) so ".." was never remapped; assuming
    # DOT is the single-dot constant, DOT + DOT covers "..".
    if filename in (DOT, DOT + DOT):
        filename = UNDERSCORE
    # if unique, return this
    if filename.lower() not in siblings_lower:
        return join(parent, filename)
    # otherwise seek a unique name
    if is_dir:
        # directories do not have an "extension"
        base_name = filename
        ext = EMPTY_STRING
    else:
        base_name, dot, ext = filename.partition(DOT)
        if dot:
            ext = dot + ext
        else:
            base_name = filename
            ext = EMPTY_STRING
    # find a unique filename, adding a counter int to the base_name
    counter = 1
    while 1:
        filename = base_name + UNDERSCORE + str(counter) + ext
        if filename.lower() not in siblings_lower:
            break
        counter += 1
    return join(parent, filename)
def is_manifest(cls, location):
    """
    Return True if the file at ``location`` is likely a manifest of this
    type. Sub-classes should override to implement their own manifest
    recognition.

    Matches either on a file-name pattern, or on the combination of
    filetype, mimetype and extension attributes declared on the class.
    """
    if not filetype.is_file(location):
        return
    # First try a direct file-name pattern match: this alone is sufficient.
    filename = file_name(location)
    file_patterns = cls.file_patterns
    if any(fnmatch.fnmatchcase(filename, metaf) for metaf in file_patterns):
        return True
    T = contenttype.get_type(location)
    ftype = T.filetype_file.lower()
    mtype = T.mimetype_file
    _base_name, extension = splitext_name(location, is_file=True)
    extension = extension.lower()
    if TRACE:
        logger_debug(
            'is_manifest: ftype:', ftype, 'mtype:', mtype,
            'pygtype:', T.filetype_pygment,
            'fname:', filename, 'ext:', extension,
        )
    # Substring match against the declared filetypes.
    type_matched = False
    if cls.filetypes:
        type_matched = any(t in ftype for t in cls.filetypes)
    # Substring match against the declared mimetypes.
    mime_matched = False
    if cls.mimetypes:
        mime_matched = any(m in mtype for m in cls.mimetypes)
    # Case-insensitive glob match against the declared extensions.
    extension_matched = False
    extensions = cls.extensions
    if extensions:
        extensions = (e.lower() for e in extensions)
        extension_matched = any(
            fnmatch.fnmatchcase(extension, ext_pat) for ext_pat in extensions)
    # NOTE(review): all three must match, so a class with any empty
    # filetypes/mimetypes/extensions attribute can never match via this
    # branch -- confirm this is intended.
    if type_matched and mime_matched and extension_matched:
        return True
def test_file_name_on_path_and_location(self):
    """file_name() behaves the same on plain paths and real locations."""
    test_dir = self.get_test_loc("fileutils/basename", copy=True)
    tests = [
        ("a/.a/file", "file"),
        ("a/.a/", ".a"),
        ("a/b/.a.b", ".a.b"),
        ("a/b/a.tag.gz", "a.tag.gz"),
        ("a/b/", "b"),
        ("a/f.a", "f.a"),
        ("a/", "a"),
        ("f.a/a.c", "a.c"),
        ("f.a/", "f.a"),
        ("tst", "tst"),
    ]
    for test_file, expected in tests:
        assert expected == fileutils.file_name(test_file)
        # also test on location
        assert expected == fileutils.file_name(os.path.join(test_dir, test_file))
def test_file_name_on_path_and_location(self):
    """file_name() must strip directories for both paths and locations."""
    test_dir = self.get_test_loc('fileutils/basename', copy=True)

    def check(test_file, expected):
        assert expected == fileutils.file_name(test_file)
        # also test on location
        assert expected == fileutils.file_name(os.path.join(test_dir, test_file))

    check('a/.a/file', 'file')
    check('a/.a/', '.a')
    check('a/b/.a.b', '.a.b')
    check('a/b/a.tag.gz', 'a.tag.gz')
    check('a/b/', 'b')
    check('a/f.a', 'f.a')
    check('a/', 'a')
    check('f.a/a.c', 'a.c')
    check('f.a/', 'f.a')
    check('tst', 'tst')
def parse_dependency_file(location):
    """
    Return a PythonPackage built from a dparse-supported dependency file at
    location.
    """
    if not location:
        return
    dependency_type = get_dparse_dependency_type(fileutils.file_name(location))
    if dependency_type:
        return PythonPackage(dependencies=parse_with_dparse(location))
def is_java_source(self):
    """
    Return True if this file name matches a Java/AspectJ source pattern.

    FIXME: Check the filetype.
    """
    if self.is_file is not True:
        return False
    name = fileutils.file_name(self.location)
    return any(
        fnmatch.fnmatch(name, pattern)
        for pattern in ("*.java", "*.aj", "*.ajt")
    )
def get_file_infos(location):
    """
    Return a mapping of file information collected from the file or directory
    at `location`.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    # Normalize the path representation: bytes on Linux, unicode elsewhere.
    if on_linux:
        location = path_to_bytes(location)
    else:
        location = path_to_unicode(location)
    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)
    T = contenttype.get_type(location)
    infos['type'] = filetype.get_type(location, short=False)
    name = fileutils.file_name(location)
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        # Directories have no extension; the base name is the full name.
        base_name = name
        extension = ''
    if on_linux:
        # Decode the byte paths back to unicode for output.
        infos['name'] = path_to_unicode(name)
        infos['base_name'] = path_to_unicode(base_name)
        infos['extension'] = path_to_unicode(extension)
    else:
        infos['name'] = name
        infos['base_name'] = base_name
        infos['extension'] = extension
    # The `cond and value or None` pattern maps falsy values to None.
    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = T.size
    # Inserts the 'sha1' and 'md5' keys at this position in the mapping.
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)
    return infos
def is_java_class(self):
    """
    Return True if this file name matches a compiled Java class pattern.

    FIXME: Check the filetype.
    """
    if self.is_file is not True:
        return False
    name = fileutils.file_name(self.location)
    return fnmatch.fnmatch(name, "*?.class")
def recognize(cls, location):
    """
    Yield Packages built from the Starlark BUILD-style file at `location`.

    Thanks to Starlark being a Python dialect, we can use the `ast` library
    to parse it.
    """
    if not cls.is_manifest(location):
        return
    with open(location, 'rb') as f:
        tree = ast.parse(f.read())
    build_rules = defaultdict(list)
    for statement in tree.body:
        # We only care about expression statements or assignments whose value
        # is a call to a plain-named function.
        # BUG FIX: the original mixed `or`/`and` so a bare expression
        # statement whose value was not a Call (e.g. a docstring) crashed on
        # `statement.value.func`.
        if not isinstance(statement, (ast.Expr, ast.Assign)):
            continue
        value = statement.value
        if not (isinstance(value, ast.Call)
                and isinstance(value.func, ast.Name)):
            continue
        rule_name = value.func.id
        # Ensure that we are only creating packages from the proper
        # build rules
        if not check_rule_name_ending(rule_name):
            continue
        # Process the rule arguments
        args = {}
        for kw in value.keywords:
            arg_name = kw.arg
            if isinstance(kw.value, ast.Str):
                args[arg_name] = kw.value.s
            if isinstance(kw.value, ast.List):
                # We collect the elements of a list if the element is not a
                # function call
                args[arg_name] = [
                    elt.s for elt in kw.value.elts
                    if not isinstance(elt, ast.Call)
                ]
        if args:
            build_rules[rule_name].append(args)
    if build_rules:
        for rule_name, rule_instances_args in build_rules.items():
            for args in rule_instances_args:
                name = args.get('name')
                if not name:
                    continue
                license_files = args.get('licenses')
                yield cls(
                    name=name,
                    declared_license=license_files,
                    root_path=fileutils.parent_directory(location),
                )
    else:
        # If we don't find anything in the manifest file, we yield a Package
        # with the parent directory as the name
        yield cls(
            name=fileutils.file_name(fileutils.parent_directory(location)))
def get_relative_path(path, len_base_path, base_is_dir):
    """
    Return a posix relative path from the posix 'path' relative to a base
    path of `len_base_path` length where the base is a directory if
    `base_is_dir` True or a file otherwise.
    """
    path = path_to_unicode(path)
    if base_is_dir:
        relative = path[len_base_path:]
    else:
        # For a file base, only the file name is relative.
        relative = fileutils.file_name(path)
    return relative.lstrip('/')
def extract_event(item):
    """
    Display an extract event.
    """
    if not item:
        return ''
    if not verbose:
        # terse mode: show only the file name being extracted
        line = fileutils.file_name(item.source) or ''
        return 'Extracting: %(line)s' % locals()
    if item.done:
        # verbose mode reports nothing for completed items
        return ''
    line = utils.get_relative_path(original_input, abs_input, as_posixpath(item.source)) or ''
    return 'Extracting: %(line)s' % locals()
def __init__(self, data_file=None, test_file=None):
    """
    Build a test from an optional YAML `data_file` with expectations and a
    `test_file` holding the text under test.
    """
    self.data_file = data_file
    self.test_file = test_file
    if test_file:
        # note: only set when a test file is provided
        self.test_file_name = fileutils.file_name(test_file)
    if data_file:
        with codecs.open(data_file, mode='rb', encoding='utf-8') as df:
            loaded = saneyaml.load(df.read())
        self.licenses = loaded.get('licenses', [])
        self.notes = loaded.get('notes')
        self.sort = loaded.get('sort')
        self.expected_failure = loaded.get('expected_failure', False)
def parse(location):
    """
    Return a Package object from a composer.json file or None.
    """
    if not is_phpcomposer_json(location):
        return
    with codecs.open(location, encoding='utf-8') as manifest:
        # preserve key order from the JSON document
        package_data = json.load(manifest, object_pairs_hook=OrderedDict)
    base_dir = fileutils.parent_directory(location)
    metafile_name = fileutils.file_name(location)
    return build_package(package_data, base_dir, metafile_name)
def test_new_name_with_empties(self):
    """Degenerate locations ('', '/', '.') map to a name of 'file'."""
    test_dir = self.get_temp_dir()
    self.assertRaises(AssertionError, new_name, "", is_dir=False)
    # every combination of a degenerate tail and is_dir yields 'file'
    for as_dir in (False, True):
        for tail in ("/", "."):
            renamed = new_name(join(test_dir, tail), is_dir=as_dir)
            assert not exists(renamed)
            assert "file" == fileutils.file_name(renamed)
def __init__(self, data_file=None, test_file=None):
    """
    Build a test from an optional YAML `data_file` with expectations and a
    `test_file` holding the text under test.
    """
    self.data_file = data_file
    self.test_file = test_file
    if test_file:
        # note: only set when a test file is provided
        self.test_file_name = fileutils.file_name(test_file)
    if data_file:
        with codecs.open(data_file, mode="rb", encoding="utf-8") as df:
            loaded = saneyaml.load(df.read())
        self.licenses = loaded.get("licenses", [])
        # TODO: this is for future support of license expressions
        self.license = loaded.get("license", None)
        self.notes = loaded.get("notes")
        self.expected_failure = loaded.get("expected_failure", False)
def is_patch(location, include_extracted=False):
    """
    Test if a file is a possible patch file. May return True for some files
    that are not patches. Extracted patch files are ignored by default.
    """
    filetype_text = typecode.contenttype.get_type(location).filetype_file.lower()
    name = fileutils.file_name(location)
    # heuristic: either the detected filetype mentions a diff, or the file
    # name carries a patch-like extension
    looks_like_patch = (
        "diff " in filetype_text
        or ".diff" in name
        or ".patch" in name
    )
    if not looks_like_patch:
        return False
    if not include_extracted and extractcode.is_extraction_path(name):
        return False
    return True
def new_name(location, is_dir=False):
    """
    Return a new non-existing location usable to write a file or create
    directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the name.
    The case of the name is ignored to ensure that similar results are
    returned across case sensitive (*nix) and case insensitive file systems.

    To find a new unique name:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()
    # degenerate names: empty, a lone dot, or a windows bare drive path
    # such as c: or z:
    is_bare_drive = name and len(name) == 2 and name.endswith(':')
    if not name or name == '.' or is_bare_drive:
        name = 'file'
    parent = fileutils.parent_directory(location)
    # all existing files or directories, lowercased for the
    # case-insensitive uniqueness check
    taken = set(sibling.lower() for sibling in os.listdir(parent))
    if name.lower() not in taken:
        return posixpath.join(parent, name)
    if is_dir:
        # directories have no extension
        base_name = name
        ext = ''
    else:
        base_name = fileutils.file_base_name(name)
        ext = fileutils.file_extension(name)
    counter = 1
    candidate = base_name + '_' + str(counter) + ext
    while candidate.lower() in taken:
        counter += 1
        candidate = base_name + '_' + str(counter) + ext
    # NOTE(review): the non-padded branch above uses posixpath.join while
    # this one uses os.path.join — kept as-is to preserve behavior, but the
    # inconsistency looks worth confirming with the original authors.
    return os.path.join(parent, candidate)
def _get_root_dir(input_path, strip_root=False, full_root=False):
    """
    Return a root dir name or None.
    On Windows, the path uses POSIX (forward slash) separators.
    """
    if strip_root:
        return
    # normalize to an absolute posix path before inspecting it
    normalized = os.path.abspath(os.path.normpath(os.path.expanduser(input_path)))
    normalized = fileutils.as_posixpath(normalized)
    if filetype.is_dir(normalized):
        root_dir = normalized
    else:
        root_dir = fileutils.parent_directory(normalized)
    root_dir = fileutils.as_posixpath(root_dir)
    return root_dir if full_root else fileutils.file_name(root_dir)
def extract_event(item):
    """
    Display an extract event.
    """
    if quiet or not item:
        return ''
    source = item.source
    if not isinstance(source, unicode):
        # Python 2: transliterate bytes then decode to get a unicode path
        source = toascii(source, translit=True).decode('utf-8', 'replace')
    if verbose:
        if item.done:
            return ''
        line = (utils.get_relative_path(path=source, len_base_path=len_base_path, base_is_dir=base_is_dir) or '') if source else ''
    else:
        # terse mode: just the file name
        line = (fileutils.file_name(source) or '') if source else ''
    if not isinstance(line, unicode):
        line = toascii(line, translit=True).decode('utf-8', 'replace')
    return 'Extracting: %(line)s' % locals()
def recon(self, location):
    """
    Inspect the files directly under the directory `location` for Maven POM
    files and recognize the layout they belong to (a META-INF pom.xml inside
    a jar, a development-tree pom.xml, or a repo-layout .pom next to its
    jar). NOTE(review): the recognized cases are currently placeholders
    (`pass`) — only the detection logic runs.
    """
    for f in os.listdir(location):
        loc = join(location, f)
        if not filetype.is_file(loc):
            continue
        # a pom is an xml doc
        # FIX: check the candidate file `loc`, not the containing directory
        # `location` that was passed in — the original tested the wrong path
        # so the POM check never looked at the file itself.
        if not is_pom(loc):
            continue
        if f == 'pom.xml':
            # first case: a maven pom.xml inside a META-INF directory
            # such as in META-INF/maven/log4j/log4j/pom.xml
            # the directory tree has a fixed depth
            # as is: META-INF/maven/groupid/artifactid/pom.xml
            # this will typically be inside a binary jar, so we should find
            # a typical structure above
            try:
                gggp = dirname(dirname(dirname(dirname(loc))))
                if fileutils.file_name(gggp) == 'META-INF':
                    # recon here: the root of the component is the parent of
                    # META-INF, return that, with a type and the POM
                    # metafile to parse.
                    pass
            # FIX: narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt and SystemExit
            except Exception:
                pass
            # second case: a maven pom.xml at the root of component
            # development tree we should find a few extra clues in the
            # conventional directory structure below for now we take this as
            # being the component root. return that, with a type and the POM
            # metafile to parse.
            pass
        elif f.endswith('.pom'):
            # first case: a maven repo layout
            # the jars are side-by-side with the pom
            # check if there are side-by-side artifacts
            jar = loc.replace('.pom', '.jar')
            if os.path.exists(jar):
                # return that, with a type and the POM metafile to parse.
                pass
def convert_to_utf8(location):
    """
    Convert the file at location to UTF-8 text.
    Return the location of the converted file or None.
    """
    if not contenttype.get_type(location).is_text:
        return location
    # FIX: read the encoding-sniffing sample with a context manager; the
    # original `open(location, 'rb').read(4096)` leaked the file handle.
    with open(location, 'rb') as sample_file:
        start = sample_file.read(4096)
    encoding = chardet.detect(start)
    if encoding:
        encoding = encoding.get('encoding', None)
        if encoding:
            # transcode into a temp file named after the original
            target = os.path.join(fileutils.get_temp_dir('markup'),
                                  fileutils.file_name(location))
            with codecs.open(location, 'rb', encoding=encoding,
                             errors='replace', buffering=16384) as inf:
                with codecs.open(target, 'wb', encoding='utf-8') as outf:
                    outf.write(inf.read())
            return target
        else:
            # chardet failed somehow to detect an encoding
            return location
    # NOTE: falls through returning None when chardet.detect returns a
    # falsy result, matching the docstring's "or None".
def get_tokens(location, template, use_cache=False):
    """
    Return a list of tokens from a file at `location` using the tokenizer
    function selected by `template`, optionally reading from and writing to
    the token cache.
    """
    location = os.path.abspath(location)
    if not exists(location):
        raise RuleWithNoTokenError('Rule text location does not exist: %(location)r' % locals())
    file_name = fileutils.file_name(location)
    cached_tokens = os.path.join(cache_dir, file_name)
    # TODO: improve cache check
    if use_cache and os.path.exists(cached_tokens):
        return list(load_tokens(cached_tokens))
    tokenizer = template_tknzr if template else text_tknzr
    lines = analysis.unicode_text_lines(location)
    tokens = list(tokenizer(lines))
    if use_cache:
        dump_tokens(cached_tokens, tokens)
    return tokens
def __init__(self, data_file=None, test_file=None):
    """
    Build a test from an optional YAML `data_file` with expectations and a
    `test_file` holding the text under test.
    """
    self.data_file = data_file
    self.test_file = test_file
    if test_file:
        # note: only set when a test file is provided
        self.test_file_name = fileutils.file_name(test_file)
    if data_file:
        with codecs.open(data_file, mode='rb', encoding='utf-8') as df:
            loaded = saneyaml.load(df.read())
        self.licenses = loaded.get('licenses', [])
        # TODO: this is for future support of license expressions
        self.license = loaded.get('license')
        self.license_choice = loaded.get('license_choice')
        self.notes = loaded.get('notes')
        # True if the test is expected to fail
        self.expected_failure = loaded.get('expected_failure', False)
        # True if the test should be skipped
        self.skip = loaded.get('skip', False)
def as_template(scan_data, template='html'):
    """
    Return an string built from a list of results and the provided template.
    The template defaults to the standard HTML template format or can point to
    the path of a custom template file.
    """
    from licensedcode.models import get_license
    if template == 'html':
        template = get_template(get_template_dir('html'))
    else:
        # load a custom template
        tpath = fileutils.as_posixpath(abspath(expanduser(template)))
        assert isfile(tpath)
        tdir = fileutils.parent_directory(tpath)
        tfile = fileutils.file_name(tpath)
        template = get_template(tdir, tfile)
    # per-location findings, file infos and packages, keyed by scan location
    converted = OrderedDict()
    converted_infos = OrderedDict()
    converted_packages = OrderedDict()
    # unique license entries seen across all results, keyed by license key
    licenses = {}
    # Create a flattened data dict keyed by location
    for scan_result in scan_data:
        location = scan_result['location']
        results = []
        if 'copyrights' in scan_result:
            for entry in scan_result['copyrights']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'copyright',
                    # NOTE: we display one statement per line.
                    'value': '\n'.join(entry['statements']),
                })
        if 'licenses' in scan_result:
            for entry in scan_result['licenses']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'license',
                    'value': entry['key'],
                })
                # first occurrence of a license key: cache it and attach the
                # full License object for the template to render
                if entry['key'] not in licenses:
                    licenses[entry['key']] = entry
                    entry['object'] = get_license(entry['key'])
        if results:
            # findings are sorted by start line for display
            converted[location] = sorted(results, key=itemgetter('start'))
        if 'infos' in scan_result:
            converted_infos[location] = scan_result['infos']
        if 'packages' in scan_result:
            converted_packages[location] = scan_result['packages']
    # stable, key-sorted license listing
    licenses = OrderedDict(sorted(licenses.items()))
    results = {
        'license_copyright': converted,
        'infos': converted_infos,
        'packages': converted_packages
    }
    return template.render(results=results, licenses=licenses)