def recognizes(cls, file):
    """Decide whether ``file`` is a .buildinfo whose listed files check out.

    After the parent class accepts the file, it is parsed with ``Dsc`` and
    must carry a ``Checksums-Sha256`` field.  Every entry of that field has
    to exist in the same directory as ``file`` and hash to the recorded
    SHA-256 digest.

    On success the parsed control data is stored on ``file._deb822``.

    Returns:
        bool: True when all checks pass, False otherwise.
    """
    if not super().recognizes(file):
        return False

    with open(file.path, 'rb') as handle:
        # A .buildinfo file can be parsed exactly like a .dsc
        control = Dsc(handle)

    if 'Checksums-Sha256' not in control:
        return False

    # XXX: this will not work for containers
    base_dir = os.path.dirname(file.path)

    for entry in control.get('Checksums-Sha256'):
        candidate = os.path.join(base_dir, entry['Name'])
        if not os.path.exists(candidate):
            return False

        digest = hashlib.sha256()
        with open(candidate, 'rb') as stream:
            # Hash in 32 KiB chunks so large artifacts are never fully
            # loaded into memory.
            for chunk in iter(functools.partial(stream.read, 32768), b''):
                digest.update(chunk)

        if digest.hexdigest() != entry['sha256']:
            return False

    file._deb822 = control
    return True
def recognizes(cls, file):
    """Decide whether ``file`` is a .dsc whose listed files check out.

    After the parent class accepts the file, it is parsed with ``Dsc``.
    Every entry of its ``Files`` field has to exist in the same directory
    as ``file`` and hash to the recorded MD5 digest.

    On success the parsed control data is stored on ``file._deb822``.

    Returns:
        bool: True when all checks pass; False when the parent class
        rejects the file, the ``Files`` field is absent, a listed file is
        missing, or a digest differs.
    """
    if not super().recognizes(file):
        return False

    with open(file.path, 'rb') as f:
        dsc = Dsc(f)

    # Without a Files field there is nothing to verify; report the file as
    # unrecognized instead of raising TypeError by iterating over None.
    entries = dsc.get('Files')
    if entries is None:
        return False

    # XXX: this will not work for containers
    dsc_dir = os.path.dirname(file.path)

    for d in entries:
        in_dsc_path = os.path.join(dsc_dir, d['Name'])
        if not os.path.exists(in_dsc_path):
            return False

        md5 = hashlib.md5()
        # Use a distinct name for the handle so the .dsc handle above is
        # not shadowed.
        with open(in_dsc_path, 'rb') as member:
            # Hash in 32 KiB chunks to keep memory use bounded.
            for buf in iter(functools.partial(member.read, 32768), b''):
                md5.update(buf)

        if md5.hexdigest() != d['md5sum']:
            return False

    file._deb822 = dsc
    return True
def recognizes(file):
    """Decide whether ``file`` is a .dsc whose listed files check out.

    The file name must match ``DotDscFile.RE_FILE_EXTENSION``.  The file is
    then parsed with ``Dsc`` and every entry of its ``Files`` field has to
    exist in the same directory as ``file`` and hash to the recorded MD5
    digest.

    On success the parsed control data is stored on ``file._deb822``.

    Returns:
        bool: True when all checks pass; False when the name does not
        match, the ``Files`` field is absent, a listed file is missing, or
        a digest differs.
    """
    if not DotDscFile.RE_FILE_EXTENSION.search(file.name):
        return False

    with open(file.path, 'rb') as f:
        dsc = Dsc(f)

    # Without a Files field there is nothing to verify; report the file as
    # unrecognized instead of raising TypeError by iterating over None.
    entries = dsc.get('Files')
    if entries is None:
        return False

    # XXX: this will not work for containers
    dsc_dir = os.path.dirname(file.path)

    for d in entries:
        in_dsc_path = os.path.join(dsc_dir, d['Name'])
        if not os.path.exists(in_dsc_path):
            return False

        md5 = hashlib.md5()
        # Use a distinct name for the handle so the .dsc handle above is
        # not shadowed.
        with open(in_dsc_path, 'rb') as member:
            # Hash in 32 KiB chunks to keep memory use bounded.
            for buf in iter(partial(member.read, 32768), b''):
                md5.update(buf)

        if md5.hexdigest() != d['md5sum']:
            return False

    file._deb822 = dsc
    return True
def recognizes(file):
    """Decide whether ``file`` is a .buildinfo whose listed files check out.

    The file name must match ``DotBuildinfoFile.RE_FILE_EXTENSION`` and the
    parsed control data must carry a ``Checksums-Sha256`` field.  Every
    entry of that field has to exist in the same directory as ``file`` and
    hash to the recorded SHA-256 digest.

    On success the parsed control data is stored on ``file._deb822``.

    Returns:
        bool: True when all checks pass, False otherwise.
    """
    name_matches = DotBuildinfoFile.RE_FILE_EXTENSION.search(file.name)
    if not name_matches:
        return False

    with open(file.path, 'rb') as handle:
        # The .dsc parser handles .buildinfo files as well
        control = Dsc(handle)

    if 'Checksums-Sha256' not in control:
        return False

    # XXX: this will not work for containers
    parent_dir = os.path.dirname(file.path)

    for entry in control.get('Checksums-Sha256'):
        artifact_path = os.path.join(parent_dir, entry['Name'])
        if not os.path.exists(artifact_path):
            return False

        hasher = hashlib.sha256()
        with open(artifact_path, 'rb') as stream:
            # Feed the hash 32 KiB at a time rather than reading the whole
            # artifact at once.
            read_chunk = partial(stream.read, 32768)
            for chunk in iter(read_chunk, b''):
                hasher.update(chunk)

        if hasher.hexdigest() != entry['sha256']:
            return False

    file._deb822 = control
    return True
def get_intrinsic_package_metadata(
    p_info: DebianPackageInfo, dsc_path: str, extracted_path: str
) -> IntrinsicPackageMetadata:
    """Collect the metadata carried by a source package itself.

    The ``.dsc`` at ``dsc_path`` provides the maintainer and uploaders;
    ``debian/changelog`` below ``extracted_path`` provides the changelog
    author, date and version history.

    Args:
        p_info: the package information
        dsc_path: path to the package's dsc file
        extracted_path: the path where the package got extracted

    Returns:
        IntrinsicPackageMetadata: built from

        - name and intrinsic version taken from ``p_info``
        - changelog: author, ISO-formatted date, and a history of
          (package_name, package_version) tuples for every changelog block
          except the first one
        - maintainers: the ``Maintainer`` field followed by the entries
          split out of the ``Uploaders`` field

    """
    with open(dsc_path, "rb") as dsc_file:
        dsc_fields = Dsc(dsc_file)

    # The changelog supplies the rest of the package information
    changelog_path = path.join(extracted_path, "debian/changelog")
    with open(changelog_path, "rb") as changelog_file:
        try:
            entries = Changelog(changelog_file)
        except UnicodeDecodeError:
            log_context = {
                "swh_type": "deb_changelog_encoding",
                "swh_name": p_info.name,
                "swh_version": str(p_info.version),
                "swh_changelog": changelog_path,
            }
            logger.warning(
                "Unknown encoding for changelog %s, falling back to iso"
                % changelog_path,
                extra=log_context,
            )

            # The failed attempt consumed the file, so rewind before
            # parsing again with the fallback encoding
            changelog_file.seek(0)
            entries = Changelog(changelog_file, encoding="iso-8859-15")

    versions: List[Tuple[str, str]] = []
    for entry in entries:
        assert entry.package is not None
        versions.append((entry.package, str(entry.version)))

    changelog = DebianPackageChangelog(
        person=uid_to_person(entries.author),
        date=parse_date(entries.date).isoformat(),
        # the first block is left out of the recorded history
        history=versions[1:],
    )

    uploaders_field = dsc_fields.get("Uploaders", "")
    maintainers = [
        uid_to_person(dsc_fields["Maintainer"]),
        *(
            uid_to_person(uploader)
            for uploader in UPLOADERS_SPLIT.split(uploaders_field)
        ),
    ]

    return IntrinsicPackageMetadata(
        name=p_info.name,
        version=str(p_info.intrinsic_version),
        changelog=changelog,
        maintainers=maintainers,
    )
def get_package_metadata(package, extracted_path, keyrings, log=None):
    """Get the package metadata from the source package described by
    ``package``, extracted in ``extracted_path``.

    Args:
        package: the package dict; the keys ``dsc`` (path to the dsc
            file), ``name``, ``version`` and ``id`` are read
        extracted_path: the path where the package got extracted (joined
            with a bytes component, so it must be a bytes path)
        keyrings: a list of keyrings to use for gpg actions
        log: a logging.Logger object; when falsy nothing is logged

    Returns:
        a dict with the following keys

        - original_artifact: ``get_file_info`` results for the dsc file
          followed by every file listed in it
        - package_info: dict with the keys ``name``, ``version``,
          ``lister_metadata``, ``changelog`` (``person``, ``date`` and
          ``history``, a list of (package_name, package_version) tuples
          for every changelog block except the first), ``pgp_signature``
          (None when it cannot be retrieved) and ``maintainers``

    """
    ret = {}

    # Parse the dsc file to retrieve all the original artifact files
    dsc_path = package['dsc']
    with open(dsc_path, 'rb') as dsc:
        parsed_dsc = Dsc(dsc)

    source_files = [get_file_info(dsc_path)]

    dsc_dir = os.path.dirname(dsc_path)
    for entry in parsed_dsc['files']:
        file_path = os.path.join(dsc_dir, entry['name'])
        source_files.append(get_file_info(file_path))

    ret['original_artifact'] = source_files

    # Parse the changelog to retrieve the rest of the package information
    changelog_path = os.path.join(extracted_path, b'debian/changelog')
    with open(changelog_path, 'rb') as changelog:
        try:
            parsed_changelog = Changelog(changelog)
        except UnicodeDecodeError:
            if log:
                # Decode leniently: an undecodable path must not abort the
                # fallback while the log message is being built.
                printable_path = changelog_path.decode('utf-8', 'replace')
                # Logger.warn is a deprecated alias of Logger.warning.
                log.warning(
                    'Unknown encoding for changelog %s,'
                    ' falling back to iso',
                    printable_path,
                    extra={
                        'swh_type': 'deb_changelog_encoding',
                        'swh_name': package['name'],
                        'swh_version': str(package['version']),
                        'swh_changelog': printable_path,
                    },
                )

            # need to reset as Changelog scrolls to the end of the file
            changelog.seek(0)
            parsed_changelog = Changelog(changelog, encoding='iso-8859-15')

    package_info = {
        'name': package['name'],
        'version': str(package['version']),
        'lister_metadata': {
            'lister': 'snapshot.debian.org',
            'id': package['id'],
        },
        'changelog': {
            'person': converters.uid_to_person(parsed_changelog.author),
            'date': parse_date(parsed_changelog.date),
            # the first changelog block is left out of the history
            'history': [(block.package, str(block.version))
                        for block in parsed_changelog][1:],
        },
    }

    try:
        gpg_info = parsed_dsc.get_gpg_info(keyrings=keyrings)
        package_info['pgp_signature'] = get_gpg_info_signature(gpg_info)
    except ValueError:
        if log:
            log.info(
                'Could not get PGP signature on package %s_%s',
                package['name'],
                package['version'],
                extra={
                    'swh_type': 'deb_missing_signature',
                    'swh_name': package['name'],
                    'swh_version': str(package['version']),
                },
            )
        package_info['pgp_signature'] = None

    maintainers = [
        converters.uid_to_person(parsed_dsc['Maintainer'], encode=False),
    ]
    maintainers.extend(
        converters.uid_to_person(person, encode=False)
        for person in UPLOADERS_SPLIT.split(parsed_dsc.get('Uploaders', ''))
    )
    package_info['maintainers'] = maintainers

    ret['package_info'] = package_info

    return ret
def get_package_metadata(package, dsc_path, extracted_path):
    """Get the package metadata from the source package at dsc_path,
    extracted in extracted_path.

    Args:
        package: the package dict; the keys ``files``, ``name`` and
            ``version`` are read
        dsc_path: path to the package's dsc file
        extracted_path: the path where the package got extracted

    Returns:
        dict: a dictionary with the following keys:

        - original_artifact: ``get_file_info`` results for the dsc file
          followed by every file named in ``package['files']``
        - package_info: dict with the keys ``name``, ``version``,
          ``changelog`` (``person``, ``date`` and ``history``, a list of
          (package_name, package_version) tuples for every changelog block
          except the first) and ``maintainers``

    """
    ret = {}

    with open(dsc_path, 'rb') as dsc:
        parsed_dsc = Dsc(dsc)

    source_files = [get_file_info(dsc_path)]

    dsc_dir = os.path.dirname(dsc_path)
    for filename in package['files']:
        file_path = os.path.join(dsc_dir, filename)
        source_files.append(get_file_info(file_path))

    ret['original_artifact'] = source_files

    # Parse the changelog to retrieve the rest of the package information
    changelog_path = os.path.join(extracted_path, 'debian/changelog')
    with open(changelog_path, 'rb') as changelog:
        try:
            parsed_changelog = Changelog(changelog)
        except UnicodeDecodeError:
            # changelog_path is a str (it is joined with a str component),
            # so it is logged as is: calling .decode() on it would raise
            # AttributeError and the fallback below would never run.
            # Logger.warn is a deprecated alias of Logger.warning.
            log.warning(
                'Unknown encoding for changelog %s,'
                ' falling back to iso',
                changelog_path,
                extra={
                    'swh_type': 'deb_changelog_encoding',
                    'swh_name': package['name'],
                    'swh_version': str(package['version']),
                    'swh_changelog': changelog_path,
                },
            )

            # need to reset as Changelog scrolls to the end of the file
            changelog.seek(0)
            parsed_changelog = Changelog(changelog, encoding='iso-8859-15')

    package_info = {
        'name': package['name'],
        'version': str(package['version']),
        'changelog': {
            'person': converters.uid_to_person(parsed_changelog.author),
            'date': parse_date(parsed_changelog.date),
            # the first changelog block is left out of the history
            'history': [(block.package, str(block.version))
                        for block in parsed_changelog][1:],
        },
    }

    maintainers = [
        converters.uid_to_person(parsed_dsc['Maintainer'], encode=False),
    ]
    maintainers.extend(
        converters.uid_to_person(person, encode=False)
        for person in UPLOADERS_SPLIT.split(parsed_dsc.get('Uploaders', ''))
    )
    package_info['maintainers'] = maintainers

    ret['package_info'] = package_info

    return ret