class PackageFile(BaseModel):
    """
    Describe a single file owned by a package: its path, its size and its
    checksums.
    """

    path = String(
        repr=True,
        label='Path of this installed file',
        help=(
            'The path of this installed file either relative to a rootfs '
            '(typical for system packages) or a path in this scan (typical '
            'for application packages).'
        ),
    )

    size = Integer(
        label='file size',
        help='size of the file in bytes',
    )

    # Checksums are all stored as hexadecimal strings.
    sha1 = String(
        label='SHA1 checksum',
        help='SHA1 checksum for this file in hexadecimal',
    )

    md5 = String(
        label='MD5 checksum',
        help='MD5 checksum for this file in hexadecimal',
    )

    sha256 = String(
        label='SHA256 checksum',
        help='SHA256 checksum for this file in hexadecimal',
    )

    sha512 = String(
        label='SHA512 checksum',
        help='SHA512 checksum for this file in hexadecimal',
    )
class FileReference(ModelMixin):
    """
    Describe a file mentioned in the files listing of a manifest or of a
    data file.
    """

    path = String(
        repr=True,
        label='Path of this file.',
        help=(
            'The file or directory POSIX path. The actual root for this path '
            'is specific to a datafile format. For instance it is the rootfs '
            'root for Linux system packages.'
        ),
    )

    size = Integer(label='file size', help='size of the file in bytes', repr=False)

    # Checksums are all stored as hexadecimal strings and kept out of repr().
    sha1 = String(
        repr=False,
        label='SHA1 checksum',
        help='SHA1 checksum for this file in hexadecimal',
    )

    md5 = String(
        repr=False,
        label='MD5 checksum',
        help='MD5 checksum for this file in hexadecimal',
    )

    sha256 = String(
        repr=False,
        label='SHA256 checksum',
        help='SHA256 checksum for this file in hexadecimal',
    )

    sha512 = String(
        repr=False,
        label='SHA512 checksum',
        help='SHA512 checksum for this file in hexadecimal',
    )

    extra_data = Mapping(
        label='extra data',
        help='A mapping of arbitrary extra file reference data.',
    )

    def update(self, other):
        """
        Fill in the attributes of this reference that are still empty using
        the non-empty values of the ``other`` file reference. Attributes that
        already hold a non-empty value are left untouched.
        Return this reference.
        """
        incoming = other.to_dict()
        for field_name in incoming:
            new_value = incoming[field_name]
            # Only copy a value when there is one to copy and nothing would
            # be overwritten on this side.
            if new_value and not getattr(self, field_name, None):
                setattr(self, field_name, new_value)
        return self
class Package(BasePackage):
    """
    A package object as represented by its manifest data.
    """

    # Optional. Public default primary language for a package class.
    # Used by __attrs_post_init__ when no primary_language is provided.
    default_primary_language = None

    primary_language = String(
        label='Primary programming language',
        help='Primary programming language',
    )

    description = String(
        label='Description',
        help='Description for this package. '
             'By convention the first should be a summary when available.')

    release_date = Date(
        label='release date',
        help='Release date of the package')

    parties = List(
        item_type=Party,
        label='parties',
        help='A list of parties such as a person, project or organization.')

    keywords = List(
        item_type=str,
        label='keywords',
        help='A list of keywords.')

    homepage_url = String(
        label='homepage URL',
        help='URL to the homepage for this package.')

    download_url = String(
        label='Download URL',
        help='A direct download URL.')

    size = Integer(
        default=None,
        label='download size',
        help='size of the package download in bytes')

    sha1 = String(
        label='SHA1 checksum',
        help='SHA1 checksum for this download in hexadecimal')

    md5 = String(
        label='MD5 checksum',
        help='MD5 checksum for this download in hexadecimal')

    sha256 = String(
        label='SHA256 checksum',
        help='SHA256 checksum for this download in hexadecimal')

    sha512 = String(
        label='SHA512 checksum',
        help='SHA512 checksum for this download in hexadecimal')

    bug_tracking_url = String(
        label='bug tracking URL',
        help='URL to the issue or bug tracker for this package')

    code_view_url = String(
        label='code view URL',
        help='a URL where the code can be browsed online')

    vcs_url = String(
        help='a URL to the VCS repository in the SPDX form of: '
             'https://github.com/nexb/scancode-toolkit.git@405aaa4b3 '
             'See SPDX specification "Package Download Location" '
             'at https://spdx.org/spdx-specification-21-web-version#h.49x2ik5 ')

    copyright = String(
        label='Copyright',
        help='Copyright statements for this package. Typically one per line.')

    # The help text used to end abruptly with "or ."; it now carries the
    # complete sentence.
    license_expression = String(
        label='license expression',
        help='The license expression for this package typically derived '
             'from its declared license or from some other type-specific '
             'routine or convention.')

    declared_license = String(
        label='declared license',
        help='The declared license mention, tag or text as found in a '
             'package manifest.')

    notice_text = String(
        label='notice text',
        help='A notice text for this package.')

    root_path = String(
        label='package root path',
        help='The path to the root of the package documented in this manifest '
             'if any, such as a Maven .pom or a npm package.json parent directory.')

    dependencies = List(
        item_type=DependentPackage,
        label='dependencies',
        help='A list of DependentPackage for this package. ')

    contains_source_code = TriBoolean(
        label='contains source code',
        help='Flag set to True if this package contains its own source code, None '
             'if this is unknown, False if not.')

    # Items are purl strings: the item type is ``str`` (as for ``keywords``),
    # not the ``String`` field factory.
    source_packages = List(
        item_type=str,
        label='List of related source code packages',
        help='A list of related source code Package URLs (aka. "purl") for '
             'this package. For instance an SRPM is the "source package" for a '
             'binary RPM.')

    def __attrs_post_init__(self, *args, **kwargs):
        """
        Fall back to the class-level ``default_type`` and
        ``default_primary_language`` when the corresponding instance
        attributes are empty.
        """
        if not self.type and hasattr(self, 'default_type'):
            self.type = self.default_type

        if not self.primary_language and hasattr(self, 'default_primary_language'):
            self.primary_language = self.default_primary_language

    @classmethod
    def recognize(cls, location):
        """
        Yield one or more Package objects given a file location pointing to a
        package archive, manifest or similar.

        Sub-classes should override to implement their own package
        recognition. This base implementation raises NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def get_package_root(cls, manifest_resource, codebase):
        """
        Return the Resource for the package root given a `manifest_resource`
        Resource object that represents a manifest in the `codebase` Codebase.

        Each package type and instance have different conventions on how a
        package manifest relates to the root of a package.

        For instance, given a "package.json" file, the root of an npm package
        is the parent directory. The same applies with a Maven "pom.xml". In
        the case of a "xyz.pom" file found inside a JAR META-INF/ directory,
        the root is the JAR itself which may not be the direct parent.

        Each package type should override this as needed. The default is to
        return the `manifest_resource` itself.
        """
        return manifest_resource

    @classmethod
    def get_package_resources(cls, package_root, codebase):
        """
        Yield the Resources of a Package starting from `package_root`.

        The root itself is yielded first unless it is ignored; the walk below
        the root is top-down and skips ignored resources.
        """
        if not Package.is_ignored_package_resource(package_root, codebase):
            yield package_root

        yield from package_root.walk(
            codebase,
            topdown=True,
            ignored=Package.is_ignored_package_resource,
        )

    @classmethod
    def ignore_resource(cls, resource, codebase):
        """
        Return True if `resource` should be ignored.
        This default implementation never ignores a resource.
        """
        return False

    @staticmethod
    def is_ignored_package_resource(resource, codebase):
        """
        Return True if any of the known package types ignores `resource`.
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from packagedcode import PACKAGE_TYPES
        return any(
            pt.ignore_resource(resource, codebase) for pt in PACKAGE_TYPES)

    def compute_normalized_license(self):
        """
        Return a normalized license_expression string using the
        declared_license field. Return 'unknown' if there is a declared
        license but it cannot be detected and return None if there is no
        declared license.

        Subclasses can override to handle specifics such as supporting
        specific license ids and conventions.
        """
        # This resolves to the module-level function of the same name, not to
        # this method.
        return compute_normalized_license(self.declared_license)

    @classmethod
    def extra_key_files(cls):
        """
        Return a list of extra key file paths (or path glob patterns) beyond
        standard, well known key files for this Package. List items are
        strings that are either paths or glob patterns and are relative to
        the package root.

        Knowing if a file is a "key-file" file is important for
        classification and summarization. For instance, a JAR can have key
        files that are not top level under the META-INF directory. Or a .gem
        archive contains a metadata.gz file.

        Sub-classes can implement as needed. The default is an empty list.
        """
        return []

    @classmethod
    def extra_root_dirs(cls):
        """
        Return a list of extra package root-like directory paths (or path
        glob patterns) that should be considered to determine if a file is a
        top level file or not. List items are strings that are either paths
        or glob patterns and are relative to the package root.

        Knowing if a file is a "top-level" file is important for
        classification and summarization.

        Sub-classes can implement as needed. The default is an empty list.
        """
        return []
class PackageData(IdentifiablePackageData):
    """
    The data of a given package type.
    This is the core model to store normalized package data parsed from
    package datafiles (such as a manifest) or stored in a top-level package.
    """

    primary_language = String(
        label='Primary programming language',
        help='Primary programming language',
    )

    description = String(
        label='Description',
        help='Description for this package. '
             'By convention the first should be a summary when available.')

    release_date = Date(
        label='release date',
        help='Release date of the package')

    parties = List(
        item_type=Party,
        label='parties',
        help='A list of parties such as a person, project or organization.')

    keywords = List(
        item_type=str,
        label='keywords',
        help='A list of keywords.')

    homepage_url = String(
        label='homepage URL',
        help='URL to the homepage for this package.')

    download_url = String(
        label='Download URL',
        help='A direct download URL.')

    size = Integer(
        default=None,
        label='download size',
        help='size of the package download in bytes')

    sha1 = String(
        label='SHA1 checksum',
        help='SHA1 checksum for this package download in hexadecimal')

    md5 = String(
        label='MD5 checksum',
        help='MD5 checksum for this package download in hexadecimal')

    sha256 = String(
        label='SHA256 checksum',
        help='SHA256 checksum for this package download in hexadecimal')

    sha512 = String(
        label='SHA512 checksum',
        help='SHA512 checksum for this package download in hexadecimal')

    bug_tracking_url = String(
        label='bug tracking URL',
        help='URL to the issue or bug tracker for this package')

    code_view_url = String(
        label='code view URL',
        help='a URL where the code can be browsed online')

    vcs_url = String(
        help='a URL to the VCS repository in the SPDX form of: '
             'https://github.com/nexb/scancode-toolkit.git@405aaa4b3 '
             'See SPDX specification "Package Download Location" '
             'at https://spdx.org/spdx-specification-21-web-version#h.49x2ik5 ')

    copyright = String(
        label='Copyright',
        help='Copyright statements for this package. Typically one per line.')

    license_expression = String(
        label='license expression',
        help='The license expression for this package typically derived '
             'from its declared license or from some other type-specific '
             'routine or convention.')

    declared_license = String(
        label='declared license',
        help='The declared license mention, tag or text as found in a '
             'package manifest. This can be a string, a list or dict of '
             'strings possibly nested, as found originally in the manifest.')

    notice_text = String(
        label='notice text',
        help='A notice text for this package.')

    source_packages = List(
        item_type=str,
        label='List of related source code package purls',
        help='A list of related source code Package URLs (aka. "purl") for '
             'this package. For instance an SRPM is the "source package" for a '
             'binary RPM.')

    file_references = List(
        item_type=FileReference,
        label='referenced files',
        help='List of file paths and details for files referenced in a package '
             'manifest. These may not actually exist on the filesystem. '
             'The exact semantics and base of these paths is specific to a '
             'package type or datafile format.')

    extra_data = Mapping(
        label='extra data',
        help='A mapping of arbitrary extra package data.',
    )

    dependencies = List(
        item_type=DependentPackage,
        label='dependencies',
        help='A list of DependentPackage for this package.')

    repository_homepage_url = String(
        label='package repository homepage URL.',
        help='URL to the page for this package in its package repository. '
             'This is typically different from the package homepage URL proper.')

    repository_download_url = String(
        label='package repository download URL.',
        help='download URL to download the actual archive of code of this '
             'package in its package repository. '
             'This may be different from the actual download URL.')

    api_data_url = String(
        label='package repository API URL.',
        help='API URL to obtain structured data for this package such as the '
             'URL to a JSON or XML api its package repository.')

    datasource_id = String(
        label='datasource id',
        help='Datasource identifier for the source of these package data.',
        repr=True,
    )

    def to_dict(self, with_details=True, **kwargs):
        """
        Return a mapping of this package data as built by the parent
        ``to_dict``. When ``with_details`` is False, the ``file_references``,
        ``dependencies`` and ``datasource_id`` items are removed.
        """
        mapping = super().to_dict(with_details=with_details, **kwargs)
        if not with_details:
            # these are not used in the Package subclass
            mapping.pop('file_references', None)
            mapping.pop('dependencies', None)
            mapping.pop('datasource_id', None)
        return mapping

    @classmethod
    def from_dict(cls, mapping):
        """
        Return an instance of PackageData built from a ``mapping`` native
        Python data. Known attributes that store a list of objects are also
        "rehydrated" (such as models.Party).

        Items with an empty value and computed attributes (such as "purl")
        are skipped.

        Unknown attributes provided in ``mapping`` that do not exist as
        fields in the class are kept as items in the extra_data mapping,
        whether or not ``mapping`` came with an "extra_data" item of its own.
        The ``mapping`` argument and its nested "extra_data" are not modified.

        An Exception is raised if an "unknown attribute" name already exists
        as an extra_data name.
        """
        # TODO: consider using a proper library for this such as cattrs,
        # marshmallow, etc. or use the field type that we declare.

        # these are computed attributes serialized on a package
        # that should not be recreated when de-serializing
        computed_attributes = {'purl'}

        fields_by_name = attr.fields_dict(cls)

        # Work on a copy: unknown attributes are merged in below and the
        # caller's data must not be mutated as a side effect.
        extra_data = dict(mapping.get('extra_data') or {})
        package_data = {}

        # Each of these are lists of class instances tracked here, which are
        # stored as a list of mappings in scan_data
        list_fields_by_item = {
            'parties': Party,
            'dependencies': DependentPackage,
            'file_references': FileReference,
        }

        for name, value in mapping.items():
            if not value or name in computed_attributes:
                continue

            if name == 'extra_data':
                # already captured above; it is attached once after the loop
                # together with any unknown attribute
                continue

            if name not in fields_by_name:
                # keep unknown fields as extra data
                if name in extra_data:
                    raise Exception(
                        f'Invalid package "scan_data" with duplicated name: {name!r}={value!r} '
                        f'present both as attribute AND as extra_data: {name!r}={extra_data[name]!r}'
                    )
                extra_data[name] = value
                continue

            list_item_type = list_fields_by_item.get(name)
            if list_item_type:
                # re-hydrate lists of typed objects
                package_data[name] = list(
                    _rehydrate_list(cls=list_item_type, values=value))
            else:
                # this is a plain, non-nested field
                package_data[name] = value

        # Attach extra data explicitly so that unknown attributes are kept
        # even when the mapping had no (or an empty) "extra_data" item.
        if extra_data:
            package_data['extra_data'] = extra_data

        return super().from_dict(package_data)