def set_packages_root(resource, codebase):
    """
    Record, on each package manifest found in `resource`, the path of the
    root Resource of the matching package or build script as `root_path`.

    Return None when `resource` is not a file or carries no package manifest.
    Otherwise return `resource`; it is saved in `codebase` first when at least
    one manifest received a `root_path`.
    """
    # Only files can have a package, and a file without any manifest has
    # nothing to update.
    manifests = resource.package_manifests if resource.is_file else None
    if not manifests:
        return

    # NOTE: a single file should normally yield a single package. Some
    # manifests can still document several packages at once, such as multiple
    # arches/platforms for a gemspec, or multiple sub-packages (declared with
    # "%package") in an RPM .spec file.
    root_was_set = False
    for manifest in manifests:
        root = get_package_instance(manifest).get_package_root(resource, codebase)

        # A missing root can happen when scanning a single resource that is
        # itself a package. A root that is the codebase root is also skipped
        # when the codebase runs in stripped-root mode.
        if not root or (root.is_root and codebase.strip_root):
            continue

        manifest['root_path'] = root.path
        root_was_set = True

    if root_was_set:
        # Persist the manifests updated above.
        codebase.save_resource(resource)
    return resource
def test_get_package_resources_on_nested_packages_should_include_manifest(self):
    """
    Check that the resources reported for every package of the nested npm
    packages scan include a `package.json` manifest.
    """
    from packagedcode import get_package_instance
    from scancode.resource import VirtualCodebase

    codebase = VirtualCodebase(
        self.get_test_loc('plugin_consolidate/nested-npm-packages.json'))

    for resource in codebase.walk():
        for package_data in resource.packages:
            package = get_package_instance(package_data)
            resource_names = [
                package_resource.name
                for package_resource
                in package.get_package_resources(resource, codebase)
            ]
            # On failure, report the path of the resource holding the package.
            assert 'package.json' in resource_names, resource.path
def create_discovered_packages(project, scanned_codebase):
    """
    Store the packages of a ScanCode `scanned_codebase`
    (a scancode.resource.Codebase object) in the DB as DiscoveredPackage of
    `project`, and relate the resources of each package to CodebaseResource.
    """
    for scanned_resource in scanned_codebase.walk(skip_root=True):
        scanned_packages = getattr(scanned_resource, "packages", [])
        if not scanned_packages:
            continue

        # The CodebaseResource matching the resource that carries the packages.
        cbr = CodebaseResource.objects.get(
            project=project,
            path=scanned_resource.get_path(strip_root=True),
        )

        for scan_data in scanned_packages:
            discovered_package = pipes.update_or_create_package(project, scan_data)
            set_codebase_resource_for_package(
                codebase_resource=cbr,
                discovered_package=discovered_package,
            )

            scanned_package = packagedcode.get_package_instance(scan_data)

            # Relate every resource attached to that package.
            for member in scanned_package.get_package_resources(
                scanned_resource, scanned_codebase
            ):
                member_cbr = CodebaseResource.objects.get(
                    project=project,
                    path=member.get_path(strip_root=True),
                )
                set_codebase_resource_for_package(
                    codebase_resource=member_cbr,
                    discovered_package=discovered_package,
                )

            # Dependencies are also stored as their own packages.
            # TODO: we should instead relate these to the package
            # TODO: we likely need a status for DiscoveredPackage
            for dependency in scanned_package.dependencies or []:
                # FIXME: we should get DependentPackage instances and not a mapping
                # NOTE(review): if `dependency` is a plain mapping, getattr()
                # never finds "purl" and the dependency is skipped -- confirm
                # whether that is intended.
                dependency_purl = getattr(dependency, "purl", None)
                if not dependency_purl:
                    # TODO: we should log that
                    continue

                dependency_data = PackageURL.from_string(dependency_purl).to_dict()
                dependent_package = pipes.update_or_create_package(
                    project, dependency_data
                )
                # Attached to the current resource (typically a manifest?)
                set_codebase_resource_for_package(
                    codebase_resource=cbr,
                    discovered_package=dependent_package,
                )
def test_get_package_resources_on_nested_packages_should_include_manifest(self):
    """
    Scan the nested packages fixture with the `-p` option and check that the
    resources of every package manifest include a `package.json` file.
    """
    from packagedcode import get_package_instance
    from commoncode.resource import VirtualCodebase

    codebase = VirtualCodebase(
        self.get_scan('plugin_consolidate/nested-packages', cli_options='-p'))

    for resource in codebase.walk():
        for package_data in resource.package_manifests:
            package = get_package_instance(package_data)
            resource_names = [
                package_resource.name
                for package_resource
                in package.get_package_resources(resource, codebase)
            ]
            # On failure, report the path of the resource holding the manifest.
            assert 'package.json' in resource_names, resource.path
def test_Package_get_package_resource_works_with_nested_packages_and_ignores(self):
    """
    Check that every package of the nested npm packages scan is an NpmPackage
    whose resources include a `package.json` manifest.
    """
    from packagedcode import get_package_instance
    from packagedcode import npm
    from commoncode.resource import VirtualCodebase

    codebase = VirtualCodebase(
        self.get_test_loc('models/nested-npm-packages.json'))

    for resource in codebase.walk():
        for package_data in resource.packages:
            package = get_package_instance(package_data)
            assert isinstance(package, npm.NpmPackage)

            resource_names = [
                package_resource.name
                for package_resource
                in package.get_package_resources(resource, codebase)
            ]
            # On failure, report the path of the resource holding the package.
            assert 'package.json' in resource_names, resource.path
def _cluster_holders_sorted_by_key(holders):
    """
    Return the holders of the `copyright_summary.cluster()` of `holders`,
    sorted by holder key.
    """
    clustered = copyright_summary.cluster(holders)
    return [holder for holder, _ in sorted(clustered, key=lambda t: t[0].key)]


def get_consolidated_packages(codebase):
    """
    Yield a ConsolidatedPackage for each detected package in the codebase, or
    a ConsolidatedComponent of type 'build' when the package is a
    BaseBuildManifestPackage.

    For each package:
    - its root Resource is flagged with `extra_data['package_root']` and saved;
    - unless it is a build manifest, each of its resources is flagged with
      `extra_data['in_package_component']` and saved;
    - the "core" license expression and holders come from the package data,
      and the "other" ones are collected from the package resources.

    A package whose root Resource cannot be determined is skipped.
    """
    for resource in codebase.walk(topdown=False):
        for package_data in resource.packages:
            package = get_package_instance(package_data)
            package_root = package.get_package_root(resource, codebase)
            if not package_root:
                # No root was found for this manifest: there is nothing to
                # flag nor to walk, so skip the package rather than fail on a
                # missing Resource.
                continue

            package_root.extra_data['package_root'] = True
            package_root.save(codebase)

            is_build_file = isinstance(package, BaseBuildManifestPackage)
            package_resources = list(
                package.get_package_resources(package_root, codebase))

            package_license_expression = package.license_expression
            package_copyright = package.copyright

            # Holders declared by the package itself, detected from its
            # copyright statement treated as a single numbered line.
            package_holders = []
            if package_copyright:
                numbered_lines = [(0, package_copyright)]
                detections = CopyrightDetector().detect(
                    numbered_lines,
                    copyrights=False,
                    holders=True,
                    authors=False,
                    include_years=False,
                )
                package_holders = [holder for _, holder, _, _ in detections]
            package_holders = process_holders(package_holders)

            # Licenses and holders discovered in the package resources.
            discovered_license_expressions = []
            discovered_holders = []
            for package_resource in package_resources:
                if not is_build_file:
                    # If a resource is part of a package Component, then it
                    # cannot be part of any other type of Component
                    package_resource.extra_data['in_package_component'] = True
                    package_resource.save(codebase)

                if package_resource.license_expressions:
                    package_resource_license_expression = combine_expressions(
                        package_resource.license_expressions)
                    if package_resource_license_expression:
                        discovered_license_expressions.append(
                            package_resource_license_expression)

                if package_resource.holders:
                    discovered_holders.extend(
                        h.get('value') for h in package_resource.holders)
            discovered_holders = process_holders(discovered_holders)

            combined_discovered_license_expression = combine_expressions(
                discovered_license_expressions)
            if combined_discovered_license_expression:
                simplified_discovered_license_expression = str(
                    Licensing().parse(
                        combined_discovered_license_expression).simplify())
            else:
                simplified_discovered_license_expression = None

            c = Consolidation(
                core_license_expression=package_license_expression,
                core_holders=_cluster_holders_sorted_by_key(package_holders),
                other_license_expression=simplified_discovered_license_expression,
                other_holders=_cluster_holders_sorted_by_key(discovered_holders),
                files_count=sum(
                    1 for package_resource in package_resources
                    if package_resource.is_file
                ),
                resources=package_resources,
            )

            if is_build_file:
                # A build component is identified by the package name.
                c.identifier = package.name
                yield ConsolidatedComponent(type='build', consolidation=c)
            else:
                yield ConsolidatedPackage(package=package, consolidation=c)
def get_consolidated_packages(codebase):
    """
    Yield one consolidated item for each package detected in the codebase: a
    ConsolidatedComponent of type 'build' when the package is a
    BaseBuildManifestPackage, and a ConsolidatedPackage otherwise.
    """
    for resource in codebase.walk(topdown=False):
        for package_data in resource.packages:
            package = get_package_instance(package_data)
            is_build_file = isinstance(package, BaseBuildManifestPackage)
            package_resources = list(
                package.get_package_resources(resource, codebase))

            declared_license_expression = package.license_expression
            declared_copyright = package.copyright

            # Holders declared by the package itself, detected from its
            # copyright statement treated as a single numbered line.
            declared_holders = []
            if declared_copyright:
                detections = CopyrightDetector().detect(
                    [(0, declared_copyright)],
                    copyrights=False,
                    holders=True,
                    authors=False,
                    include_years=False,
                )
                declared_holders = [holder for _, holder, _, _ in detections]

            found_license_expressions = []
            found_holders = []
            for member in package_resources:
                if not is_build_file:
                    # If a resource is part of a package Component, then it
                    # cannot be part of any other type of Component
                    member.extra_data['in_package_component'] = True
                    member.save(codebase)

                member_license_expression = combine_expressions(
                    member.license_expressions)
                member_holders = member.holders
                # Only resources carrying a license or some holders contribute.
                if member_license_expression or member_holders:
                    found_license_expressions.append(member_license_expression)
                    found_holders.extend(
                        h.get('value') for h in member_holders)

            # Drop the empty values (such as None) collected above.
            found_license_expressions = list(
                filter(None, found_license_expressions))
            found_holders = list(filter(None, found_holders))

            combined_expression = combine_expressions(found_license_expressions)
            simplified_expression = (
                str(Licensing().parse(combined_expression).simplify())
                if combined_expression
                else None
            )

            consolidation = Consolidation(
                core_license_expression=declared_license_expression,
                core_holders=sorted(set(declared_holders)),
                other_license_expression=simplified_expression,
                other_holders=sorted(set(found_holders)),
                files_count=len(
                    [member for member in package_resources if member.is_file]),
                resources=package_resources,
            )

            if is_build_file:
                # A build component is identified by the package name.
                consolidation.identifier = package.name
                yield ConsolidatedComponent(
                    type='build',
                    consolidation=consolidation,
                )
            else:
                yield ConsolidatedPackage(
                    package=package,
                    consolidation=consolidation,
                )