def determine_rejected_licenses(license_report, allowed_licenses, prohibited_licenses):
    """Yield every upload result that has rejected or unclassified licenses.

    `license_report` is iterated as pairs of `(upload_result, licenses)`. Each
    license must offer a `name()` method; that name is matched against the
    regexes in `allowed_licenses` and `prohibited_licenses`.

    For each pair with at least one rejected or unclassified license, a tuple
    `(upload_result, rejected_licenses, unclassified_licenses)` is yielded,
    where both license collections are sets. Pairs whose licenses are all
    accepted are skipped.
    """
    is_accepted = reutil.re_filter(
        include_regexes=allowed_licenses,
        exclude_regexes=prohibited_licenses,
    )
    is_prohibited = reutil.re_filter(
        include_regexes=prohibited_licenses,
    )

    for upload_result, licenses in license_report:
        found = set(licenses)
        accepted = {lic for lic in found if is_accepted(lic.name())}

        # A filter built from an empty 'prohibited_licenses' collection would
        # accept every name, so it must only be consulted if there are
        # prohibited patterns at all.
        rejected = (
            {lic for lic in found if is_prohibited(lic.name())}
            if prohibited_licenses
            else set()
        )

        # whatever is neither accepted nor rejected could not be classified
        unclassified = found - accepted - rejected

        if not (rejected or unclassified):
            continue
        yield upload_result, rejected, unclassified
def test_re_filter_empty_filter_matches_everything():
    # a filter without any include/exclude regexes accepts every value
    match_all = re_filter()
    assert match_all('foo') is True

    # the same holds if only a value transformation is configured
    match_all_transformed = re_filter(value_transformation=str)
    assert match_all_transformed('foo') is True
def test_re_filter_exclude():
    is_kept = re_filter(exclude_regexes=('^aaa.*', '^bbb'))

    # values fully matched by an exclude regex are filtered out
    for excluded_value in ('aaa', 'bbb'):
        assert not is_kept(excluded_value)

    # everything else passes; 'bbbb' passes because a full match is required
    for kept_value in ('fooaaa', 'bbbb', 'ccc'):
        assert is_kept(kept_value)

    # if a value is both included and excluded, exclusion has precedence
    conflicting_filter = re_filter(include_regexes=('^a', ), exclude_regexes=('^a', ))
    assert not conflicting_filter('a')
def repository_matches(self, repository_name: str):
    """Return the result of applying this object's repository filter.

    The filter is built from `include_repositories()` and
    `exclude_repositories()` and is then called with `repository_name`.
    """
    included = self.include_repositories()
    excluded = self.exclude_repositories()
    return reutil.re_filter(
        include_regexes=included,
        exclude_regexes=excluded,
    )(repository_name)
def scan_artifact_with_white_src(
    extra_whitesource_config: typing.Dict,
    scan_artifact: dso.model.ScanArtifact,
    whitesource_client: whitesource.client.WhitesourceClient,
):
    """Download a GitHub-hosted scan artifact and upload it for scanning.

    The commit to scan is guessed from the artifact's access data. The
    component is then downloaded into a temporary file, restricted by the
    include/exclude paths from the artifact's label if it has any, and
    uploaded via `whitesource_client.upload_to_project`.

    Args:
        extra_whitesource_config: passed through unchanged to the upload call.
        scan_artifact: the artifact to scan; its `access` is used to reach the
            GitHub repository, its `name` is used as the project name.
        whitesource_client: client used to upload the downloaded component.
    """
    logger.info('init scan')
    github_api = ccc.github.github_api_from_gh_access(
        access=scan_artifact.access,
    )
    github_repo = github_api.repository(
        owner=scan_artifact.access.org_name(),
        repository=scan_artifact.access.repository_name(),
    )

    logger.info('guessing commit hash')
    # guess git-ref for the given version
    commit_hash = product.util.guess_commit_from_source(
        artifact_name=scan_artifact.name,
        commit_hash=scan_artifact.access.commit,
        ref=scan_artifact.access.ref,
        github_repo=github_repo,
    )

    # "No regexes" is expressed as an empty collection (not an empty string),
    # matching the type of the path_config values assigned below.
    exclude_regexes = ()
    include_regexes = ()
    label = scan_artifact.label
    if label is not None and label.path_config is not None:
        exclude_regexes = label.path_config.exclude_paths
        include_regexes = label.path_config.include_paths

    path_filter_func = reutil.re_filter(
        exclude_regexes=exclude_regexes,
        include_regexes=include_regexes,
    )

    with tempfile.TemporaryFile() as tmp_file:
        logger.info('downloading component for scan')
        file_size = whitesource.component.download_component(
            logger=logger,
            github_repo=github_repo,
            path_filter_func=path_filter_func,
            ref=commit_hash,
            target=tmp_file,
        )

        # don't change the following line, lest things no longer work
        # sets the file position at the offset 0 == start of the file
        tmp_file.seek(0)

        logger.info('sending component to scan backend...')
        res = asyncio.run(
            whitesource_client.upload_to_project(
                extra_whitesource_config=extra_whitesource_config,
                file=tmp_file,
                project_name=scan_artifact.name,
                length=file_size,
            )
        )
        logger.info(res['message'])
        logger.info('scan complete')
def test_re_filter_include_matches():
    is_included = re_filter(include_regexes=('^aaa.*', '^bbb'))

    # values fully matched by one of the include regexes pass the filter
    for matching_value in ('aaa', 'bbb'):
        assert is_included(matching_value)

    # 'ccc' matches no regex; 'bbbb' is only a partial match of '^bbb',
    # which is not sufficient as a full match is required
    for rejected_value in ('ccc', 'bbbb'):
        assert not is_included(rejected_value)
def __init__(
    self,
    include_image_refs=(),
    exclude_image_refs=(),
    include_image_names=(),
    exclude_image_names=(),
):
    """Set up one filter on image references and one on image names.

    Both filters receive an image object: the reference filter matches its
    `access.imageReference`, the name filter matches its `name`.
    """
    def image_reference_of(image):
        return image.access.imageReference

    def image_name_of(image):
        return image.name

    self._image_ref_filter = reutil.re_filter(
        include_regexes=include_image_refs,
        exclude_regexes=exclude_image_refs,
        value_transformation=image_reference_of,
    )
    self._image_name_filter = reutil.re_filter(
        include_regexes=include_image_names,
        exclude_regexes=exclude_image_names,
        value_transformation=image_name_of,
    )
def __init__(
    self,
    include_component_names=(),
    exclude_component_names=(),
):
    """Set up a filter that matches components by their `name` attribute."""
    def component_name_of(component):
        return component.name

    self._comp_name_filter = reutil.re_filter(
        include_regexes=include_component_names,
        exclude_regexes=exclude_component_names,
        value_transformation=component_name_of,
    )
def scan_artifact_with_white_src( extra_whitesource_config: typing.Union[None, dict], scan_artifact: dso.model.ScanArtifact, whitesource_client: whitesource.client.WhitesourceClient, ) -> int: logger.debug('init scan') with tempfile.NamedTemporaryFile() as tmp_file: if scan_artifact.access.type is cm.AccessType.GITHUB: logger.debug('pulling from github') github_api = ccc.github.github_api_from_gh_access( access=scan_artifact.access) github_repo = github_api.repository( owner=scan_artifact.access.org_name(), repository=scan_artifact.access.repository_name(), ) # guess git-ref for the given version commit_hash = product.util.guess_commit_from_source( artifact_name=scan_artifact.name, commit_hash=scan_artifact.access.commit, ref=scan_artifact.access.ref, github_repo=github_repo, ) exclude_regexes = () include_regexes = () if scan_artifact.label is not None: if scan_artifact.label.path_config is not None: exclude_regexes = scan_artifact.label.path_config.exclude_paths include_regexes = scan_artifact.label.path_config.include_paths path_filter_func = reutil.re_filter( exclude_regexes=exclude_regexes, include_regexes=include_regexes) whitesource.component.download_component( logger=logger, github_repo=github_repo, path_filter_func=path_filter_func, ref=commit_hash, target=tmp_file, ) elif scan_artifact.access.type is cm.AccessType.OCI_REGISTRY: logger.debug('pulling from oci registry') oci_client = ccc.oci.oci_client() tar_gen = oci.image_layers_as_tarfile_generator( image_reference=scan_artifact.access.imageReference, oci_client=oci_client, include_config_blob=False, ) fake_gen = tarutil._FilelikeProxy(generator=tar_gen) while chunk := fake_gen.read(): tmp_file.write(chunk) else:
def component_name_filter(include_regexes=(), exclude_regexes=()):
    """Return a predicate that filters components by their name.

    If neither include nor exclude regexes are given, the returned predicate
    accepts every component. Otherwise, a `reutil.re_filter` is returned that
    is applied to the component's `name` attribute.
    """
    if include_regexes or exclude_regexes:
        def to_component_name(component: gci.componentmodel.Component):
            return component.name

        return reutil.re_filter(
            include_regexes=include_regexes,
            exclude_regexes=exclude_regexes,
            value_transformation=to_component_name,
        )

    # nothing to filter by: accept everything
    return lambda component: True
def image_name_filter(include_regexes=(), exclude_regexes=()):
    """Return a predicate that filters container images by their name.

    If neither include nor exclude regexes are given, the returned predicate
    accepts every container image. Otherwise, a `reutil.re_filter` is returned
    that is applied to the value of the image's `name()` method.
    """
    if include_regexes or exclude_regexes:
        def to_logical_name(container_image: product.model.ContainerImage):
            return container_image.name()

        return reutil.re_filter(
            include_regexes=include_regexes,
            exclude_regexes=exclude_regexes,
            value_transformation=to_logical_name,
        )

    # nothing to filter by: accept everything
    return lambda container_image: True
def image_name_filter(include_regexes=(), exclude_regexes=()):
    """Return a predicate that filters OCI resources by their name.

    If neither include nor exclude regexes are given, the returned predicate
    accepts every value without inspecting it. Otherwise, a `reutil.re_filter`
    is returned; it passes each resource to `_ensure_resource_is_oci` and then
    matches the resource's `name` attribute.
    """
    if include_regexes or exclude_regexes:
        def to_logical_name(resource: gci.componentmodel.Resource):
            _ensure_resource_is_oci(resource)
            return resource.name

        return reutil.re_filter(
            include_regexes=include_regexes,
            exclude_regexes=exclude_regexes,
            value_transformation=to_logical_name,
        )

    # nothing to filter by: accept everything
    return lambda container_image: True
def scan_gh_artifact(
    cx_project: checkmarx.project.CheckmarxProject,
    scan_artifact: dso.model.ScanArtifact,
    exclude_paths: typing.Sequence[str] = (),
    include_paths: typing.Sequence[str] = (),
) -> model.ScanResult:
    """Scan the GitHub-hosted sources of `scan_artifact` in `cx_project`.

    The commit to scan is guessed from the artifact's access data. The given
    include/exclude paths are merged with those from the artifact's label
    path config (if present) and turned into a path filter, which is handed
    to `upload_and_scan_gh_artifact` together with the repository and commit.

    Args:
        cx_project: project the artifact is uploaded to and scanned in.
        scan_artifact: artifact to scan; its access has to be of type github.
        exclude_paths: regexes of paths to exclude from the scan.
        include_paths: regexes of paths to include in the scan.

    Returns:
        The result of `upload_and_scan_gh_artifact`.

    Raises:
        product.util.RefGuessingFailedError: if guessing the commit raised
            `github3.exceptions.NotFoundError`.
    """
    github_api = ccc.github.github_api_from_gh_access(
        access=scan_artifact.access,
    )

    # access type has to be github thus we can call these methods
    gh_repo = github_api.repository(
        owner=scan_artifact.access.org_name(),
        repository=scan_artifact.access.repository_name(),
    )

    try:
        commit_hash = product.util.guess_commit_from_source(
            artifact_name=scan_artifact.name,
            commit_hash=scan_artifact.access.commit,
            github_repo=gh_repo,
            ref=scan_artifact.access.ref,
        )
    except github3.exceptions.NotFoundError as e:
        # chain explicitly so that the original error is reported as the cause
        raise product.util.RefGuessingFailedError(e) from e

    if scan_artifact.label is not None:
        if scan_artifact.label.path_config is not None:
            include_paths = {
                *include_paths,
                *scan_artifact.label.path_config.include_paths,
            }
            exclude_paths = {
                *exclude_paths,
                *scan_artifact.label.path_config.exclude_paths,
            }

    # if the scan_artifact has no label we will implicitly scan everything
    # since all images have to specify a label in order to be scanned
    # only github access types can occur here without the label
    path_filter_func = reutil.re_filter(
        include_regexes=include_paths,
        exclude_regexes=exclude_paths,
    )

    return upload_and_scan_gh_artifact(
        artifact_name=scan_artifact.name,
        cx_project=cx_project,
        gh_repo=gh_repo,
        source_commit_hash=commit_hash,
        path_filter_func=path_filter_func,
    )
def filter_recipients(self, recipients: typing.Iterable[str]):
    """Remove recipients that match the configured blacklist.

    The blacklist is read from the 'blacklist' entry of `self.raw`. If it is
    missing or empty, `recipients` is returned unchanged. Otherwise, a set of
    those recipients that pass a filter excluding the blacklist regexes is
    returned.
    """
    blacklist = self.raw.get('blacklist')
    if not blacklist:
        # nothing to filter by: hand back the input as-is
        return recipients

    is_permitted = reutil.re_filter(exclude_regexes=blacklist)
    return set(filter(is_permitted, recipients))