def test_from_str(self, coords, from_str, is_from_str_ok, to_str, to_str_omit_version, to_repo_url):
    """Check MavenCoordinates.from_str for each candidate input string.

    ``from_str`` is either one string or a list of equivalent strings; each
    must parse to ``coords`` when ``is_from_str_ok`` is true, otherwise
    parsing must raise ValueError.
    """
    candidates = [from_str] if not isinstance(from_str, list) else from_str
    for candidate in candidates:
        if not is_from_str_ok:
            with pytest.raises(ValueError):
                MavenCoordinates.from_str(candidate)
        else:
            assert MavenCoordinates.from_str(candidate) == coords
def fetch_maven_artifact(ecosystem, name, version, target_dir):
    """Fetch maven artifact from maven.org."""
    git = Git.create_git(target_dir)
    coords = MavenCoordinates.from_str(name)
    if not version:
        raise ValueError("No version provided for '%s'" % coords.to_str())
    coords.version = version
    if not coords.is_valid():
        raise NotABugTaskError("Invalid Maven coordinates: {a}".format(
            a=coords.to_str()))

    # Download the artifact from the ecosystem's configured repository.
    artifact_url = urljoin(ecosystem.fetch_url, coords.to_repo_url())
    local_filepath = IndianaJones.download_file(artifact_url, target_dir)
    if local_filepath is None:
        raise NotABugTaskError("Unable to download: %s" % artifact_url)

    artifact_path = os.path.join(target_dir, os.path.split(local_filepath)[1])
    digest = compute_digest(artifact_path)

    if coords.packaging != 'pom':
        Archive.extract(artifact_path, target_dir)
        if coords.packaging == 'aar':
            # 'aar' archive contains classes.jar, extract it too into target_dir
            classes_jar = os.path.join(target_dir, "classes.jar")
            if os.path.isfile(classes_jar):
                Archive.extract(classes_jar, target_dir)
                os.remove(classes_jar)

    git.add_and_commit_everything()
    return digest, artifact_path
def get(ecosystem, package, version):
    """Return analyses for an EPV, scheduling ingestion when unknown."""
    decoded = decode_token()
    package = urllib.parse.unquote(package)
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)
    package = case_sensitivity_transform(ecosystem, package)

    result = get_analyses_from_graph(ecosystem, package, version)
    if result is not None:
        # Known component for Bayesian
        server_create_component_bookkeeping(ecosystem, package, version, decoded)
        return result

    if os.environ.get("INVOKE_API_WORKERS", "") == "1":
        # Enter the unknown path
        server_create_analysis(ecosystem, package, version, user_profile=decoded,
                               api_flow=True, force=False, force_graph_sync=True)
        msg = "Package {ecosystem}/{package}/{version} is unavailable. " \
              "The package will be available shortly," \
              " please retry after some time.".format(ecosystem=ecosystem,
                                                      package=package,
                                                      version=version)
        raise HTTPError(202, msg)

    server_create_analysis(ecosystem, package, version, user_profile=decoded,
                           api_flow=False, force=False, force_graph_sync=True)
    msg = "No data found for {ecosystem} Package " \
          "{package}/{version}".format(ecosystem=ecosystem, package=package,
                                       version=version)
    raise HTTPError(404, msg)
def server_create_analysis(ecosystem, package, version, user_profile, api_flow=True, force=False, force_graph_sync=False): """Create bayesianApiFlow handling analyses for specified EPV. :param ecosystem: ecosystem for which the flow should be run :param package: package for which should be flow run :param version: package version :param force: force run flow even specified EPV exists :param force_graph_sync: force synchronization to graph :return: dispatcher ID handling flow """ # Bookkeeping first component = MavenCoordinates.normalize_str( package) if ecosystem == 'maven' else package server_create_component_bookkeeping(ecosystem, component, version, user_profile) args = { 'ecosystem': ecosystem, 'name': component, 'version': version, 'force': force, 'force_graph_sync': force_graph_sync } if api_flow: return server_run_flow('bayesianApiFlow', args) else: return server_run_flow('bayesianFlow', args)
def _create_analysis_arguments(ecosystem, name, version):
    """Create arguments for analysis."""
    if ecosystem == 'maven':
        name = MavenCoordinates.normalize_str(name)
    return {'ecosystem': ecosystem, 'name': name, 'version': version}
def ca_validate_input(input_json: Dict, ecosystem: str) -> Tuple[List[Dict], List[Package]]:
    """Validate CA Input.

    Validates the request body and each requested package/version pair,
    applying per-ecosystem normalization (maven coordinates, pypi lowercase,
    golang version cleanup and module stripping).

    :param input_json: parsed request body; must contain 'package_versions'
    :param ecosystem: ecosystem name the request targets
    :return: tuple of (packages_list dicts, normalised Package objects)
    :raises BadRequest: on any structural or per-package validation failure
    """
    logger.debug('Validating ca input data.')
    if not input_json:
        error_msg = "Expected JSON request"
        raise BadRequest(error_msg)
    if not isinstance(input_json, dict):
        error_msg = "Expected list of dependencies in JSON request"
        raise BadRequest(error_msg)
    if not check_for_accepted_ecosystem(ecosystem):
        error_msg: str = f"Ecosystem {ecosystem} is not supported for this request"
        raise BadRequest(error_msg)
    if not input_json.get('package_versions'):
        error_msg: str = "package_versions is missing"
        raise BadRequest(error_msg)
    gh = GithubUtils()
    packages_list = []
    normalised_input_pkgs = []
    for pkg in input_json.get('package_versions'):
        pseudo_version = False
        # Keep the caller-supplied values alongside the normalized ones.
        package = given_package = pkg.get("package")
        clean_version = given_version = pkg.get("version")
        if not all([package, given_version]):
            error_msg = "Invalid Input: Package, Version are required."
            raise BadRequest(error_msg)
        if (not isinstance(given_version, str)) or (not isinstance(package, str)):
            error_msg = "Package version should be string format only."
            raise BadRequest(error_msg)
        if not validate_version(given_version):
            error_msg = "Package version should not have special characters."
            raise BadRequest(error_msg)
        if ecosystem == 'maven':
            package = MavenCoordinates.normalize_str(package)
        if ecosystem == 'pypi':
            package = package.lower()
        if ecosystem == 'golang':
            _, clean_version = GolangDependencyTreeGenerator.clean_version(given_version)
            pseudo_version = gh.is_pseudo_version(clean_version)
            # Strip module appended to the package name
            package = package.split('@')[0]
        packages_list.append(
            {"name": package, "given_name": given_package, "version": clean_version,
             "given_version": given_version, "is_pseudo_version": pseudo_version})
        normalised_input_pkgs.append(normlize_packages(package, given_package,
                                                       clean_version, given_version,
                                                       pseudo_version))
    return packages_list, normalised_input_pkgs
def post(ecosystem, package, version):
    """Schedule a forced analysis for the given EPV and return HTTP 202."""
    user_profile = decode_token() or {}
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)
    package = case_sensitivity_transform(ecosystem, package)
    server_create_analysis(ecosystem, package, version,
                           user_profile=user_profile,
                           api_flow=True, force=True, force_graph_sync=False)
    return {}, 202
def _create_analysis_arguments(ecosystem, name, version):
    """Create arguments for analysis."""
    session = StoragePool.get_connected_storage('BayesianPostgres').session
    # Normalize the name only for maven-backed ecosystems.
    is_maven = Ecosystem.by_name(session, ecosystem).is_backed_by(
        EcosystemBackend.maven)
    return {
        'ecosystem': ecosystem,
        'name': MavenCoordinates.normalize_str(name) if is_maven else name,
        'version': version
    }
def validate_input(input_json: Dict, ecosystem: str) -> (List[Dict], List[Dict]):
    """Validate Request Body.

    :param input_json: parsed request body; must contain 'package_versions'
    :param ecosystem: ecosystem name the request targets
    :return: tuple of (valid packages, invalid maven packages that failed
        coordinate normalization, each with an empty vulnerabilities list)
    :raises BadRequest: on any structural or per-package validation failure
    """
    logger.debug('Validate Request Body.')
    if not input_json:
        error_msg = "Expected JSON request"
        raise BadRequest(error_msg)
    if not isinstance(input_json, dict):
        error_msg = "Expected list of dependencies in JSON request"
        raise BadRequest(error_msg)
    if not check_for_accepted_ecosystem(ecosystem):
        error_msg: str = f"Ecosystem {ecosystem} is not supported for this request"
        raise BadRequest(error_msg)
    if not input_json.get('package_versions'):
        error_msg: str = "package_versions is missing"
        raise BadRequest(error_msg)
    packages_list = []
    invalid_packages = []
    for pkg in input_json.get('package_versions'):
        package = pkg.get("package")
        clean_version = pkg.get("version")
        if not all([package, clean_version]):
            error_msg = "Invalid Input: Package, Version are required."
            raise BadRequest(error_msg)
        if (not isinstance(clean_version, str)) or (not isinstance(
                package, str)):
            error_msg = "Package version should be string format only."
            raise BadRequest(error_msg)
        if not validate_version(clean_version):
            error_msg = "Package version should not have special characters."
            raise BadRequest(error_msg)
        if ecosystem == 'maven':
            try:
                package = MavenCoordinates.normalize_str(package)
            # if package is invalid, add it to list of invalid packages
            except Exception:
                invalid_packages.append({
                    "name": package,
                    "version": clean_version,
                    "vulnerabilities": []
                })
                continue
        if ecosystem == 'pypi':
            package = package.lower()
        packages_list.append({"name": package, "version": clean_version})
    return packages_list, invalid_packages
def get_latest_analysis_for(ecosystem, package, version):
    """Return the latest Analysis for the given EPV, or None if none exists.

    Note: has to be called inside flask request context.
    """
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)
    # Query.first() returns None when no row matches -- it never raises
    # NoResultFound -- so the previous try/except NoResultFound wrapper
    # around this query was dead code and has been removed.
    return rdb.session.query(Analysis).\
        join(Version).join(Package).join(Ecosystem).\
        filter(Ecosystem.name == ecosystem).\
        filter(Package.name == package).\
        filter(Version.identifier == version).\
        order_by(Analysis.started_at.desc()).\
        first()
def get_latest_analysis_for(ecosystem, package, version):
    """Return the latest Analysis for the given EPV, or None if none exists.

    Note: has to be called inside flask request context.

    :param ecosystem: str, ecosystem name
    :param package: str, package name (normalized for maven)
    :param version: str, package version
    """
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)
    try:
        return rdb.session.query(Analysis).\
            join(Version).join(Package).join(Ecosystem).\
            filter(Ecosystem.name == ecosystem).\
            filter(Package.name == package).\
            filter(Version.identifier == version).\
            order_by(Analysis.started_at.desc()).\
            first()
    except SQLAlchemyError:
        # Roll back so the session stays usable after a failed query.
        rdb.session.rollback()
        raise
def get(ecosystem, package, version):
    """Handle the GET REST API call."""
    # decode_token() is kept for its side effects even though the result
    # is unused here, matching the original behavior.
    decoded = decode_token()
    package = urllib.parse.unquote(package)
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)
    package = case_sensitivity_transform(ecosystem, package)

    result = get_analyses_from_graph(ecosystem, package, version)
    if result is None:
        msg = "No data found for {ecosystem} package " \
              "{package}/{version}".format(ecosystem=ecosystem,
                                           package=package,
                                           version=version)
        raise HTTPError(404, msg)
    return result
def _get_downstream_mvn_pkgs(self, eco, pkg):
    """Check whether *pkg* exists in the Red Hat Maven GA repository."""
    packages = []
    self.log.info('Searching for {pkg} in maven repo {repo}...'.format(
        pkg=pkg, repo=RH_MVN_GA_REPO))
    ga_path = MavenCoordinates.from_str(pkg).to_repo_url(ga_only=True)
    response = requests.get('{repo}/{pkg}'.format(repo=RH_MVN_GA_REPO,
                                                  pkg=ga_path))
    if response.status_code == 200:
        self.log.info('Found {pkg} in {repo}'.format(pkg=pkg,
                                                     repo=RH_MVN_GA_REPO))
        packages.append(pkg)
    else:
        self.log.info(
            'Package {pkg} not found in {repo} (status code {code})'.
            format(pkg=pkg, repo=RH_MVN_GA_REPO, code=response.status_code))
    return RH_MVN_DISTRO_NAME, packages
def ca_validate_input(input_json: Dict, ecosystem: str) -> Tuple[List[Dict], List[Package]]:
    """Validate CA Input."""
    logger.debug('Validating ca input data.')
    if not input_json:
        raise BadRequest("Expected JSON request")
    if not isinstance(input_json, dict):
        raise BadRequest("Expected list of dependencies in JSON request")
    if not check_for_accepted_ecosystem(ecosystem):
        raise BadRequest(f"Ecosystem {ecosystem} is not supported for this request")
    if not input_json.get('package_versions'):
        raise BadRequest("package_versions is missing")

    packages_list = []
    normalised_input_pkgs = []
    for pkg in input_json.get('package_versions'):
        package = pkg.get("package")
        version = pkg.get("version")
        # Per-package validation: presence, type, and version characters.
        if not all([package, version]):
            raise BadRequest("Invalid Input: Package, Version are required.")
        if (not isinstance(version, str)) or (not isinstance(package, str)):
            raise BadRequest("Package version should be string format only.")
        if not validate_version(version):
            raise BadRequest("Package version should not have special characters.")

        if ecosystem == 'maven':
            package = MavenCoordinates.normalize_str(package)
        elif ecosystem == 'pypi':
            package = package.lower()

        packages_list.append({"name": package, "version": version})
        normalised_input_pkgs.append(normlize_packages(package, version))
    return packages_list, normalised_input_pkgs
def get_analysis_count(self, ecosystem, package):
    """Get count of previously scheduled analyses for given ecosystem-package.

    :param ecosystem: str, Ecosystem name
    :param package: str, Package name
    :return: analysis count
    """
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)
    try:
        return PostgresBase.session.query(PackageAnalysis).\
            join(Package).join(Ecosystem).\
            filter(Ecosystem.name == ecosystem).\
            filter(Package.name == package).\
            count()
    except SQLAlchemyError:
        # Roll back so the session stays usable after a failed query.
        PostgresBase.session.rollback()
        raise
def _get_dependency_data(self, dependencies, ecosystem):
    """Fetch version+package data for each maven dependency from the graph.

    :param dependencies: iterable of maven coordinate strings ("g:a:v")
    :param ecosystem: str, ecosystem name used in the graph query
    :return: list of dicts merging per-version and per-package graph data
    """
    dependency_data_list = list()
    self.log.debug("Dependencies are: {}".format(dependencies))
    for dependency in dependencies:
        self.log.info("Analyzing dependency: {}".format(dependency))
        artifact_coords = MavenCoordinates.from_str(dependency)
        # NOTE(review): the Gremlin query is built by string concatenation
        # from the dependency string -- if dependencies can come from
        # untrusted input this is injectable; prefer parameterized bindings
        # in the request payload.
        qstring = ("g.V().has('pecosystem','" + ecosystem + "').has('pname','" +
                   artifact_coords.groupId + ":" + artifact_coords.artifactId + "')"
                   ".has('version','" + artifact_coords.version + "').")
        qstring += ("as('version').in('has_version').as('package').dedup()." +
                    "select('version','package').by(valueMap());")
        payload = {'gremlin': qstring}
        try:
            graph_req = get_session_retry().post(GREMLIN_SERVER_URL_REST,
                                                 data=json.dumps(payload))
            if graph_req.status_code == 200:
                graph_resp = graph_req.json()
                data = graph_resp.get('result', {}).get('data')
                if data:
                    version_data = self.parse_version_data(
                        data[0].get('version'))
                    package_data = self.parse_package_data(
                        data[0].get('package'))
                    # Merge version and package attributes into one record.
                    dependency_data = version_data.copy()
                    dependency_data.update(package_data)
                    dependency_data_list.append(dependency_data)
            else:
                # Non-200 from the graph: skip this dependency.
                self.log.error("Failed retrieving dependency data.")
                continue
        except Exception:
            # Best-effort: a failure for one dependency must not abort
            # processing of the rest.
            self.log.exception("Error retrieving dependency data.")
            continue
    self.log.debug(
        "Dependency data list is: {}".format(dependency_data_list))
    return dependency_data_list
def get_analysis_count(ecosystem, package, version):
    """Get count of previously scheduled analysis for given EPV triplet.

    :param ecosystem: str, Ecosystem name
    :param package: str, Package name
    :param version: str, Package version
    :return: analysis count
    """
    eco = Ecosystem.by_name(PostgresBase.session, ecosystem)
    if eco.is_backed_by(EcosystemBackend.maven):
        package = MavenCoordinates.normalize_str(package)
    try:
        return PostgresBase.session.query(Analysis).\
            join(Version).join(Package).join(Ecosystem).\
            filter(Ecosystem.name == ecosystem).\
            filter(Package.name == package).\
            filter(Version.identifier == version).\
            count()
    except SQLAlchemyError:
        # Roll back so the session stays usable after a failed query.
        PostgresBase.session.rollback()
        raise
def _add_mvn_results(self, result_summary, anitya_mvn_names, version):
    """Collect Red Hat Maven rebuild versions matching *version*."""
    def _compare_version(downstream, upstream):
        dv = downstream
        if 'redhat' in dv:
            # remove ".redhat-X" or "-redhat-X" suffix
            dv = dv[:dv.find('redhat') - 1]
        return dv == upstream

    downstream_rebuilds = []
    for name in anitya_mvn_names:
        ga = MavenCoordinates.from_str(name).to_repo_url(ga_only=True)
        metadata_url = '{repo}/{pkg}/maven-metadata.xml'.format(
            repo=RH_MVN_GA_REPO, pkg=ga)
        res = requests.get(metadata_url)
        if res.status_code != 200:
            self.log.info('Metadata for package {pkg} not found in {repo} (status {code})'.
                          format(pkg=name, repo=RH_MVN_GA_REPO,
                                 code=res.status_code))
            continue
        versions = anymarkup.parse(res.text)['metadata']['versioning']['versions']['version']
        # make sure 'versions' is a list (it's a string if there is just one version)
        if not isinstance(versions, list):
            versions = [versions]
        self.log.info('Found versions {v} for package {p}'.format(v=versions, p=name))
        downstream_rebuilds.extend(
            v for v in versions if _compare_version(v, version))

    result_summary['rh_mvn_matched_versions'] = downstream_rebuilds
    if downstream_rebuilds:
        # For now, we don't distinguish products, we just use general "Middleware"
        # for all Maven artifacts
        result_summary['all_rhsm_product_names'].append('Middleware')
def get(ecosystem, package, version):
    """Handle the GET REST API call.

    Component Analyses:
        - If package is Known (exists in GraphDB / Snyk Edge), returns a JSON
          formatted response.
        - If package is not Known:
            - DISABLE_UNKNOWN_PACKAGE_FLOW flag is 1: skips the unknown
              package and returns 202
            - DISABLE_UNKNOWN_PACKAGE_FLOW flag is 0: then checks:
                - INVOKE_API_WORKERS flag is 1: trigger bayesianApiFlow to
                  fetch package details
                - INVOKE_API_WORKERS flag is 0: trigger bayesianFlow to
                  fetch package details

    :return: JSON Response
    """
    st = time.time()
    # Analytics Data pushed to the metrics service for every outcome below.
    metrics_payload = {
        "pid": os.getpid(),
        "hostname": HOSTNAME,
        "endpoint": request.endpoint,
        "request_method": "GET",
        "ecosystem": ecosystem,
        "package": package,
        "version": version
    }
    response_template = namedtuple("response_template", ["message", "status"])
    logger.info("Executed v2 API")
    package = urllib.parse.unquote(package)
    if re.findall('[!@#$%^&*()]', version):
        # Version should not contain special Characters.
        return response_template(
            {
                'error': "Package version should not have special characters."
            }, 400)
    if not check_for_accepted_ecosystem(ecosystem):
        msg = f"Ecosystem {ecosystem} is not supported for this request"
        raise HTTPError(400, msg)
    if ecosystem == 'maven':
        try:
            package = MavenCoordinates.normalize_str(package)
        except ValueError:
            msg = f"Invalid maven format - {package}"
            metrics_payload.update({
                "status_code": 400,
                "value": time.time() - st
            })
            _session.post(url=METRICS_SERVICE_URL + "/api/v1/prometheus",
                          json=metrics_payload)
            raise HTTPError(400, msg)
    package = case_sensitivity_transform(ecosystem, package)

    # Perform Component Analyses on Vendor specific Graph Edge.
    analyses_result = ComponentAnalyses(
        ecosystem, package, version).get_component_analyses_response()

    if analyses_result is not None:
        # Known component for Fabric8 Analytics
        server_create_component_bookkeeping(ecosystem, package, version,
                                            g.decoded_token)
        metrics_payload.update({
            "status_code": 200,
            "value": time.time() - st
        })
        _session.post(url=METRICS_SERVICE_URL + "/api/v1/prometheus",
                      json=metrics_payload)
        return analyses_result
    elif os.environ.get("DISABLE_UNKNOWN_PACKAGE_FLOW", "") == "1":
        # BUGFIX: message previously read "ingetion".
        msg = f"No data found for {ecosystem} package {package}/{version} " \
              "ingestion flow skipped as DISABLE_UNKNOWN_PACKAGE_FLOW is enabled"
        return response_template({'error': msg}, 202)

    if os.environ.get("INVOKE_API_WORKERS", "") == "1":
        # Trigger the unknown component ingestion.
        server_create_analysis(ecosystem, package, version,
                               user_profile=g.decoded_token,
                               api_flow=True, force=False,
                               force_graph_sync=True)
        msg = f"Package {ecosystem}/{package}/{version} is unavailable. " \
              "The package will be available shortly," \
              " please retry after some time."
        metrics_payload.update({
            "status_code": 202,
            "value": time.time() - st
        })
        _session.post(url=METRICS_SERVICE_URL + "/api/v1/prometheus",
                      json=metrics_payload)
        return response_template({'error': msg}, 202)

    # No data has been found and INVOKE_API_WORKERS flag is down.
    server_create_analysis(ecosystem, package, version,
                           user_profile=g.decoded_token,
                           api_flow=False, force=False, force_graph_sync=True)
    msg = f"No data found for {ecosystem} package {package}/{version}"
    metrics_payload.update({"status_code": 404, "value": time.time() - st})
    _session.post(url=METRICS_SERVICE_URL + "/api/v1/prometheus",
                  json=metrics_payload)
    raise HTTPError(404, msg)
def get(ecosystem, package, version):
    """Handle the GET REST API call.

    Component Analyses:
    - If package is Known (exists in GraphDB (Snyk Edge) returns Json formatted response.
    - If package is not Known: Call Util's function to trigger ingestion flow.

    :return: JSON Response
    """
    st = time.time()
    # Analytics Data pushed to the metrics service for every outcome below.
    metrics_payload = {
        "pid": os.getpid(),
        "hostname": HOSTNAME,
        "endpoint": request.endpoint,
        "request_method": "GET",
        "ecosystem": ecosystem,
        "package": package,
        "version": version
    }
    response_template = namedtuple("response_template", ["message", "status"])
    logger.info("Executed v2 API")
    package = urllib.parse.unquote(package)
    if re.findall('[!@#$%^&*()]', version):
        # Version should not contain special Characters.
        return response_template(
            {
                'error': "Package version should not have special characters."
            }, 400)
    if not check_for_accepted_ecosystem(ecosystem):
        msg = f"Ecosystem {ecosystem} is not supported for this request"
        raise HTTPError(400, msg)
    if ecosystem == 'maven':
        try:
            package = MavenCoordinates.normalize_str(package)
        except ValueError:
            msg = f"Invalid maven format - {package}"
            metrics_payload.update({
                "status_code": 400,
                "value": time.time() - st
            })
            _session.post(url=METRICS_SERVICE_URL + "/api/v1/prometheus",
                          json=metrics_payload)
            raise HTTPError(400, msg)
    package = case_sensitivity_transform(ecosystem, package)

    # Perform Component Analyses on Vendor specific Graph Edge.
    analyses_result = ComponentAnalyses(
        ecosystem, package, version).get_component_analyses_response()

    if analyses_result is not None:
        metrics_payload.update({
            "status_code": 200,
            "value": time.time() - st
        })
        _session.post(url=METRICS_SERVICE_URL + "/api/v1/prometheus",
                      json=metrics_payload)
        return analyses_result

    # No data has been found: schedule ingestion for this unknown package.
    unknown_pkgs = set()
    unknown_pkgs.add(
        ingestion_utils.Package(package=package, version=version))
    unknown_package_flow(ecosystem, unknown_pkgs)

    msg = f"No data found for {ecosystem} package {package}/{version}"
    metrics_payload.update({"status_code": 404, "value": time.time() - st})
    _session.post(url=METRICS_SERVICE_URL + "/api/v1/prometheus",
                  json=metrics_payload)
    raise HTTPError(404, msg)
def foo(x): s.add(x) original = set(range(0, 10)) tp = ThreadPool(foo) for i in original: tp.add_task(i) tp.start() tp.join() assert s == original example_coordinates = [ # MavenCoordinates(), from_str, is_from_str_ok, to_str, # to_str(omit_version=True), to_repo_url (MavenCoordinates('g', 'a'), 'g:a', True, 'g:a', 'g:a', None), (MavenCoordinates('g', 'a', '1'), 'g:a:1', True, 'g:a:1', 'g:a', 'g/a/1/a-1.jar'), (MavenCoordinates('g', 'a', packaging='war'), 'g:a:war:', True, 'g:a:war:', 'g:a:war:', None), (MavenCoordinates('g', 'a', '1', packaging='war'), ['g:a:war:1', 'g:a:war::1'], True, 'g:a:war:1', 'g:a:war:', 'g/a/1/a-1.war'), (MavenCoordinates('g', 'a', classifier='sources'), 'g:a::sources:', True, 'g:a::sources:', 'g:a::sources:', None), (MavenCoordinates('g', 'a', '1', classifier='sources'), 'g:a::sources:1', True, 'g:a::sources:1', 'g:a::sources:', 'g/a/1/a-1-sources.jar'), (MavenCoordinates('g', 'a', packaging='war', classifier='sources'), 'g:a:war:sources:', True, 'g:a:war:sources:', 'g:a:war:sources:', None), (MavenCoordinates('g', 'a', '1', packaging='war', classifier='sources'), 'g:a:war:sources:1', True, 'g:a:war:sources:1', 'g:a:war:sources:', 'g/a/1/a-1-sources.war'), (MavenCoordinates('org.fedoraproject', 'test-artifact', '1.0-beta1'),