def rectify_latest_version(input):
    """Rectify the latest version of the EPVs.

    For every EPV dict in *input*, compare the latest version recorded in the
    graph ('latest_version') against the actual latest version (taken from the
    EPV itself when present, otherwise fetched from upstream) and queue a
    property-update query for each stale node. All updates are fired in one
    batch via batch_query_executor.
    """
    query_str = "g.V().has('ecosystem', '{arg0}')" \
                ".has('name', '{arg1}')" \
                ".property('latest_version', '{arg2}')" \
                ".property('latest_version_last_updated', '{arg3}');"
    resp = {
        "message": "Latest version rectified for the EPVs",
        "status": "Success"
    }
    # Stamp used for the 'latest_version_last_updated' property (YYYYMMDD).
    today_stamp = (datetime.utcnow()).strftime('%Y%m%d')
    pending = []
    for epv in input:
        # Skip malformed entries that lack the identifying fields.
        if 'ecosystem' not in epv or 'name' not in epv:
            continue
        ecosystem = epv['ecosystem']
        package = epv['name']
        # Prefer the pre-computed value; fall back to an upstream lookup.
        if 'actual_latest_version' in epv:
            actual_latest = epv['actual_latest_version']
        else:
            actual_latest = get_latest_versions_for_ep(ecosystem, package)
        known_latest = epv.get('latest_version', '')
        # Only touch nodes whose stored latest version is stale.
        if known_latest != actual_latest:
            pending.append({
                "0": ecosystem,
                "1": package,
                "2": actual_latest,
                "3": today_stamp
            })
    result_data = batch_query_executor(query_str, pending)
    logger.info("Latest version updated for the EPVs -> {r}".format(r=result_data))
    return resp
def construct_graph_nodes(cls, epv):
    """Create query string to create empty EPV nodes.

    Builds a single Gremlin script that (a) creates the Package node if it
    does not yet exist, (b) creates the Version node if it does not yet
    exist, and (c) links them with a 'has_version' edge. Values are inlined
    into the query with str.format.

    :param epv: dict with keys 'ecosystem', 'name', 'version' (all required)
                and optional 'source_repo' and 'latest_version'
    :return: concatenated Gremlin query string, or None when any of
             ecosystem/name/version is missing
    """
    ecosystem = epv.get('ecosystem')
    pkg_name = epv.get('name')
    version = epv.get('version')
    source_repo = epv.get('source_repo', '')
    latest_version = epv.get('latest_version', '')
    # Fall back to an upstream lookup when the caller did not supply one.
    if not latest_version:
        latest_version = get_latest_versions_for_ep(ecosystem, pkg_name)
    if ecosystem and pkg_name and version:
        # Query to Create Package Node
        # Also bumps the per-ecosystem package counter on the 'Count' vertex
        # when a brand-new Package vertex is created.
        # TODO: refactor into the separate module
        pkg_str = "pkg = g.V().has('ecosystem','{ecosystem}').has('name', '{pkg_name}')." \
                  "tryNext().orElseGet{{g.V()." \
                  "has('vertex_label','Count').choose(has('{ecosystem}_pkg_count')," \
                  "sack(assign).by('{ecosystem}_pkg_count').sack(sum).by(constant(" \
                  "1)).property('{ecosystem}_pkg_count',sack())," \
                  "property('{ecosystem}_pkg_count',1)).iterate();" \
                  "graph.addVertex('ecosystem', '{ecosystem}', " \
                  "'name', '{pkg_name}', 'vertex_label', 'Package');}};" \
                  "pkg.property('latest_version', '{latest_version}');" \
                  "pkg.property('last_updated', {last_updated});".format(
                      ecosystem=ecosystem, latest_version=latest_version,
                      pkg_name=pkg_name, last_updated=str(time.time()))
        # Query to Create Version Node
        # Same get-or-create pattern, keyed by pecosystem/pname/version, with
        # its own per-ecosystem version counter.
        # TODO: refactor into the separate module
        ver_str = "ver = g.V().has('pecosystem', '{ecosystem}').has('pname', " \
                  "'{pkg_name}').has('version', '{version}').tryNext().orElseGet{{" \
                  "g.V().has('vertex_label','Count').choose(has('{ecosystem}_ver_count')," \
                  "sack(assign).by('{ecosystem}_ver_count').sack(sum).by(constant(" \
                  "1)).property('{ecosystem}_ver_count',sack())," \
                  "property('{ecosystem}_ver_count',1)).iterate();" \
                  "graph.addVertex('pecosystem','{ecosystem}', 'pname','{pkg_name}', " \
                  "'version', '{version}', 'vertex_label', 'Version');}};" \
                  "ver.property('last_updated',{last_updated});".format(
                      ecosystem=ecosystem, pkg_name=pkg_name, version=version,
                      last_updated=str(time.time()))
        # Add version node properties
        if source_repo:
            ver_str += "ver.property('source_repo','{source_repo}');".format(
                source_repo=source_repo)
        # Query to create an edge between Package Node to Version Node
        # (only when no 'has_version' in-edge already points at the Version).
        # TODO: refactor into the separate module
        edge_str = "edge_c = g.V().has('pecosystem','{ecosystem}').has('pname'," \
                   "'{pkg_name}').has('version','{version}').in(" \
                   "'has_version').tryNext()" \
                   ".orElseGet{{pkg.addEdge('has_version', ver)}};".format(
                       ecosystem=ecosystem, pkg_name=pkg_name, version=version)
        return pkg_str + ver_str + edge_str
    else:
        return None
def generate_report_for_latest_version(epv_list, day):
    """Generate a report for the latest version.

    :param epv_list: list, list of EPVs
    :return json, list of version information
    """
    query_str = "g.V().has('ecosystem', '{arg0}')." \
                "has('name', '{arg1}')" \
                ".valueMap().dedup().fill(epv);"
    # Pre-populate one report entry per requested EPV, keyed by eco@DELIM@pkg.
    args = [{"0": epv['ecosystem'], "1": epv['name']} for epv in epv_list]
    report_result = {
        epv['ecosystem'] + "@DELIM@" + epv['name']: {
            "ecosystem": epv['ecosystem'],
            "name": epv['name'],
            "known_latest_version": "",
            "actual_latest_version": "",
            "non_cve_version": ""
        }
        for epv in epv_list
    }
    result_data = batch_query_executor(query_str, args)
    today = day.strftime('%Y%m%d')
    yesterday = (day - timedelta(days=1)).strftime('%Y%m%d')
    if result_data is not None:
        for res in result_data:
            eco = get_value(res, 'ecosystem')
            pkg = get_value(res, 'name')
            latest_pkg_version = get_value(res, 'latest_version')
            non_cve_version = get_value(res, 'latest_non_cve_version')
            last_updated_date = get_value(res, 'latest_version_last_updated')
            entry = report_result[eco + "@DELIM@" + pkg]
            # Trust the graph value only when it was refreshed within a day;
            # otherwise consult upstream for the real latest version.
            if last_updated_date in (today, yesterday):
                entry['actual_latest_version'] = latest_pkg_version
            else:
                _logger.info(
                    "Dates don't match. Will pick the version from upstream for {e} {p}"
                    .format(e=eco, p=pkg))
                entry['actual_latest_version'] = get_latest_versions_for_ep(eco, pkg)
            entry['known_latest_version'] = latest_pkg_version
            entry['non_cve_version'] = non_cve_version
    return report_result
def create_query_string(cls, input_json):
    """Create query to get information about the package or package+version .

    Delegates the Package and Version sub-queries to construct_package_query
    and construct_version_query, then stitches them together; when only the
    version part produced output, a fallback Package get-or-create query is
    inlined so the 'has_version' edge query below has a 'pkg' variable to
    reference.
    """
    # TODO add check of JSON against the schema
    # NPM packages with dependencies, versions i.e. Package version
    # TODO add check for existence of this attribute
    pkg_name = input_json.get('package')
    # TODO add check for existence of this attribute
    ecosystem = input_json.get('ecosystem')
    # NOTE(review): only 'version' is sanitized here; presumably the other
    # fields are sanitized inside the construct_*_query helpers — confirm.
    version = cls.sanitize_text_for_query(input_json.get('version'))
    # creation of query string
    str_gremlin = ""
    str_package, prp_package = cls.construct_package_query(input_json)
    if prp_package:
        str_gremlin = str_package + prp_package
    if version is not None and version != '':
        str_gremlin_version = cls.construct_version_query(input_json)
        # Add edge from Package to Version
        if str_gremlin_version:
            str_gremlin += str_gremlin_version
            if not prp_package:
                # No package properties were generated, so the 'pkg' traversal
                # variable does not exist yet — create/fetch the Package node
                # here so the edge query below can use it.
                # TODO: refactor into the separate module
                latest_version = get_latest_versions_for_ep(
                    ecosystem, pkg_name)
                str_gremlin += "pkg = g.V().has('ecosystem','{ecosystem}')." \
                               "has('name', '{pkg_name}').tryNext().orElseGet{{" \
                               "g.V().has('vertex_label','Count').choose(has('" \
                               "{ecosystem}_pkg_count'),sack(assign).by('" \
                               "{ecosystem}_pkg_count').sack(sum).by(constant(1))." \
                               "property('{ecosystem}_pkg_count',sack()),property(" \
                               "'{ecosystem}_pkg_count',1)).iterate();graph.addVertex(" \
                               "'ecosystem', '{ecosystem}', 'name', '{pkg_name}', " \
                               "'vertex_label', 'Package');}};" \
                               "pkg.property('latest_version', '{latest_version}');" \
                               "pkg.property('last_updated', {last_updated});".format(
                                   ecosystem=ecosystem, latest_version=latest_version,
                                   pkg_name=pkg_name, last_updated=str(time.time()))
            # Create the has_version edge only if it does not already exist.
            # TODO: refactor into the separate module
            str_gremlin += "edge_c = g.V().has('pecosystem','{ecosystem}').has('pname'," \
                           "'{pkg_name}').has('version','{version}').in(" \
                           "'has_version').tryNext()" \
                           ".orElseGet{{pkg.addEdge('has_version', ver)}};".format(
                               ecosystem=ecosystem, pkg_name=pkg_name, version=version)
    logger.info("Gremlin Query: %s" % str_gremlin)
    return str_gremlin
def test_get_latest_versions_for_ep():
    """Test basic behavior of function get_latest_versions_for_ep."""
    # Well-known packages in supported ecosystems must resolve to a version.
    known_epvs = [
        ("maven", "tomcat:catalina"),
        ("maven", "org.abcl:abcl"),
        ("pypi", "numpy"),
        ("npm", "array"),
    ]
    for ecosystem, package in known_epvs:
        assert get_latest_versions_for_ep(ecosystem, package) is not None
    # An unsupported ecosystem and a missing package name are both rejected.
    with pytest.raises(ValueError):
        get_latest_versions_for_ep("cobol", "cds-parsers")
    with pytest.raises(ValueError):
        get_latest_versions_for_ep("maven", None)
def generate_report_for_latest_version(epv_list):
    """Generate a report for the latest version.

    :param epv_list: list, list of EPVs
    :return json, list of version information
    """
    query_str = "g.V().has('ecosystem', '{arg0}')." \
                "has('name', '{arg1}')" \
                ".valueMap().dedup().fill(epv);"
    report_result = {}
    args = []
    for epv in epv_list:
        eco = epv['ecosystem']
        pkg = epv['name']
        args.append({"0": eco, "1": pkg})
        # Seed each entry with the upstream latest version; the graph-known
        # value is filled in from the query results below.
        report_result[eco + "@" + pkg] = {
            "ecosystem": eco,
            "name": pkg,
            "known_latest_version": "",
            "actual_latest_version": get_latest_versions_for_ep(eco, pkg)
        }
    result_data = batch_query_executor(query_str, args)
    if result_data is not None:
        for res in result_data:
            key = get_value(res, 'ecosystem') + "@" + get_value(res, 'name')
            report_result[key]['known_latest_version'] = get_value(res, 'latest_version')
    return report_result
def construct_graph_nodes(cls, epv):
    """Create query string to create empty EPV nodes.

    Bindings-based variant: the Gremlin script references named parameters
    (ecosystem, name, version, ...) and the concrete values are returned in a
    separate bindings dict, instead of being inlined with str.format.

    :param epv: dict with keys 'ecosystem', 'name', 'version' (all required)
                and optional 'source_repo', 'license' (list), 'gh_link',
                'latest_version'
    :return: (query_string, bindings) tuple, or (None, None) when any of
             ecosystem/name/version is missing
    """
    ecosystem = epv.get('ecosystem')
    pkg_name = epv.get('name')
    version = epv.get('version')
    source_repo = epv.get('source_repo', '')
    # NOTE(review): this local shadows the builtin `license` — consider renaming.
    license = epv.get('license', [])
    gh_link = epv.get('gh_link', '')
    latest_version = epv.get('latest_version', '')
    # Fall back to an upstream lookup when the caller did not supply one.
    if not latest_version:
        latest_version = get_latest_versions_for_ep(ecosystem, pkg_name)
    # Gremlin bindings: every bare identifier in the query strings below is
    # resolved against this dict server-side.
    bindings = {
        "ecosystem": ecosystem,
        "name": pkg_name,
        "version": version,
        "repo": source_repo,
        "gh_link": gh_link,
        "latest": latest_version,
        "ep_count": ecosystem + "_pkg_count",
        "epv_count": ecosystem + "_ver_count",
        "last_updated": str(time.time()),
        "vertex_p": "Package",
        "vertex_c": "Count",
        "vertex_v": "Version"
    }
    if ecosystem and pkg_name and version:
        # Query to Create Package Node
        # Get-or-create; bumps the per-ecosystem package counter when a new
        # Package vertex is created.
        # TODO: refactor into the separate module
        pkg_str = "pkg = g.V().has('ecosystem',ecosystem).has('name', name)." \
                  "tryNext().orElseGet{g.V()." \
                  "has('vertex_label',vertex_c).choose(has(ep_count)," \
                  "sack(assign).by(ep_count).sack(sum).by(constant(" \
                  "1)).property(ep_count,sack())," \
                  "property(ep_count,1)).iterate();" \
                  "graph.addVertex('ecosystem', ecosystem, " \
                  "'name', name, 'vertex_label', vertex_p);};" \
                  "pkg.property('latest_version', latest);" \
                  "pkg.property('last_updated', last_updated);"
        # Query to Create Version Node
        # Same pattern keyed by pecosystem/pname/version with its own counter.
        # TODO: refactor into the separate module
        ver_str = "ver = g.V().has('pecosystem', ecosystem).has('pname', " \
                  "name).has('version', version).tryNext().orElseGet{" \
                  "g.V().has('vertex_label', vertex_c).choose(has(epv_count)," \
                  "sack(assign).by(epv_count).sack(sum).by(constant(" \
                  "1)).property(epv_count,sack())," \
                  "property(epv_count,1)).iterate();" \
                  "graph.addVertex('pecosystem',ecosystem, 'pname',name, " \
                  "'version', version, 'vertex_label', vertex_v);};" \
                  "ver.property('last_updated',last_updated);"
        # Add version node properties
        if source_repo:
            ver_str += "ver.property('source_repo', repo);"
        if license and len(license) > 0:
            # Each license gets its own numbered binding (lic1, lic2, ...)
            # so multiple 'declared_licenses' properties can be added.
            counter = 1
            for lic in license:
                ver_str += "ver.property('declared_licenses', lic" + str(
                    counter) + ");"
                bindings["lic" + str(counter)] = lic
                counter += 1
        # Add package node properties
        if gh_link:
            pkg_str += "pkg.property('gh_link', gh_link);"
        # Query to create an edge between Package Node to Version Node
        # (only when no 'has_version' in-edge already points at the Version).
        # TODO: refactor into the separate module
        edge_str = "edge_c = g.V().has('pecosystem', ecosystem).has('pname'," \
                   "name).has('version', version).in(" \
                   "'has_version').tryNext()" \
                   ".orElseGet{pkg.addEdge('has_version', ver)};"
        return pkg_str + ver_str + edge_str, bindings
    else:
        return None, None
def _import_keys_from_s3_http(data_source, epv_list):
    """Import EPV analysis data from S3 keys into the graph over Gremlin HTTP.

    For each EPV key set: ensures graph nodes for the latest version exist,
    merges version-level and package-level S3 objects into one dict, builds a
    Gremlin query for it, POSTs the query to the Gremlin server, and on
    success marks the EPV as synced in RDS. Returns a summary report dict.
    """
    # TODO: reduce cyclomatic complexity
    logger.debug("Begin import...")
    report = {'status': 'Success', 'message': 'The import finished successfully!'}
    count_imported_EPVs = 0
    last_imported_EPV = None
    epv = []  # collects "eco:pkg:version" strings for every ingestion attempt
    for epv_key in epv_list:
        for key, contents in epv_key.items():
            # Nothing on S3 for this key — record the fact and move on.
            if len(contents.get('pkg_list_keys')) == 0 and \
                    len(contents.get('ver_list_keys')) == 0:
                report['message'] = \
                    'Nothing to be imported! No data found on S3 to be imported!'
                continue
            pkg_ecosystem = contents.get('ecosystem')
            pkg_name = contents.get('package')
            pkg_version = contents.get('version') or ''
            # NOTE(review): the default for source_repo is the ecosystem name —
            # looks intentional but worth confirming.
            pkg_source = contents.get('source_repo', pkg_ecosystem)
            obj = {
                'ecosystem': pkg_ecosystem,
                'package': pkg_name,
                'version': pkg_version,
                'source_repo': pkg_source}
            latest_version = get_latest_versions_for_ep(pkg_ecosystem, pkg_name)
            # Make sure empty nodes for the latest version exist up front.
            latest_epv_list = [{
                'ecosystem': pkg_ecosystem,
                'name': pkg_name,
                'version': latest_version
            }]
            create_graph_nodes(latest_epv_list)
            try:
                # Check other Version level information and add it to common object
                if len(contents.get('ver_list_keys')) > 0:
                    first_key = contents['ver_key_prefix'] + '.json'
                    first_obj = _first_key_info(data_source, first_key,
                                                config.AWS_EPV_BUCKET)
                    first_obj['latest_version'] = latest_version
                    obj.update(first_obj)
                    ver_obj = _other_key_info(data_source,
                                              contents.get('ver_list_keys'),
                                              config.AWS_EPV_BUCKET)
                    # Merge under 'analyses' when present, else merge top-level.
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(ver_obj['analyses'])
                    else:
                        obj.update(ver_obj)
                # Check Package related information and add it to package object
                if len(contents.get('pkg_list_keys')) > 0:
                    pkg_obj = _other_key_info(data_source,
                                              contents.get('pkg_list_keys'),
                                              config.AWS_PKG_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(pkg_obj['analyses'])
                    else:
                        obj.update(pkg_obj)
                # Create Gremlin Query
                str_gremlin = GraphPopulator.create_query_string(obj)
                if str_gremlin:
                    # Fire Gremlin HTTP query now
                    epv_full = pkg_ecosystem + ":" + pkg_name + ":" + pkg_version
                    logger.info("Ingestion initialized for EPV - %s" % epv_full)
                    epv.append(epv_full)
                    payload = {'gremlin': str_gremlin}
                    response = requests.post(config.GREMLIN_SERVER_URL_REST,
                                             data=json.dumps(payload), timeout=30)
                    resp = response.json()
                    if resp['status']['code'] == 200:
                        count_imported_EPVs += 1
                        last_imported_EPV = (obj.get('ecosystem') + ":" +
                                             obj.get('package') + ":" +
                                             obj.get('version'))
                        # update first key with graph synced tag
                        logger.info("Mark as synced in RDS %s" % last_imported_EPV)
                        if not config.AWS_S3_IS_LOCAL:  # pragma: no cover
                            PostgresHandler().mark_epv_synced(
                                obj.get('ecosystem'),
                                obj.get('package'),
                                obj.get('version')
                            )
            except Exception as e:  # pragma: no cover
                logger.error(e)
                msg = _get_exception_msg("The import failed", e)
                report['status'] = 'Failure'
                report['message'] = msg
                report['epv'] = epv_key
    # NOTE(review): this unconditionally overwrites report['epv'], including
    # the failing epv_key stored in the except branch above — confirm intended.
    report['epv'] = epv_list
    report['count_imported_EPVs'] = count_imported_EPVs
    if count_imported_EPVs == 0 and report['status'] == 'Success':
        report['message'] = 'Nothing to be synced to Graph!'
    report['last_imported_EPV'] = last_imported_EPV
    return report
def test_get_latest_versions_for_ep():
    """Test basic behavior of function get_latest_versions_for_ep."""
    # Packages that exist upstream must resolve to some latest version.
    existing = [
        ("maven", "tomcat:catalina"),
        ("maven", "org.abcl:abcl"),
        ("pypi", "numpy"),
        ("npm", "array"),
        ("npm", "lerna-tt-pk2-sy"),
        ("golang", "github.com/grafana/grafana"),
    ]
    for ecosystem, package in existing:
        assert get_latest_versions_for_ep(ecosystem, package) is not None
    # Unknown packages in supported ecosystems yield a falsy result.
    missing = [
        ("golang", "no_such_pkg_exist"),
        ("npm", "abyzdeopkl"),
        ("maven", "abyzdeopkl"),
        ("pypi", "abyzdeopkl"),
    ]
    for ecosystem, package in missing:
        assert not get_latest_versions_for_ep(ecosystem, package)
    # An unsupported ecosystem and a missing package name are both rejected.
    with pytest.raises(ValueError):
        get_latest_versions_for_ep("cobol", "cds-parsers")
    with pytest.raises(ValueError):
        get_latest_versions_for_ep("maven", None)