def test_execute_gremlin_dsl(mocker):
    """Exercise execute_gremlin_dsl with an empty, a "true" and a "false" mock."""
    def build_payload():
        # Each call returns a brand-new payload object with the same content.
        return {
            'gremlin': "g.V().has('ecosystem', eco).has('name',pkg).valueMap()",
            'bindings': {
                'eco': 'maven',
                'pkg': 'io.vertx:vertx-web'
            }
        }

    request = build_payload()

    # Phase 1: the mock yields an empty string, so no result is expected.
    mocker.return_value = ""
    assert execute_gremlin_dsl(request) is None

    # Phase 2: the "true" session must produce a response with the known
    # request id and data containing the key "a"; the payload is reused.
    mocker.return_value = MockedSession("true")
    response = execute_gremlin_dsl(request)
    assert response['requestId'] == "f98d1366-738e-4c14-a3ff-594f359e131c"
    data = get_response_data(response, [{0: 0}])
    assert "a" in data

    # Phase 3: the "false" session, queried with a freshly built payload,
    # must again produce no result.
    mocker.return_value = MockedSession("false")
    assert execute_gremlin_dsl(build_payload()) is None
def get_repos():
    """Read all the repo data.

    Runs two graph queries: the first finds the vertices that have a
    'has_dependency' edge to any version of the packages in PACKAGE_DATA,
    the second lists every dependency of those repositories.  The outcome
    is written to the module-level VERSION_DATA and REPO_DATA dictionaries;
    nothing is returned.
    """
    pkg_list = [PACKAGE_DATA[pkg]['name'] for pkg in PACKAGE_DATA]
    eco_lst = [PACKAGE_DATA[pkg]['ecosystem'] for pkg in PACKAGE_DATA]

    query_str = "g.V().has('pecosystem',within(eco_lst))." \
                "has('pname',within(pkg_list))" \
                ".in('has_dependency').valueMap()"
    payload = {
        'gremlin': query_str,
        'bindings': {
            'pkg_list': pkg_list,
            'eco_lst': eco_lst
        }
    }
    gremlin_response = execute_gremlin_dsl(payload)
    result_data = get_response_data(gremlin_response, [{0: 0}])
    repo_list = [get_value(data, 'repo_url') for data in result_data]

    query_str = "g.V().has('repo_url', within(repo_list)).as('a')." \
                "out('has_dependency').as('b').select('a','b').by(valueMap())"
    payload = {
        'gremlin': query_str,
        'bindings': {
            'repo_list': repo_list
        }
    }
    gremlin_response = execute_gremlin_dsl(payload)
    result_data = get_response_data(gremlin_response, [{0: 0}])

    for result in result_data:
        repo = get_value(result['a'], 'repo_url')
        version_node = result['b']
        # Build a new list for every version: storing one shared, repeatedly
        # cleared list would leave all entries pointing at the licenses of
        # the last processed result.
        licenses = list(version_node['licenses']) if 'licenses' in version_node else []
        eco = get_value(version_node, 'pecosystem')
        name = get_value(version_node, 'pname')
        version = get_value(version_node, 'version')
        key = eco + ":" + name + ":" + version

        VERSION_DATA[key] = {
            'version': version,
            'package': eco + ":" + name,
            'license': licenses
        }

        repo_entry = REPO_DATA.setdefault(repo, {})
        repo_entry['ecosystem'] = eco
        dependencies = repo_entry.setdefault('dependencies', [])
        if key not in dependencies:
            dependencies.append(key)
def get_dependency_data(epv_set):
    """Get dependency data from graph.

    :param epv_set: mapping with 'direct' and 'transitive' entries whose keys
        are 'ecosystem|#|name|#|version' strings.
    :return: dict with the keys 'result', 'unknown_deps' and
        'transitive_count'.

    Direct dependencies are queried in batches of at most GREMLIN_QUERY_SIZE
    sub-queries per Gremlin script; transitive ones are delegated to
    get_tr_dependency_data().
    """
    epv_list = {"result": {"data": [], "unknown_deps": []}}
    unknown_deps_list = []
    # NOTE(review): eco/name/ver are interpolated straight into the script
    # text; confirm they cannot carry quote characters from untrusted input.
    batch_query = "a = g.V().has('pecosystem', '{eco}').has('pname', '{name}')." \
                  "has('version', '{ver}').dedup(); a.clone().as('version')." \
                  "in('has_version').dedup().as('package').select('version')." \
                  "coalesce(out('has_cve').as('cve')." \
                  "select('package','version','cve').by(valueMap())," \
                  "select('package','version').by(valueMap()))." \
                  "fill(epv);"

    dep_list = []
    fragments = []
    for epv in epv_set['direct']:
        eco, name, ver = epv.split('|#|')
        dep_list.append((name, ver))
        fragments.append(batch_query.format(eco=eco, name=name, ver=ver))

    # Slice the sub-queries into full-sized batches.  Every batch carries up
    # to GREMLIN_QUERY_SIZE sub-queries and no empty script is ever sent.
    batch_size = max(1, GREMLIN_QUERY_SIZE)
    for start in range(0, len(fragments), batch_size):
        query = "epv=[];" + "".join(fragments[start:start + batch_size])
        result = execute_gremlin_dsl(url=GREMLIN_SERVER_URL_REST,
                                     payload={'gremlin': query})
        if result:
            epv_list['result']['data'] += result['result']['data']

    tr_epv_list, tr_list = get_tr_dependency_data(epv_set)
    transitive_count = len(tr_epv_list['result']['data'])

    # Identification of unknown direct dependencies
    epv_data = epv_list['result']['data']
    epv_list, unknown_deps_list = find_unknown_deps(epv_data, epv_list, dep_list,
                                                    unknown_deps_list)

    # Identification of unknown transitive dependencies
    epv_data = tr_epv_list['result']['data']
    epv_list, unknown_deps_list = find_unknown_deps(epv_data, epv_list, tr_list,
                                                    unknown_deps_list, True)

    result = add_transitive_details(epv_list, epv_set)
    return {
        'result': result,
        'unknown_deps': unknown_deps_list,
        'transitive_count': transitive_count
    }
def get_version_information(input_list, ecosystem):
    """Fetch package and version data for the given packages of one ecosystem.

    One sub-query per package is appended to a single Gremlin script; each
    selects the package vertex together with the version vertices whose
    version equals the package's 'latest_non_cve_version' or 'latest_version'
    value.  Returns an empty list when the graph call yields None.
    """
    # NOTE(review): ecosystem and package names are interpolated into the
    # script text; confirm they never contain quote characters.
    per_package_query = (
        "pkg = g.V().has('ecosystem', '{eco}').has('name', '{pkg}'); "
        "lnv = []; pkg.clone().values('latest_non_cve_version', "
        "'latest_version').fill(lnv); pkg.clone().as('package').V()."
        "has('pecosystem', '{eco}').has('pname', '{pkg}')."
        "has('version', within(lnv)).as('version')."
        "select('package', 'version').by(valueMap()).fill(data);"
    )
    fragments = [per_package_query.format(eco=ecosystem, pkg=package)
                 for package in input_list]
    # The script starts by declaring the accumulator and ends by yielding it.
    script = "data=[]; " + "".join(fragments) + "data"

    # Query Gremlin with packages list to get their version information
    gremlin_response = execute_gremlin_dsl(url=GREMLIN_SERVER_URL_REST,
                                           payload={'gremlin': script})
    if gremlin_response is None:
        return []
    return get_response_data(gremlin_response, [{0: 0}])
def read_packages():
    """Read all the packages last updated.

    Queries the graph for vertices whose 'latest_version_last_updated'
    property equals yesterday's UTC date (formatted as YYYYMMDD) and stores
    one entry per 'ecosystem:name' key in the module-level PACKAGE_DATA.
    When the graph call returns None, a message is printed and the process
    exits.
    """
    print("read_packages() started")
    # For manual testing, replace prev_date with a fixed date such as '20180824'.
    prev_date = (datetime.utcnow() - timedelta(1)).strftime('%Y%m%d')
    query_str = "g.V().has('latest_version_last_updated',prev_date).valueMap()"
    payload = {'gremlin': query_str, 'bindings': {'prev_date': prev_date}}

    gremlin_response = execute_gremlin_dsl(payload)
    if gremlin_response is None:
        print(
            "Exception occured while trying to fetch packages : read_package")
        sys.exit()
    result_data = get_response_data(gremlin_response, [{0: 0}])

    for result in result_data:
        eco = get_value(result, 'ecosystem')
        name = get_value(result, 'name')
        # Each returned package (re)starts with an empty version list.
        PACKAGE_DATA[eco + ":" + name] = {
            'name': name,
            'ecosystem': eco,
            'versions': []
        }
    print("read_packages() ended")
def get_version_information(input_list, ecosystem):
    """Fetch package/version pairs for the given packages of one ecosystem.

    The query follows 'has_version' edges from the matching package vertices
    and keeps only versions without an outgoing 'has_cve' edge.  Returns an
    empty list when the graph call yields None.
    """
    bindings = {
        'ecosystem': ecosystem,
        'input_packages': list(input_list)
    }
    str_query = (
        "g.V().has('ecosystem',ecosystem).has('name',within(input_packages))"
        ".as('package').out('has_version')"
        ".not(outE('has_cve')).as('version').select('package','version')."
        "by(valueMap()).dedup()"
    )

    # Query Gremlin with packages list to get their version information
    gremlin_response = execute_gremlin_dsl(
        url=GREMLIN_SERVER_URL_REST,
        payload={'gremlin': str_query, 'bindings': bindings})
    if gremlin_response is None:
        return []
    return get_response_data(gremlin_response, [{0: 0}])
def read_packages():
    """Read all the packages last updated.

    Queries the graph for vertices whose 'latest_version_last_updated'
    property equals yesterday's UTC date (formatted as YYYYMMDD) and stores,
    per 'ecosystem:name' key in the module-level PACKAGE_DATA, the package's
    latest version, its libio latest version, its name and its ecosystem.
    """
    # For manual testing, replace prev_date with a fixed date such as '20180805'.
    prev_date = (datetime.utcnow() - timedelta(1)).strftime('%Y%m%d')
    query_str = "g.V().has('latest_version_last_updated',prev_date).valueMap()"
    payload = {
        'gremlin': query_str,
        'bindings': {
            'prev_date': prev_date
        }
    }
    gremlin_response = execute_gremlin_dsl(payload)
    result_data = get_response_data(gremlin_response, [{0: 0}])

    for result in result_data:
        latest = get_value(result, 'latest_version')
        libio = get_value(result, 'libio_latest_version')
        eco = get_value(result, 'ecosystem')
        name = get_value(result, 'name')
        # Any existing entry for the package is replaced wholesale.
        PACKAGE_DATA[eco + ":" + name] = {
            'latest': latest,
            'libio': libio,
            'name': name,
            'ecosystem': eco
        }
def remove_cve_versions():
    """Remove CVE versions.

    Looks up the 'latest' and 'libio' versions of every package in
    PACKAGE_DATA.  A version carrying 'cve_ids' is dropped from its package
    entry, and the entry itself is removed once neither 'latest' nor 'libio'
    is left.  Versions without 'cve_ids' are recorded, with their licenses,
    in NEW_VERSION_DATA.  When the graph call returns None, a message is
    printed and the process exits.
    """
    print("remove_cve_versions() started")
    packages = [PACKAGE_DATA[pkg] for pkg in PACKAGE_DATA]
    # dict.fromkeys de-duplicates in linear time and keeps first-seen order.
    pkg_list = list(dict.fromkeys(package['name'] for package in packages))
    ver_list = list(dict.fromkeys(
        ver for package in packages for ver in (package['latest'], package['libio'])))
    eco_lst = list(dict.fromkeys(package['ecosystem'] for package in packages))

    query_str = "g.V().has('pecosystem',within(eco_lst))." \
                "has('pname',within(pkg_list))" \
                ".has('version',within(ver_list)).valueMap()"
    payload = {
        'gremlin': query_str,
        'bindings': {
            'pkg_list': pkg_list,
            'ver_list': ver_list,
            'eco_lst': eco_lst
        }
    }
    gremlin_response = execute_gremlin_dsl(payload)
    if gremlin_response is None:
        print("Exception occured while trying to fetch versions : remove_cve_versions")
        sys.exit()
    result_data = get_response_data(gremlin_response, [{0: 0}])

    for result in result_data:
        name = get_value(result, 'pname')
        eco = get_value(result, 'pecosystem')
        ver = get_value(result, 'version')
        pkg_key = eco + ":" + name

        if 'cve_ids' in result:
            package = PACKAGE_DATA.get(pkg_key)
            if package is None:
                # The three within() filters are independent, so a row may
                # belong to a package that is not tracked or that an earlier
                # row already removed.
                continue
            # Test for key presence first: an earlier row may already have
            # dropped 'latest' or 'libio' from this entry.
            if 'latest' in package and package['latest'] == ver:
                del package['latest']
            elif 'libio' in package and package['libio'] == ver:
                del package['libio']
            if 'libio' not in package and 'latest' not in package:
                del PACKAGE_DATA[pkg_key]
        else:
            # A fresh list per version keeps the stored entries independent
            # of each other.
            licenses = list(result['licenses']) if 'licenses' in result else []
            NEW_VERSION_DATA[pkg_key + ":" + ver] = {
                'version': ver,
                'package': pkg_key,
                'license': licenses
            }
    print("remove_cve_versions() ended")
def test_execute_gremlin_dsl2():
    """Check that execute_gremlin_dsl yields None when called without a mocker."""
    bindings = {
        'eco': 'maven',
        'pkg': 'io.vertx:vertx-web'
    }
    request = {
        'gremlin': "g.V().has('ecosystem', eco).has('name',pkg).valueMap()",
        'bindings': bindings
    }
    assert execute_gremlin_dsl(request) is None
def test_execute_gremlin_dsl(mocker):
    """Test the function execute_gremlin_dsl.

    With the mock's return value set to an empty string, the call is
    expected to yield None.
    """
    mocker.return_value = ""
    query_str = "g.V().has('ecosystem', eco).has('name',pkg).valueMap()"
    payload = {
        'gremlin': query_str,
        'bindings': {
            'eco': 'maven',
            'pkg': 'io.vertx:vertx-web'
        }
    }
    out = execute_gremlin_dsl(payload)
    assert out is None
def perform_license_analysis(license_score_list, dependencies):
    """Send license_score_list to the stack_license endpoint and digest the reply.

    Every dependency whose name and version match a package of the reply
    gets that package's 'license_analysis' attached in place.  Returns the
    tuple (output, dependencies); when the call raises RequestException or
    yields a falsy reply, the failure is logged and output keeps its default
    values.
    """
    license_url = LICENSE_SCORING_URL_REST + "/api/v1/stack_license"
    request_body = {"packages": license_score_list}

    # Defaults that are reported when the analysis cannot be performed.
    output = {
        "reason": None,
        "status": None,
        "f8a_stack_licenses": [],
        "unknown_licenses": [],
        "conflict_packages": [],
        "outlier_packages": []
    }

    # TODO: refactoring
    analysis_failed = False
    try:
        resp = execute_gremlin_dsl(url=license_url, payload=request_body)
        if not resp:
            # A falsy reply is routed through the same handler as a failed request.
            raise requests.exceptions.RequestException
    except requests.exceptions.RequestException:
        current_app.logger.exception("Unexpected error happened while invoking license analysis!")
        analysis_failed = True

    if analysis_failed:
        return output, dependencies

    for component in resp.get('packages', []):  # output from license analysis
        for dependency in dependencies:  # the known dependencies
            if dependency.get('name', '') != component.get('package', ''):
                continue
            if dependency.get('version', '') != component.get('version', ''):
                continue
            dependency['license_analysis'] = component.get('license_analysis', {})

    output["reason"] = resp.get('message')
    reported_license = resp.get('stack_license', None)
    output["f8a_stack_licenses"] = [] if reported_license is None else [reported_license]
    output["status"] = resp.get('status', None)
    output["unknown_licenses"] = _extract_unknown_licenses(resp)
    output["conflict_packages"] = _extract_conflict_packages(resp)
    output["outlier_packages"] = _extract_license_outliers(resp)
    return output, dependencies
def get_tr_dependency_data(epv_set):
    """Get transitive dependency data from graph.

    :param epv_set: mapping whose 'transitive' entry has
        'ecosystem|#|name|#|version' strings as keys.
    :return: tuple (tr_epv_list, tr_list): the accumulated graph data in the
        form {"result": {"data": [...]}} and the list of queried
        (name, version) pairs.

    The sub-queries are sent in batches of at most GREMLIN_QUERY_SIZE per
    Gremlin script.
    """
    tr_epv_list = {"result": {"data": []}}
    # NOTE(review): eco/name/ver are interpolated straight into the script
    # text; confirm they cannot carry quote characters from untrusted input.
    batch_query = "g.V().has('pecosystem', '{eco}').has('pname', '{name}')." \
                  "has('version', '{ver}').dedup().as('version').select('version')." \
                  "coalesce(out('has_cve').as('cve')." \
                  "select('version','cve').by(valueMap())" \
                  ", select('version', 'version').by(valueMap()))" \
                  ".fill(epv);"

    tr_list = []
    fragments = []
    for epv in epv_set['transitive']:
        eco, name, ver = epv.split('|#|')
        tr_list.append((name, ver))
        fragments.append(batch_query.format(eco=eco, name=name, ver=ver))

    # Slice the sub-queries into full-sized batches.  Every batch carries up
    # to GREMLIN_QUERY_SIZE sub-queries and no empty script is ever sent.
    batch_size = max(1, GREMLIN_QUERY_SIZE)
    for start in range(0, len(fragments), batch_size):
        query = "epv=[];" + "".join(fragments[start:start + batch_size])
        result = execute_gremlin_dsl(url=GREMLIN_SERVER_URL_REST,
                                     payload={'gremlin': query})
        if result:
            tr_epv_list['result']['data'] += result['result']['data']

    return tr_epv_list, tr_list
def get_recommended_version(ecosystem, name, version):
    """Pick a CVE-free version of the package that is newer than `version`.

    Candidates are the package's versions without an outgoing 'has_cve'
    edge; select_latest_version() is applied across them, starting from
    `version`.  Returns None when the graph yields nothing or when the
    selection ends up equal to `version`.
    """
    # NOTE(review): ecosystem and name are interpolated into the script
    # text; confirm they never contain quote characters.
    query_template = "g.V().has('ecosystem', '{eco}').has('name', '{pkg}')" \
                     ".out('has_version').not(out('has_cve')).values('version');"
    payload = {'gremlin': query_template.format(eco=ecosystem, pkg=name)}

    result = execute_gremlin_dsl(url=GREMLIN_SERVER_URL_REST, payload=payload)
    if not result:
        return None

    candidates = result['result']['data']
    if len(candidates) == 0:
        return None

    best = version
    for candidate in candidates:
        best = select_latest_version(candidate, best)
    return None if best == version else best
def get_version_data(pkg_data, new_ver_data, version_data, tr_flag="false"):
    """Get all the version info for the packages.

    :param pkg_data: dict, updated in place with one entry per
        'ecosystem:name' key listing the version keys found for it.
    :param new_ver_data: dict, updated in place with one entry per
        'ecosystem:name:version' key.
    :param version_data: dict mapping a dependency key to a record that has
        a 'name' field.
    :param tr_flag: the string "true" to walk each repo's 'tr_dependencies',
        "false" (default) to walk its 'dependencies'; any other value makes
        every repo be skipped.

    Only versions without a 'cve_ids' property are requested from the
    graph.  When the graph call returns None, a message is printed and the
    process exits.
    """
    print("get_version_data() started")
    pkg_names = set()
    ecosystems = set()
    for repo in REPO_DATA:
        repo_entry = REPO_DATA[repo]
        # Compare by value: identity checks against string literals are not
        # reliable.  Repos lacking the requested dependency list are skipped.
        if tr_flag == "true" and "tr_dependencies" in repo_entry:
            deps = repo_entry['tr_dependencies']
        elif tr_flag == "false" and "dependencies" in repo_entry:
            deps = repo_entry['dependencies']
        else:
            continue
        for dep in deps:
            pkg_names.add(version_data[dep]['name'])
            ecosystems.add(repo_entry['ecosystem'])

    query_str = "g.V().has('pecosystem',within(eco_list))." \
                "has('pname',within(pkg_list))" \
                ".hasNot('cve_ids').valueMap().dedup()"
    payload = {
        'gremlin': query_str,
        'bindings': {
            'pkg_list': list(pkg_names),
            'eco_list': list(ecosystems)
        }
    }
    gremlin_response = execute_gremlin_dsl(payload)
    if gremlin_response is None:
        print(
            "Exception occured while trying to fetch versions : get_version_data"
        )
        sys.exit()
    result_data = get_response_data(gremlin_response, [{0: 0}])

    for result in result_data:
        name = get_value(result, 'pname')
        eco = get_value(result, 'pecosystem')
        ver = get_value(result, 'version')
        pkg_key = eco + ":" + name
        key = pkg_key + ":" + ver
        # A fresh list per version keeps the stored entries independent of
        # each other.
        licenses = list(result['licenses']) if 'licenses' in result else []
        new_ver_data[key] = {
            'version': ver,
            'package': pkg_key,
            'license': licenses
        }
        if pkg_key not in pkg_data:
            pkg_data[pkg_key] = {
                'name': name,
                'ecosystem': eco,
                'versions': []
            }
        pkg_data[pkg_key]['versions'].append(key)
    print("get_version_data() ended")
def get_repos():
    """Read all the repo data.

    Runs three graph queries: the vertices that have a 'has_dependency' edge
    to any version of the packages in PACKAGE_DATA, the direct dependencies
    of those repositories and their transitive dependencies.  The outcome is
    written to the module-level VERSION_DATA, TRANSITIVE_VERSION_DATA,
    TRANSITIVE_PACKAGE_DATA and REPO_DATA dictionaries.  When any graph call
    returns None, a message is printed and the process exits.
    """
    def _fetch(payload, failure_message):
        # Run one query; print the message and exit when it yields None.
        gremlin_response = execute_gremlin_dsl(payload)
        if gremlin_response is None:
            print(failure_message)
            sys.exit()
        return get_response_data(gremlin_response, [{0: 0}])

    print("get_repos() started")
    pkg_list = [PACKAGE_DATA[pkg]['name'] for pkg in PACKAGE_DATA]
    eco_list = [PACKAGE_DATA[pkg]['ecosystem'] for pkg in PACKAGE_DATA]

    query_str = "g.V().has('pecosystem',within(eco_list))." \
                "has('pname',within(pkg_list))" \
                ".in('has_dependency').valueMap()"
    payload = {
        'gremlin': query_str,
        'bindings': {
            'pkg_list': pkg_list,
            'eco_list': eco_list
        }
    }
    result_data = _fetch(
        payload, "Exception occured while trying to fetch repo : get_repos")
    repo_list = [get_value(data, 'repo_url') for data in result_data]

    # Direct dependencies of the repositories found above.
    query_str = "g.V().has('repo_url', within(repo_list)).as('a')." \
                "out('has_dependency').as('b').select('a','b').by(valueMap())"
    payload = {'gremlin': query_str, 'bindings': {'repo_list': repo_list}}
    result_data = _fetch(
        payload, "Exception occured while trying to fetch versions : get_repos")

    for result in result_data:
        repo = get_value(result['a'], 'repo_url')
        version_node = result['b']
        # Build a new list for every version: storing one shared, repeatedly
        # cleared list would leave all entries pointing at the licenses of
        # the last processed result.
        licenses = list(version_node['licenses']) if 'licenses' in version_node else []
        eco = get_value(version_node, 'pecosystem')
        name = get_value(version_node, 'pname')
        version = get_value(version_node, 'version')
        key = eco + ":" + name + ":" + version
        VERSION_DATA[key] = {
            'version': version,
            'name': name,
            'package': eco + ":" + name,
            'license': licenses
        }
        repo_entry = REPO_DATA.setdefault(repo, {})
        repo_entry['ecosystem'] = eco
        dependencies = repo_entry.setdefault('dependencies', [])
        if key not in dependencies:
            dependencies.append(key)

    # Transitive dependencies of the same repositories.
    query_str = "g.V().has('repo_url', within(repo_list)).as('a')." \
                "out('has_transitive_dependency').as('b').select('a','b').by(valueMap())"
    payload = {'gremlin': query_str, 'bindings': {'repo_list': repo_list}}
    result_data = _fetch(
        payload, "Exception occured while trying to fetch versions : get_repos")

    for result in result_data:
        repo = get_value(result['a'], 'repo_url')
        eco = get_value(result['b'], 'pecosystem')
        name = get_value(result['b'], 'pname')
        version = get_value(result['b'], 'version')
        pkg_key = eco + ":" + name
        key = pkg_key + ":" + version
        TRANSITIVE_VERSION_DATA[key] = {
            'version': version,
            'name': name,
            'package': pkg_key
        }
        TRANSITIVE_PACKAGE_DATA[pkg_key] = {
            'ecosystem': eco,
            'name': name,
            'versions': []
        }
        repo_entry = REPO_DATA.setdefault(repo, {})
        repo_entry['ecosystem'] = eco
        tr_dependencies = repo_entry.setdefault('tr_dependencies', [])
        if key not in tr_dependencies:
            tr_dependencies.append(key)
    print("get_repos() ended")
def get_dependency_data(epv_set):
    """Get dependency data from graph.

    :param epv_set: mapping with 'direct' and 'transitive' entries whose keys
        are 'ecosystem|#|name|#|version' strings.
    :return: dict with the keys 'result' and 'unknown_deps'.

    Direct and transitive dependencies are queried in batches of at most
    GREMLIN_QUERY_SIZE sub-queries per Gremlin script.  A requested
    (name, version) pair is reported as unknown unless a returned version
    node matches it and carries 'licenses' or 'declared_licenses'.
    """
    epv_list = {"result": {"data": [], "unknown_deps": []}}
    # Ordered set of requested (name, version) pairs.  Keying by name alone
    # would keep only one version of a package requested several times.
    requested = {}
    unknown_deps_list = []

    # NOTE(review): eco/name/ver are interpolated straight into the script
    # text; confirm they cannot carry quote characters from untrusted input.
    direct_query = "a = g.V().has('pecosystem', '{eco}').has('pname', '{name}')." \
                   "has('version', '{ver}').dedup(); a.clone().as('version')." \
                   "in('has_version').dedup().as('package').select('version')." \
                   "coalesce(out('has_cve').as('cve')." \
                   "select('package','version','cve').by(valueMap())," \
                   "select('package','version').by(valueMap()))." \
                   "fill(epv);"
    # NOTE(review): this traversal only yields versions that have a CVE, so
    # CVE-free transitive dependencies end up in 'unknown_deps' - confirm
    # that this is intended.
    transitive_query = "g.V().has('pecosystem', '{eco}').has('pname', '{name}')." \
                       "has('version', '{ver}').dedup().as('version')." \
                       "out('has_cve').as('cve')." \
                       "select('version','cve').by(valueMap())." \
                       "fill(epv);"

    def _query_in_batches(epvs, template):
        # Register the requested pairs and collect the graph data for them.
        fragments = []
        for epv in epvs:
            eco, name, ver = epv.split('|#|')
            requested[(name, ver)] = None
            fragments.append(template.format(eco=eco, name=name, ver=ver))
        # Every batch carries up to GREMLIN_QUERY_SIZE sub-queries and no
        # empty script is ever sent.
        batch_size = max(1, GREMLIN_QUERY_SIZE)
        for start in range(0, len(fragments), batch_size):
            query = "epv=[];" + "".join(fragments[start:start + batch_size])
            result = execute_gremlin_dsl(url=GREMLIN_SERVER_URL_REST,
                                         payload={'gremlin': query})
            if result:
                epv_list['result']['data'] += result['result']['data']

    _query_in_batches(epv_set['direct'], direct_query)
    _query_in_batches(epv_set['transitive'], transitive_query)

    # Identification of unknown dependencies
    epv_data = epv_list['result']['data']
    for name, ver in requested:
        known = any(
            name == knowndep['version']['pname'][0]
            and ver == knowndep['version']['version'][0]
            and (knowndep['version'].get('licenses')
                 or knowndep['version'].get('declared_licenses'))
            for knowndep in epv_data
        )
        if not known:
            unknown_deps_list.append({'name': name, 'version': ver})

    result = add_transitive_details(epv_list, epv_set)
    return {'result': result, 'unknown_deps': unknown_deps_list}