def _import_grouped_keys_http(data_source, dict_grouped_keys):
    """Import grouped S3 keys into the graph via the Gremlin HTTP endpoint.

    :param data_source: data source object used to read key contents
    :param dict_grouped_keys: dict mapping a counter to a list of keys;
        the first key in each list carries the EPV summary information
    :return: report dict with 'status', 'message', 'count_imported_EPVs',
        'last_imported_EPV', 'max_finished_at' and (on success) 'epv'
    """
    logger.debug("Begin import...")
    date_time_format = "%Y-%m-%dT%H:%M:%S.%f"
    report = {'status': 'Success',
              'message': 'The import finished successfully!'}
    count_imported_EPVs = 0
    max_finished_at = None
    max_datetime = None
    last_imported_EPV = None
    epv = []
    if not dict_grouped_keys:
        # idiomatic emptiness check (was: len(dict_grouped_keys.items()) == 0)
        report['message'] = 'Nothing to be imported! No data found on S3 to be imported!'
    try:
        for counter, v in dict_grouped_keys.items():
            first_key = v[0]
            obj, cur_finished_at = _first_key_info(data_source, first_key)
            if obj is None:
                # nothing usable behind this key; skip it
                continue
            obj_returned = _other_key_info(data_source, other_keys=v[1:])
            obj.update(obj_returned)
            str_gremlin = GraphPopulator.create_query_string(obj)
            logger.debug("Importing " + first_key)
            logger.debug("File---- %s numbered---- %d added:" % (first_key, counter))
            # Fire Gremlin HTTP query now
            epv_full = (obj.get('ecosystem') + ":" + obj.get('package') +
                        ":" + obj.get('version'))
            logger.info("Ingestion initialized for EPV - " + epv_full)
            epv.append(epv_full)
            payload = {'gremlin': str_gremlin}
            # timeout added for consistency with the other import paths; a
            # timed-out request is reported as a failure by the handler below
            response = requests.post(config.GREMLIN_SERVER_URL_REST,
                                     data=json.dumps(payload),
                                     timeout=30)
            resp = response.json()
            if resp['status']['code'] == 200:
                count_imported_EPVs += 1
                last_imported_EPV = first_key
                max_finished_at = _set_max_finished_at(
                    max_finished_at, cur_finished_at, max_datetime, date_time_format)
                max_datetime = datetime.strptime(max_finished_at, date_time_format)
        report['epv'] = epv
    except Exception as e:
        # reporting boundary: any failure is folded into the report dict
        msg = _get_exception_msg("The import failed", e)
        report['status'] = 'Failure'
        report['message'] = msg
    report['count_imported_EPVs'] = count_imported_EPVs
    report['last_imported_EPV'] = last_imported_EPV
    report['max_finished_at'] = max_finished_at
    return report
def _import_keys_from_s3_http(data_source, epv_list):
    """Import EPV data from S3 into the graph via the Gremlin HTTP endpoint.

    :param data_source: data source object used to read S3 key contents
    :param epv_list: list of dicts; each maps an EPV key to its contents
        (ecosystem/package/version plus 'pkg_list_keys'/'ver_list_keys')
    :return: report dict with 'status', 'message', 'epv',
        'count_imported_EPVs' and 'last_imported_EPV'
    """
    logger.debug("Begin import...")
    report = {'status': 'Success',
              'message': 'The import finished successfully!'}
    count_imported_EPVs = 0
    last_imported_EPV = None
    for epv_key in epv_list:
        # the dict key itself is not needed, only the contents
        for contents in epv_key.values():
            if len(contents.get('pkg_list_keys')) == 0 and \
                    len(contents.get('ver_list_keys')) == 0:
                report['message'] = 'Nothing to be imported! No data found on S3 to be imported!'
                continue
            obj = {
                'ecosystem': contents.get('ecosystem'),
                'package': contents.get('package'),
                'version': contents.get('version')
            }
            try:
                # Check other Version level information and add it to common object
                if len(contents.get('ver_list_keys')) > 0:
                    first_key = contents['ver_key_prefix'] + '.json'
                    first_obj = _first_key_info(data_source, first_key,
                                                config.AWS_EPV_BUCKET)
                    obj.update(first_obj)
                    ver_obj = _other_key_info(data_source,
                                              contents.get('ver_list_keys'),
                                              config.AWS_EPV_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(ver_obj['analyses'])
                    else:
                        obj.update(ver_obj)
                # Check Package related information and add it to package object
                if len(contents.get('pkg_list_keys')) > 0:
                    pkg_obj = _other_key_info(data_source,
                                              contents.get('pkg_list_keys'),
                                              config.AWS_PKG_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(pkg_obj['analyses'])
                    else:
                        obj.update(pkg_obj)
                # Create Gremlin Query
                str_gremlin = GraphPopulator.create_query_string(obj)
                if str_gremlin:
                    # Fire Gremlin HTTP query now
                    epv_full = (obj.get('ecosystem') + ":" + obj.get('package') +
                                ":" + obj.get('version'))
                    logger.info("Ingestion initialized for EPV - " + epv_full)
                    payload = {'gremlin': str_gremlin}
                    response = requests.post(config.GREMLIN_SERVER_URL_REST,
                                             data=json.dumps(payload),
                                             timeout=30)
                    resp = response.json()
                    if resp['status']['code'] == 200:
                        count_imported_EPVs += 1
                        last_imported_EPV = epv_full
            except Exception as e:
                msg = _get_exception_msg("The import failed", e)
                report['status'] = 'Failure'
                report['message'] = msg
                # NOTE(review): the former `report['epv'] = epv_key` here was a
                # dead store -- it was unconditionally overwritten after the
                # loop -- so it has been removed.
    report['epv'] = epv_list
    report['count_imported_EPVs'] = count_imported_EPVs
    if count_imported_EPVs == 0 and report['status'] == 'Success':
        report['message'] = 'Nothing to be synced to Graph!'
    report['last_imported_EPV'] = last_imported_EPV
    return report
def _import_keys_from_s3_http(data_source, epv_list):
    # TODO: reduce cyclomatic complexity
    """Import EPV data from S3 into the graph and mark synced rows in RDS.

    :param data_source: data source object used to read S3 key contents
    :param epv_list: list of dicts; each maps an EPV key to its contents
        (ecosystem/package/version, optional 'source_repo', plus
        'pkg_list_keys'/'ver_list_keys')
    :return: report dict with 'status', 'message', 'epv',
        'count_imported_EPVs' and 'last_imported_EPV'
    """
    logger.debug("Begin import...")
    report = {'status': 'Success',
              'message': 'The import finished successfully!'}
    count_imported_EPVs = 0
    last_imported_EPV = None
    for epv_key in epv_list:
        # the dict key itself is not needed, only the contents
        for contents in epv_key.values():
            if len(contents.get('pkg_list_keys')) == 0 and \
                    len(contents.get('ver_list_keys')) == 0:
                report['message'] = 'Nothing to be imported! No data found on S3 to be imported!'
                continue
            pkg_ecosystem = contents.get('ecosystem')
            pkg_name = contents.get('package')
            # package-level entries carry no version; normalize to ''
            pkg_version = contents.get('version') or ''
            # default the source repo to the ecosystem when not provided
            pkg_source = contents.get('source_repo', pkg_ecosystem)
            obj = {
                'ecosystem': pkg_ecosystem,
                'package': pkg_name,
                'version': pkg_version,
                'source_repo': pkg_source
            }
            try:
                # Check other Version level information and add it to common object
                if len(contents.get('ver_list_keys')) > 0:
                    first_key = contents['ver_key_prefix'] + '.json'
                    first_obj = _first_key_info(data_source, first_key,
                                                config.AWS_EPV_BUCKET)
                    obj.update(first_obj)
                    ver_obj = _other_key_info(data_source,
                                              contents.get('ver_list_keys'),
                                              config.AWS_EPV_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(ver_obj['analyses'])
                    else:
                        obj.update(ver_obj)
                # Check Package related information and add it to package object
                if len(contents.get('pkg_list_keys')) > 0:
                    pkg_obj = _other_key_info(data_source,
                                              contents.get('pkg_list_keys'),
                                              config.AWS_PKG_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(pkg_obj['analyses'])
                    else:
                        obj.update(pkg_obj)
                # Create Gremlin Query
                str_gremlin = GraphPopulator.create_query_string(obj)
                if str_gremlin:
                    # Fire Gremlin HTTP query now
                    epv_full = pkg_ecosystem + ":" + pkg_name + ":" + pkg_version
                    logger.info("Ingestion initialized for EPV - %s" % epv_full)
                    payload = {'gremlin': str_gremlin}
                    response = requests.post(config.GREMLIN_SERVER_URL_REST,
                                             data=json.dumps(payload),
                                             timeout=30)
                    resp = response.json()
                    if resp['status']['code'] == 200:
                        count_imported_EPVs += 1
                        # obj may have been updated from S3, so read from obj
                        # rather than reusing epv_full
                        last_imported_EPV = (obj.get('ecosystem') + ":" +
                                             obj.get('package') + ":" +
                                             obj.get('version'))
                        # update first key with graph synced tag
                        logger.info("Mark as synced in RDS %s" % last_imported_EPV)
                        if not config.AWS_S3_IS_LOCAL:  # pragma: no cover
                            PostgresHandler().mark_epv_synced(
                                obj.get('ecosystem'),
                                obj.get('package'),
                                obj.get('version'))
            except Exception as e:  # pragma: no cover
                logger.error(e)
                msg = _get_exception_msg("The import failed", e)
                report['status'] = 'Failure'
                report['message'] = msg
                # NOTE(review): the former `report['epv'] = epv_key` here was a
                # dead store -- it was unconditionally overwritten after the
                # loop -- so it has been removed.
    report['epv'] = epv_list
    report['count_imported_EPVs'] = count_imported_EPVs
    if count_imported_EPVs == 0 and report['status'] == 'Success':
        report['message'] = 'Nothing to be synced to Graph!'
    report['last_imported_EPV'] = last_imported_EPV
    return report