def _import_grouped_keys_http(data_source, dict_grouped_keys):
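    """Import grouped S3 keys into the graph via the Gremlin HTTP endpoint.

    For each group, the first key supplies the base EPV object and the
    remaining keys are merged into it before a single Gremlin query is
    posted. Returns a report dict with the status, message, imported count,
    last imported EPV and the latest ``finished_at`` timestamp seen.
    """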
    logger.debug("Begin import...")
    date_time_format = "%Y-%m-%dT%H:%M:%S.%f"

    report = {'status': 'Success', 'message': 'The import finished successfully!'}
    count_imported_EPVs = 0
    max_finished_at = None
    max_datetime = None
    last_imported_EPV = None
    epv = []
    if not dict_grouped_keys:
        report['message'] = 'Nothing to be imported! No data found on S3 to be imported!'
    try:
        for counter, v in dict_grouped_keys.items():
            first_key = v[0]
            obj, cur_finished_at = _first_key_info(data_source, first_key)
            if obj is None:
                continue
            obj_returned = _other_key_info(data_source, other_keys=v[1:])
            obj.update(obj_returned)

            str_gremlin = GraphPopulator.create_query_string(obj)
            logger.debug("Importing " + first_key)
            logger.debug("File---- %s  numbered---- %d  added:" % (first_key, counter))

            # Fire Gremlin HTTP query now
            epv_full = (obj.get('ecosystem') + ":" + obj.get('package') +
                        ":" + obj.get('version'))
            logger.info("Ingestion initialized for EPV - %s", epv_full)
            epv.append(epv_full)
            payload = {'gremlin': str_gremlin}
            # timeout added for parity with the S3 import helpers below
            response = requests.post(config.GREMLIN_SERVER_URL_REST,
                                     data=json.dumps(payload),
                                     timeout=30)
            resp = response.json()

            if resp['status']['code'] == 200:
                count_imported_EPVs += 1
                last_imported_EPV = first_key
                max_finished_at = _set_max_finished_at(max_finished_at,
                                                       cur_finished_at,
                                                       max_datetime,
                                                       date_time_format)
                max_datetime = datetime.strptime(max_finished_at,
                                                 date_time_format)

        report['epv'] = epv

    except Exception as e:
        msg = _get_exception_msg("The import failed", e)
        report['status'] = 'Failure'
        report['message'] = msg

    report['count_imported_EPVs'] = count_imported_EPVs
    report['last_imported_EPV'] = last_imported_EPV
    report['max_finished_at'] = max_finished_at
    return report


def _import_keys_from_s3_http(data_source, epv_list):
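    """Import EPV analyses from S3 into the graph over Gremlin HTTP.

    Each entry of ``epv_list`` maps an EPV key to its ecosystem, package,
    version and the S3 keys of the package- and version-level analyses.
    The merged object is rendered into a Gremlin query and posted to
    ``config.GREMLIN_SERVER_URL_REST``. Returns a report dict describing
    the outcome of the import.
    """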
    logger.debug("Begin import...")
    report = {
        'status': 'Success',
        'message': 'The import finished successfully!'
    }
    count_imported_EPVs = 0
    last_imported_EPV = None
    epv = []
    for epv_key in epv_list:
        for key, contents in epv_key.items():
            if (len(contents.get('pkg_list_keys')) == 0
                    and len(contents.get('ver_list_keys')) == 0):
                report['message'] = ('Nothing to be imported! '
                                     'No data found on S3 to be imported!')
                continue
            obj = {
                'ecosystem': contents.get('ecosystem'),
                'package': contents.get('package'),
                'version': contents.get('version')
            }

            try:
                # Check other Version level information and add it to common object
                if len(contents.get('ver_list_keys')) > 0:
                    first_key = contents['ver_key_prefix'] + '.json'
                    first_obj = _first_key_info(data_source, first_key,
                                                config.AWS_EPV_BUCKET)
                    obj.update(first_obj)
                    ver_obj = _other_key_info(data_source,
                                              contents.get('ver_list_keys'),
                                              config.AWS_EPV_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(ver_obj['analyses'])
                    else:
                        obj.update(ver_obj)

                # Check Package related information and add it to package object
                if len(contents.get('pkg_list_keys')) > 0:
                    pkg_obj = _other_key_info(data_source,
                                              contents.get('pkg_list_keys'),
                                              config.AWS_PKG_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(pkg_obj['analyses'])
                    else:
                        obj.update(pkg_obj)

                # Create Gremlin Query
                str_gremlin = GraphPopulator.create_query_string(obj)

                if str_gremlin:
                    # Fire Gremlin HTTP query now
                    logger.info("Ingestion initialized for EPV - " +
                                obj.get('ecosystem') + ":" +
                                obj.get('package') + ":" + obj.get('version'))
                    epv.append(
                        obj.get('ecosystem') + ":" + obj.get('package') + ":" +
                        obj.get('version'))
                    payload = {'gremlin': str_gremlin}
                    response = requests.post(config.GREMLIN_SERVER_URL_REST,
                                             data=json.dumps(payload),
                                             timeout=30)
                    resp = response.json()

                    if resp['status']['code'] == 200:
                        count_imported_EPVs += 1
                        last_imported_EPV = (obj.get('ecosystem') + ":" +
                                             obj.get('package') + ":" +
                                             obj.get('version'))

            except Exception as e:
                msg = _get_exception_msg("The import failed", e)
                report['status'] = 'Failure'
                report['message'] = msg
                report['epv'] = epv_key

    report['epv'] = epv_list
    report['count_imported_EPVs'] = count_imported_EPVs
    if count_imported_EPVs == 0 and report['status'] == 'Success':
        report['message'] = 'Nothing to be synced to Graph!'
    report['last_imported_EPV'] = last_imported_EPV

    return report

Example #3

def _import_keys_from_s3_http(data_source, epv_list):
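    """Import EPV analyses from S3 into the graph (newer variant).

    Same flow as the previous example, with two additions: the EPV object
    carries the ``source_repo`` attribute, and every successfully ingested
    EPV is marked as synced in RDS via ``PostgresHandler().mark_epv_synced``
    (skipped when ``config.AWS_S3_IS_LOCAL`` is set).
    """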
    # TODO: reduce cyclomatic complexity
    logger.debug("Begin import...")
    report = {
        'status': 'Success',
        'message': 'The import finished successfully!'
    }
    count_imported_EPVs = 0
    last_imported_EPV = None
    epv = []
    for epv_key in epv_list:
        for key, contents in epv_key.items():
            if (len(contents.get('pkg_list_keys')) == 0
                    and len(contents.get('ver_list_keys')) == 0):
                report['message'] = ('Nothing to be imported! '
                                     'No data found on S3 to be imported!')
                continue
            pkg_ecosystem = contents.get('ecosystem')
            pkg_name = contents.get('package')
            pkg_version = contents.get('version') or ''
            pkg_source = contents.get('source_repo', pkg_ecosystem)

            obj = {
                'ecosystem': pkg_ecosystem,
                'package': pkg_name,
                'version': pkg_version,
                'source_repo': pkg_source
            }

            try:
                # Check other Version level information and add it to common object
                if len(contents.get('ver_list_keys')) > 0:
                    first_key = contents['ver_key_prefix'] + '.json'
                    first_obj = _first_key_info(data_source, first_key,
                                                config.AWS_EPV_BUCKET)
                    obj.update(first_obj)
                    ver_obj = _other_key_info(data_source,
                                              contents.get('ver_list_keys'),
                                              config.AWS_EPV_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(ver_obj['analyses'])
                    else:
                        obj.update(ver_obj)

                # Check Package related information and add it to package object
                if len(contents.get('pkg_list_keys')) > 0:
                    pkg_obj = _other_key_info(data_source,
                                              contents.get('pkg_list_keys'),
                                              config.AWS_PKG_BUCKET)
                    if 'analyses' in obj:
                        obj.get('analyses', {}).update(pkg_obj['analyses'])
                    else:
                        obj.update(pkg_obj)

                # Create Gremlin Query
                str_gremlin = GraphPopulator.create_query_string(obj)

                if str_gremlin:
                    # Fire Gremlin HTTP query now
                    epv_full = pkg_ecosystem + ":" + pkg_name + ":" + pkg_version
                    logger.info("Ingestion initialized for EPV - %s" %
                                epv_full)
                    epv.append(epv_full)
                    payload = {'gremlin': str_gremlin}
                    response = requests.post(config.GREMLIN_SERVER_URL_REST,
                                             data=json.dumps(payload),
                                             timeout=30)
                    resp = response.json()

                    if resp['status']['code'] == 200:
                        count_imported_EPVs += 1
                        last_imported_EPV = (obj.get('ecosystem') + ":" +
                                             obj.get('package') + ":" +
                                             obj.get('version'))

                        # update first key with graph synced tag
                        logger.info("Mark as synced in RDS %s" %
                                    last_imported_EPV)
                        if not config.AWS_S3_IS_LOCAL:  # pragma: no cover
                            PostgresHandler().mark_epv_synced(
                                obj.get('ecosystem'), obj.get('package'),
                                obj.get('version'))

            except Exception as e:  # pragma: no cover
                logger.error(e)
                msg = _get_exception_msg("The import failed", e)
                report['status'] = 'Failure'
                report['message'] = msg
                report['epv'] = epv_key

    report['epv'] = epv_list
    report['count_imported_EPVs'] = count_imported_EPVs
    if count_imported_EPVs == 0 and report['status'] == 'Success':
        report['message'] = 'Nothing to be synced to Graph!'
    report['last_imported_EPV'] = last_imported_EPV

    return report
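
For reference, here is a minimal sketch of how _import_keys_from_s3_http might be invoked. The epv_list layout is inferred from the loop in the function above; the concrete EPV, the S3 key names and the S3DataSource helper are illustrative placeholders, not part of the snippets.

# Illustrative only: the epv_list shape below is inferred from the loop in
# _import_keys_from_s3_http; the EPV values, S3 key names and S3DataSource
# class are hypothetical placeholders.
epv_list = [
    {
        "npm:serve-static:1.7.1": {
            "ecosystem": "npm",
            "package": "serve-static",
            "version": "1.7.1",
            "source_repo": "npm",
            "ver_key_prefix": "npm/serve-static/1.7.1",
            "ver_list_keys": ["npm/serve-static/1.7.1/security_issues.json"],
            "pkg_list_keys": ["npm/serve-static/github_details.json"],
        }
    }
]

report = _import_keys_from_s3_http(data_source=S3DataSource(),
                                   epv_list=epv_list)
print(report["status"], report["count_imported_EPVs"],
      report["last_imported_EPV"])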