def import_bulk(data_source, book_keeper):
    """
    Import bulk data from the given data source into the graph.

    Two modes are supported, selected by the metadata currently stored in the graph:

    * 'full import' - the graph has no metadata yet (GraphPopulator.get_metadata() returned None),
      so every file listed by the data source is imported.
    * 'incremental update' - metadata exists, so only the files under the key prefixes of the
      package-versions that the book keeper reports as finished since the last incremental
      update timestamp are imported.

    After the import, the graph metadata is refreshed when the report carries a
    'max_finished_at' value.

    :param data_source: Data source to read input from; must provide get_source_name()
                        and list_files() (the latter optionally taking a `prefix` keyword)
    :param book_keeper: Book keeper to get info about recently ingested data; it may be None
                        only when a full import is performed
    :return: the report produced by _import_grouped_keys() for the imported keys
    :raises RuntimeError: if any step fails (including a missing book keeper during an
                          incremental update); the message is built by _get_exception_msg()
    """
    try:
        # The graph metadata carries the timestamp of the last incremental update.
        graph_meta = GraphPopulator.get_metadata()

        list_keys = []
        if graph_meta is None:
            # No metadata: the graph is treated as not populated yet, so do a full import
            # over every file the data source lists.
            logger.debug("Performing full import. Fetching all objects from : %s",
                         data_source.get_source_name())
            list_keys = data_source.list_files()
        else:
            # Metadata is available: perform an incremental update.
            if book_keeper is None:
                raise RuntimeError("Cannot perform incremental update without book keeper!")

            # Collect the package-versions that were updated since the last incremental update.
            # Note: no fallback is implemented here for an unreachable book keeper; any error
            # raised by it ends up in the RuntimeError raised at the bottom of this function.
            min_finished_at = graph_meta.last_incr_update_ts
            list_epv = book_keeper.get_recent_epv(min_finished_at)

            # Collect the relevant files from the data source, one listing per package-version.
            logger.debug("Performing incremental update. Fetching some objects from : %s",
                         data_source.get_source_name())
            for epv in list_epv:
                # NOTE(review): the prefix has no trailing separator, so depending on how
                # list_files() matches prefixes, '<eco>/<name>/1.0' might also match
                # '<eco>/<name>/1.0.1/...' - confirm against the data source implementation.
                key_prefix = "/".join((epv.get('ecosystem'), epv.get('name'), epv.get('version')))
                list_keys.extend(data_source.list_files(prefix=key_prefix))

        # Group the collected keys by package-version and import them.
        dict_grouped_keys = _group_keys_by_epv(list_keys, data_source)
        report = _import_grouped_keys(data_source, dict_grouped_keys)

        # In the end, update the meta-data in the graph, but only if the report
        # actually carries a new timestamp.
        max_finished_at = report.get('max_finished_at')
        if max_finished_at is not None:
            GraphPopulator.update_metadata({
                'last_incremental_update_timestamp': max_finished_at,
                'last_imported_epv': report.get('last_imported_EPV'),
            })
        _log_report_msg("import_bulk()", report)

    except Exception as e:
        # Every failure is reported to the caller as a RuntimeError with a uniform message.
        msg = _get_exception_msg("import_bulk() failed with error", e)
        raise RuntimeError(msg)

    return report
def update_graph_metadata(input_json):
    """
    Update the metadata stored in the graph.

    Thin wrapper that forwards `input_json` unchanged to GraphPopulator.update_metadata().

    :param input_json: metadata to store; presumably a dict-like object in the shape expected by
                       GraphPopulator.update_metadata() - TODO confirm against that method
    :return: None (the result of the wrapped call is not returned)
    """
    GraphPopulator.update_metadata(input_json)