import os
import shutil
import time
import traceback
from multiprocessing.pool import ThreadPool

# Project-specific helpers used below (get_file_path, create_logger, MergeStore,
# get_implemented_calls, fetch_all_*, remove_internal_objects, replace_template,
# list_entity_files, first and the Elasticsearch helpers) are assumed to be
# imported from the surrounding ETL package.


def extract_source(source, entities, config, output_dir):
    """
    Full JSON BrAPI source extraction process
    """
    source_name = source['schema:identifier']
    action = 'extract-' + source_name
    log_file = get_file_path([config['log-dir'], action],
                             ext='.log',
                             recreate=True)
    logger = create_logger(action, log_file, config['options']['verbose'])
    pool = ThreadPool(10)

    logger.info("Extracting BrAPI {}...".format(source_name))
    try:
        # Initialize JSON merge stores
        for (entity_name, entity) in entities.items():
            entity['store'] = MergeStore(source['schema:identifier'],
                                         entity['name'])

        # Fetch the list of BrAPI calls implemented by the server
        if 'implemented-calls' not in source:
            source['implemented-calls'] = get_implemented_calls(source, logger)

        # Fetch entity lists
        fetch_all_list(source, logger, entities, pool)

        # Detail entities
        fetch_all_details(source, logger, entities, pool)

        # Link entities (internal links, internal object links and external object links)
        fetch_all_links(source, logger, entities)

        # Detail entities again (for objects that might have been discovered via links)
        fetch_all_details(source, logger, entities, pool)

        remove_internal_objects(entities)

        logger.info("SUCCEEDED Extracting BrAPI {}.".format(source_name))
    except Exception:
        logger.debug(traceback.format_exc())
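        # Discard the partial extraction output; whatever data was collected is
        # saved below under a '-failed' suffix so the failure can be inspected.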
        shutil.rmtree(output_dir)
        output_dir = output_dir + '-failed'
        logger.info(
            "FAILED Extracting BrAPI {}.\n"
            "=> Check the logs ({}) and data ({}) for more details.".format(
                source_name, log_file, output_dir))
    pool.close()

    # Save to file
    logger.info("Saving BrAPI {} to '{}'...".format(source_name, output_dir))
    for (entity_name, entity) in entities.items():
        entity['store'].save(output_dir)
        entity['store'].clear()
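
# Illustrative only: a minimal sketch of how extract_source might be invoked.
# The shapes of 'source', 'entities' and 'config' below are assumptions
# inferred from the accesses above (source['schema:identifier'],
# config['log-dir'], config['options']['verbose'], entity['name']); they are
# not a documented contract, and a real 'source' would also carry the BrAPI
# endpoint details used by the fetch_* helpers.
#
#   source = {'schema:identifier': 'my-brapi-endpoint'}          # hypothetical
#   entities = {'study': {'name': 'study'},
#               'germplasm': {'name': 'germplasm'}}
#   config = {'log-dir': 'log', 'options': {'verbose': True}}
#   extract_source(source, entities, config, 'data/json/my-brapi-endpoint')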


def load_source(source, config, source_bulk_dir, log_dir):
    """
    Full Elasticsearch document indexing process
    """
    source_name = source['schema:identifier']
    action = 'load-elasticsearch-' + source_name
    log_file = get_file_path([log_dir, action], ext='.log', recreate=True)
    logger = create_logger(source_name, log_file, config['verbose'])

    load_config = config['load-elasticsearch']
    es_client = init_es_client(load_config['url'], logger)

    logger.info("Loading '{}' into elasticsearch '{}'...".format(source_bulk_dir, load_config['url']))
    try:
        if not os.path.exists(source_bulk_dir):
            raise FileNotFoundError(
                'No such file or directory: \'{}\'.\n'
                'Please make sure you have run the BrAPI extraction and Elasticsearch document transformation'
                ' before trying to launch the loading process.'
                .format(source_bulk_dir))

        bulk_files = list(list_entity_files(source_bulk_dir))
        all_document_types = set(map(first, bulk_files))
        # Coerce to a set so the intersection works even if the config provides a list
        document_types = set(load_config.get('document-types') or all_document_types)
        document_types = document_types.intersection(all_document_types)

        index_by_document = dict()

        logger.info("Preparing index with template mapping...")
        timestamp = int(time.time())
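        # Each document type gets a fresh index named after the rendered (lowercased)
        # template plus a '-d<timestamp>' suffix; the alias step further down then
        # points the base name at that new index once it is fully loaded.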
        for document_type in document_types:
            base_index_name = replace_template(
                load_config['index-template'],
                {'source': source['schema:identifier'], 'documentType': document_type}
            ).lower()
            create_template(es_client, load_config, document_type, base_index_name, logger)

            index_name = base_index_name + '-d' + str(timestamp)
            create_index(es_client, index_name, logger)
            index_by_document[document_type] = base_index_name, index_name

        logger.info("Bulk indexing...")
        for document_type, file_path in bulk_files:
            if document_type in index_by_document:
                base_index_name, index_name = index_by_document[document_type]
                bulk_index(es_client, index_name, file_path, logger)

        logger.info("Creating index aliases and deleting old indices...")
        for document_type, (base_index_name, index_name) in index_by_document.items():
            create_alias(es_client, index_name, base_index_name, logger)
            new_index, *old_indices = get_indices(es_client, base_index_name)
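            # Assuming get_indices returns indices newest first, this keeps the most
            # recent previous index and deletes anything older.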
            for old_index in old_indices[1:]:
                delete_index(es_client, old_index, logger)

        logger.info("SUCCEEDED Loading {}.".format(source_name))
    except Exception as e:
        logger.debug(traceback.format_exc())
        logger.debug(getattr(e, 'long_message', ''))
        logger.info("FAILED Loading {} Elasticsearch documents.\n"
                    "=> Check the logs ({}) for more details."
                    .format(source_name, log_file))
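
# Illustrative only: a minimal sketch of how load_source might be wired up.
# The keys below are assumptions inferred from the accesses in load_source
# (config['verbose'], config['load-elasticsearch']['url'], 'index-template',
# 'document-types'); they are not a documented configuration schema, and the
# 'index-template' placeholder syntax depends on replace_template.
#
#   config = {
#       'verbose': False,
#       'load-elasticsearch': {
#           'url': 'http://localhost:9200',
#           'index-template': ...,            # whatever replace_template expects
#           'document-types': {'germplasm'},  # hypothetical; empty/None means all
#       },
#   }
#   source = {'schema:identifier': 'my-brapi-endpoint'}            # hypothetical
#   load_source(source, config, 'data/bulk/my-brapi-endpoint', 'log')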