示例#1
0
def cleanup():
    """Teardown helper: remove the 'test' Arango collection and its ClickHouse tables.

    Yields immediately so the cleanup body below runs only after the test
    finishes (pytest fixture-style finalization).
    """
    yield None
    cfg = get_basic_utilities().get(CONFIG)
    arango = get_singleton_arango_client(cfg['arango'])
    name = 'test'
    if name in arango.db.collections:
        arango.db.collections[name].delete()
        del arango.db.collections[name]
    ch_helper: ClickhouseHelper = get_singleton_ch_client(cfg['clickhouse'])
    # Drop both the main table and its buffer counterpart.
    ch_helper.drop_table_if_exists(name)
    ch_helper.drop_table_if_exists(f'{name}_Buffer')
示例#2
0
def get_test_table():
    """Return the 'test' Arango collection, creating it when absent."""
    cfg = get_basic_utilities().get(CONFIG)
    client = get_singleton_arango_client(cfg['arango'])
    existing = client.db.collections
    if 'test' not in existing:
        return client.db.createCollection(name='test')
    return existing['test']
示例#3
0
def test_arango_connection(basic_utilities):
    """Smoke test: obtaining the singleton Arango client must not raise."""
    arango_conf = basic_utilities.get(CONFIG)['arango']
    get_singleton_arango_client(arango_conf)
    return True
示例#4
0
def test_get_singleton_arango_client(basic_utilities):
    """The Arango client factory must hand back the same instance on every call."""
    arango_conf = basic_utilities.get(CONFIG)['arango']
    first = get_singleton_arango_client(arango_conf)
    second = get_singleton_arango_client(arango_conf)
    return first is second
示例#5
0
def get_arango_collections(arango_config):
    """Return the collections mapping of the singleton Arango DB client."""
    return get_singleton_arango_client(arango_config).db.collections
def load_collection_data(collection, store_tick, batch_size):
    """Fully re-sync an Arango collection into its ClickHouse table.

    Streams every document from the Arango ``collection`` into a fresh
    temporary ClickHouse table, then swaps it in by dropping the live table
    and renaming the temp one. Optionally records the current WAL tick in
    Redis (so incremental replication can resume from this point) and
    recreates the buffer table when the table mapping defines one.

    :param collection: Arango collection name to replicate.
    :param store_tick: when truthy, store the current WAL tick in Redis
        under ``'{collection}:last-tick'`` before the copy starts.
    :param batch_size: number of documents fetched / inserted per batch.
    :return: ``True`` on completion.
    """
    basic_utils = get_basic_utilities()
    # NOTE(review): mail_client is fetched but never used below — confirm it
    # is actually needed before removing it from the get_utils call.
    config, logging, mail_client = basic_utils.get_utils((CONFIG, LOGGER, SMTP_CLIENT))

    arango_client = get_singleton_arango_client(config['arango'])

    clickhouse: ClickhouseHelper = get_singleton_ch_client(config['clickhouse'])
    clickhouse_client: Client = clickhouse.client

    clickhouse_table_map = get_table_map_by_arango_collection(collection)
    clickhouse_table = clickhouse_table_map['clickhouse']
    clickhouse_db = clickhouse_table_map['clickhouse_db']
    clickhouse_table_schema = clickhouse_table_map['schema']

    # Prepare a fresh temporary table to load into.
    clickhouse_temp_table = f'{clickhouse_table}Temp'
    clickhouse.drop_table_if_exists(f'{clickhouse_db}.{clickhouse_temp_table}')
    temp_table, _table_created = create_temporary_table(
        clickhouse_client, clickhouse_table_map['table_create'],
        f'{clickhouse_db}.{clickhouse_table}',
        f'{clickhouse_db}.{clickhouse_temp_table}')
    logging.info(f'temporary table created for {clickhouse_table}')

    # Store the current WAL tick in Redis so incremental sync can resume
    # from the moment this full load began.
    if store_tick:
        wal_client = get_wal_client({**config['arango'], **config['wal']})
        last_tick = wal_client.get_last_tick()
        redis_config = config['redis']
        redis_helper = get_singleton_redis_client(redis_config['host'], redis_config['port'], redis_config['db'])
        redis_helper.client.set(f'{collection}:last-tick', last_tick['tick'])
        logging.info(f'stored current wal tick: {last_tick}')

    logging.info('collect documents from arango')
    processed_documents = 0
    errors = 0
    for documents in get_all_documents(db_client=arango_client, col_name=collection, batch_size=batch_size):
        # Fixed typo in the original message ("fom" -> "from").
        logging.info(f'documents collected from arango: {len(documents)} docs')

        # Map Arango documents to ClickHouse rows; documents that fail schema
        # conversion are skipped and counted in `errors`.
        converted = []
        for doc in documents:
            try:
                converted.append(convert_to_ch_dict_using_schema(clickhouse_table_schema, doc))
            except (TypeError, ValueError, KeyError):
                # NOTE(review): `document` looks like a project-specific
                # logger method — confirm it exists on this logger.
                logging.document(f'doc: {doc}')
                logging.document(f'error: {traceback.format_exc()}')
                errors += 1

        if converted:
            total_insertion = clickhouse.bulk_dict_doc_insert(converted, temp_table, list(converted[0].keys()),
                                                              batch_size)
            logging.info(f'populated data on clickhouse: {total_insertion} docs')
            processed_documents += total_insertion
        logging.info(f'overall processed documents: {processed_documents} docs')

    logging.info('data populated on temporary table')
    # Swap: drop the live table and rename the freshly-loaded temp table in.
    clickhouse.drop_table_if_exists(f'{clickhouse_db}.{clickhouse_table}')
    logging.info(f'dropped table {clickhouse_table}')
    clickhouse.rename_table(temp_table, f'{clickhouse_db}.{clickhouse_table}')
    logging.info('table populated successfully')
    logging.info(f'Incompatible documents: {errors}')

    # Recreate the buffer table (when the mapping defines one) so buffered
    # writes target the newly swapped-in table.
    if 'buffer' in clickhouse_table_map:
        clickhouse_buffer = f'{clickhouse_table}_Buffer'
        clickhouse.drop_table_if_exists(f'{clickhouse_db}.{clickhouse_buffer}')
        logging.info(f'dropped table {clickhouse_buffer}')
        create_buffer_table(clickhouse, clickhouse_db, clickhouse_table, clickhouse_table_map)
        logging.info('buffer table created successfully')

    return True