Exemplo n.º 1
0
import sys
from config_handler import ConfigHandler

if __name__ == '__main__':
    # Command-line helper: build a ConfigHandler from the first argument,
    # fetch an evaluated option using the second and third arguments, and
    # print the entry selected by the fourth argument.
    #
    # Four positional arguments are required. Without this guard a missing
    # argument surfaces as a bare IndexError traceback instead of a usage
    # hint. The placeholder names in the message are a best guess at what
    # each argument means -- TODO confirm against
    # ConfigHandler.get_eval_option.
    if len(sys.argv) < 5:
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit('Usage: python %s <config> <section> <option> <key>'
                 % sys.argv[0])

    config_path, section, option, key = sys.argv[1:5]

    config_handler = ConfigHandler(config_path)
    # The result is indexed by `key` below, so it is presumably a dict (or
    # another subscriptable container) -- verify in ConfigHandler.
    option_dict = config_handler.get_eval_option(section, option)
    print(option_dict[key])
Exemplo n.º 2
0
                        (topic_list, host_conf['host'], host_conf['username'],
                         host_conf['password'], topics))
        print('%s topics created.' % len(topics_conf))


if __name__ == '__main__':
    # Entry point: for every TSV file listed under the 'extraction'/'sources'
    # config option, load its lines from <source_dir> and, for each configured
    # entity, build rows and hand them to write_rows with a target path of
    # '<target_dir>/<entity_name>.tsv'.
    if len(sys.argv) < 4:
        print('python extractor.py <config> <source_dir> <target_dir>\n')
        # Use sys.exit rather than the bare exit(): the latter is injected by
        # the site module for interactive sessions and is not available when
        # Python runs with -S or in some frozen/embedded builds.
        sys.exit(1)

    source_dir = sys.argv[2]
    target_dir = sys.argv[3]
    config_handler = ConfigHandler(sys.argv[1])
    # Not used in the visible part of this script; presumably consumed
    # further down -- kept as-is.
    prefix = config_handler.get_config_option('info', 'prefix')

    # Each source is indexed by 'tsv_files'; each tsv_file entry is indexed
    # by 'file_name', 'entities' and 'relations'.
    sources = config_handler.get_eval_option('extraction', 'sources')
    for source in sources:
        tsv_files = source['tsv_files']

        for tsv_file in tsv_files:
            # Input path, relative to the source directory.
            file_name = '%s/%s' % (source_dir, tsv_file['file_name'])
            lines = load_lines(file_name)

            ent_conf = tsv_file['entities']
            for entity_name, entity_info in ent_conf.items():
                # NOTE: file_name is rebound here from the input path to the
                # per-entity output path; `lines` was already loaded above,
                # so the rebinding does not affect this loop.
                file_name = '%s/%s.tsv' % (target_dir, entity_name)
                rows = create_rows(lines, entity_info)
                # The value returned by write_rows is reported as the number
                # of extracted entities.
                count = write_rows(rows, file_name, entity_info)
                print('%s [%s] entities extracted.' % (count, entity_name))

            rel_conf = tsv_file['relations']
Exemplo n.º 3
0
            producer.flush()
    producer.flush()
    print('. %s' % count)


if __name__ == "__main__":

    if len(sys.argv) < 3:
        print("Usage: python producer.py <config> <tsv_dir> <remote>")
        exit(1)

    handler = ConfigHandler(sys.argv[1])
    tsv_dir = sys.argv[2]
    remote = len(sys.argv) == 4

    config = handler.get_eval_option('yggdrasil', 'conf')
    prefix = handler.get_config_option('info', 'prefix')

    broker = config['broker']
    schema_registry = config['schema_registry']

    schema = handler.get_config_option('avro', 'schema')
    avro_schema = avro.loads(schema)
    avro_producer = AvroProducer(
        {
            'bootstrap.servers': broker,
            'schema.registry.url': schema_registry,
        },
        default_value_schema=avro_schema)

    # if remote: