from topic_tracking.util.mongo import MongoConnectionManager


if __name__ == '__main__':
    # Ad-hoc debugging script: take one stored resource, build the payload
    # string for its terms and send it to the resource index's `_analyze`
    # endpoint, printing both the payload and the raw response.

    # MongoDB: connection manager and the 'resources' collection
    mongo_host, mongo_port = 'localhost', 27017
    connection_manager = MongoConnectionManager(
        mongo_host, mongo_port, MongoCodec())
    resources = connection_manager.get_collection(
        'processed', 'resources', Resource)

    # ElasticSearch: client plus the names of the two indexes
    resource_index, story_index = (
        'topic_tracking_resources', 'topic_tracking_stories')
    es = ES('localhost:9200', timeout=60)

    # helper bound to the client and both index names
    helper = IndexHelper(es, resource_index, story_index)

    # fetch a single resource from mongo
    sample = resources.find_one_model()

    # turn the resource's terms into the payload string and show it
    payload = helper._build_payload_string(sample.terms)
    pprint(payload)

    # send the payload as the `text` parameter of a GET on <index>/_analyze
    analysis = es._send_request(
        'GET', '%s/_analyze' % resource_index, None, {'text': payload})
    pprint(analysis)
Пример #2
0
    # Re-index every stored resource and story into the main ElasticSearch
    # index. The path of the YAML configuration file is the first
    # command-line argument; the config must provide the 'logging', 'mongo'
    # and 'elasticsearch' sections read below.
    config_file = sys.argv[1]

    # Read the config through a context manager so the handle is closed
    # deterministically (the Python-2-only `file()` builtin leaked it).
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects and newer PyYAML releases require a Loader
    # argument; consider yaml.safe_load if the config uses only plain YAML
    # types -- confirm against the real config files before switching.
    with open(config_file, 'r') as config_stream:
        config = yaml.load(config_stream)

    # logging
    logging.config.dictConfig(config['logging'])
    logger = logging.getLogger()

    # MongoDB: both collections live in the configured 'processed' database
    mcm = mongo_from_config(config['mongo'])
    database = config['mongo']['databases']['processed']
    resource_collection = mcm.get_collection(database, 'resources', Resource)
    story_collection = mcm.get_collection(database, 'stories', Story)

    # elasticsearch
    es = elasticsearch_from_config(config['elasticsearch'])
    main_index = config['elasticsearch']['indexes']['main']
    # open_index = config['elasticsearch']['indexes']['open']

    # helpers
    index_helper = IndexHelper(es)

    # Index resources first, then stories, all into the main index.
    # Logger arguments are passed lazily so the message is only formatted
    # when debug output is actually enabled.
    for resource in resource_collection.find_models():
        index_helper.index_resource(resource, main_index)
        logger.debug('Indexed resource %s.', resource)

    for story in story_collection.find_models():
        index_helper.index_story(story, main_index)
        logger.debug('Indexed story %s.', story)

    logger.info('Re-indexing complete.')