Пример #1
0
def clone_index(createidx=False, test=True):
    """Re-index all 'variant' docs from 'myvariant_all_1' into 'myvariant_current_3'.

    The function is a deliberate no-op unless ``test`` is passed as a falsy
    value; with the default ``test=True`` it returns immediately.

    Args:
        createidx: When truthy, create the target index (10 shards) and
            install the mapping returned by ``mapping.get_mapping()`` before
            any document is copied.
        test: Safety switch. Any truthy value makes the function return
            without doing anything.

    Returns:
        None.

    Note:
        ``es`` and ``do_index`` are not defined here; they are looked up in
        the enclosing module scope at call time.
    """
    if test:
        return

    # Project-local imports are deferred so the guarded no-op path above
    # never needs them.
    from utils.es import ESIndexer
    from utils.common import iter_n

    target_index = 'myvariant_current_3'
    batch_size = 10000

    if createidx:
        from mapping import get_mapping
        variant_mapping = get_mapping()
        index_settings = {'settings': {'number_of_shards': 10}}  # ###
        es.indices.create(target_index, body=index_settings)
        es.indices.put_mapping(index=target_index,
                               doc_type='variant',
                               body=variant_mapping)

    # NOTE: helpers.reindex(es, source_index='myvariant_all',
    #       target_index=<target>, chunk_size=10000) was an earlier approach
    #       that is left disabled in favour of the manual batching below.
    source_docs = ESIndexer().doc_feeder(index='myvariant_all_1',
                                         doc_type='variant',
                                         step=batch_size)

    # The same indexing options apply to every batch.
    index_options = {
        'index_name': target_index,
        'doc_type': 'variant',
        'step': batch_size,
        'verbose': False,
        'update': True,
    }
    for batch in iter_n(source_docs, batch_size):
        do_index(batch, **index_options)
Пример #2
0
def clone_index(createidx=False, test=True):
    """Copy the 'variant' documents of 'myvariant_all_1' to 'myvariant_current_3'.

    Nothing happens while ``test`` is truthy (the default); the caller has to
    pass a falsy ``test`` explicitly to run the copy.

    Args:
        createidx: If truthy, first create the destination index with 10
            shards and put the mapping obtained from
            ``mapping.get_mapping()`` on it.
        test: Guard flag; a truthy value turns the call into a no-op.

    Returns:
        None.

    Note:
        The names ``es`` and ``do_index`` are resolved from the surrounding
        module at call time; they are not defined inside this function.
    """
    if test:
        return

    from utils.es import ESIndexer
    from utils.common import iter_n

    destination = 'myvariant_current_3'
    chunk = 10000

    if createidx:
        from mapping import get_mapping
        mapping_body = get_mapping()
        settings_body = {'settings': {'number_of_shards': 10}}    # ###
        es.indices.create(destination, body=settings_body)
        es.indices.put_mapping(index=destination, doc_type='variant', body=mapping_body)

    # An earlier variant used helpers.reindex(es, source_index='myvariant_all',
    # target_index=..., chunk_size=10000); the manual batching below replaces it.
    indexer = ESIndexer()
    feeder = indexer.doc_feeder(index='myvariant_all_1', doc_type='variant', step=chunk)

    batches = iter_n(feeder, chunk)
    for docs in batches:
        do_index(
            docs,
            index_name=destination,
            doc_type='variant',
            step=chunk,
            verbose=False,
            update=True,
        )
Пример #3
0
import os
from collections import defaultdict
from mapping import get_mapping
from pymongo import MongoClient
import ast
import math
import numpy as np
import json
import csv

# Relative directory holding the data files (note the trailing slash).
DATADIR = './data/'
# Name of the tab-separated input file; presumably the file handed to
# parse_file() after joining with DATADIR -- confirm against the caller.
DATAFILE = 'earn_nt_net.tsv'
# Name of the CSV output file; its writer is not visible in this chunk.
DATAFILE_OUT = 'tax_ratio.csv'
# Result of mapping.get_mapping(), evaluated once when this module is imported.
MAPPING = get_mapping()


def parse_file(datafile):
    """Parse a mixed comma/tab separated data file into a list of dicts.

    Each line is expected to hold three comma-separated columns followed by a
    fourth comma field that itself contains the remaining columns separated
    by tabs. The first line is the header; its fourth column is renamed to
    ``'country'``. Header names and values are stripped of surrounding
    whitespace.

    Args:
        datafile: Path of the file to read.

    Returns:
        A list with one dict per non-blank data line, mapping header names to
        the (string) values of that line. An empty file yields ``[]``.

    Raises:
        IndexError: If a non-blank line has fewer than four comma-separated
            fields, or a data line has more columns than the header.
    """
    def split_row(line):
        # Columns 0-2 are comma-separated; the fourth comma field carries the
        # remaining tab-separated columns.
        fields = line.split(",")
        fields[3:] = fields[3].split("\t")
        return [field.strip() for field in fields]

    data = []
    # Plain text mode already performs universal-newline translation in
    # Python 3; the legacy 'U' flag raises ValueError on Python 3.11+.
    with open(datafile, 'r') as f:
        header_line = f.readline()
        if not header_line.strip():
            # Empty file (or blank header): nothing to parse.
            return data
        header = split_row(header_line)
        header[3] = 'country'  # use a friendly name
        for line in f:
            if not line.strip():
                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of failing on the missing fourth field.
                continue
            values = split_row(line)
            # Index into the header explicitly so an over-long row still
            # raises IndexError rather than being silently truncated.
            data.append({header[i]: value for i, value in enumerate(values)})
    return data
Пример #4
0
def create_index(index_name, mapping=None):
    """Create an index with 20 shards and the given (or default) mapping.

    Args:
        index_name: Name of the index to create.
        mapping: Mapping to install under the ``"mappings"`` key of the
            request body. Any falsy value (``None``, ``{}``) is replaced by
            the result of ``get_mapping()``.

    Returns:
        None.

    Note:
        ``es`` and ``get_mapping`` are resolved from the enclosing module
        scope; they are not defined in this function.
    """
    if not mapping:
        mapping = get_mapping()
    request_body = {
        'settings': {'number_of_shards': 20},
        "mappings": mapping,
    }
    es.indices.create(index=index_name, body=request_body)
Пример #5
0
def create_index(index_name, mapping=None):
    """Create ``index_name`` with 20 shards, using ``mapping`` or the default one.

    Args:
        index_name: Name of the index to create.
        mapping: Mapping placed under ``"mappings"`` in the creation body.
            When falsy, ``get_mapping()`` supplies the mapping instead.

    Returns:
        None.

    Note:
        Relies on ``es`` and ``get_mapping`` being available in the
        surrounding module scope.
    """
    effective_mapping = mapping if mapping else get_mapping()
    creation_body = dict(
        [('settings', {'number_of_shards': 20}), ("mappings", effective_mapping)]
    )
    es.indices.create(index=index_name, body=creation_body)