Пример #1
0
def main(settings):
    """
    Synchronise the destination index with the source ES index.

    Opens the source with ``ElasticSearch(settings.source)``, obtains the
    destination from ``get_or_create_index(settings["destination"], source)``,
    asks ``get_pending`` for the pending ids and passes everything on to
    ``replicate``.  Logs "Done" when ``replicate`` returns.

    :param settings: configuration object exposing ``source`` (attribute
        access) and ``"destination"`` (item access)
    """
    # Field used to identify records, both for finding pending work and
    # for replicating it
    id_field_name = "info.started"
    # Script filter whose script is the constant "true"
    # NOTE(review): presumably this matches every document - confirm
    esfilter = {"script": {"script": "true"}}

    source = ElasticSearch(settings.source)
    destination = get_or_create_index(settings["destination"], source)

    todo = get_pending(source, id_field_name, esfilter)
    replicate(source, destination, todo, id_field_name, esfilter)
    Log.note("Done")
Пример #2
0
def main(settings):
    """
    Load records from a local JSON dump into the destination index.

    The dump is read as text and split on newlines; blank lines are skipped
    and every remaining line is parsed with ``CNV.JSON2object``.  Parsed lines
    are consumed in pairs: the first line of a pair supplies ``index._id`` and
    the second line is stored as the record's value.  Records are then added
    to the destination in groups of 1000, with a log note per group.

    :param settings: configuration object; ``settings.destination`` is passed
        to ``get_or_create_index``
    """
    destination = get_or_create_index(settings.destination)

    raw = File("C:/Users/klahnakoski/Downloads/records.json").read()

    # Parse every non-blank line of the dump
    lines = []
    for text in raw.split("\n"):
        if text.strip():
            lines.append(CNV.JSON2object(text))

    # Even positions hold the id-bearing line, odd positions hold the value
    records = []
    for i in range(0, len(lines), 2):
        record_id = lines[i].index._id
        records.append({"id": record_id, "value": lines[i + 1]})

    for _, batch in Q.groupby(records, size=1000):
        destination.extend(batch)
        Log.note("Added {{num}}", {"num": len(batch)})

    Log.note("Done")
Пример #3
0
def get_pending(es, id_field_name, esfilter):
    """
    Collect the distinct values of ``id_field_name`` that match ``esfilter``.

    Issues a single search that requests no hits (``size`` 0) and a terms
    facet named ``default`` over ``id_field_name``, capped at 200000 terms.
    ``Log.error`` is called when the facet comes back with 200000 or more
    terms.  The facet terms are wrapped in a ``Multiset`` keyed on ``term``
    and counted by ``count``; its length is logged before it is returned.

    :param es: object whose ``search`` method accepts the query dict
    :param id_field_name: name of the field to facet on
    :param esfilter: filter placed in the ``filtered`` query
    :return: ``Multiset`` built from the facet terms
    """
    # Single limit shared by the facet size and the overflow check
    max_terms = 200000

    filtered_query = {"filtered": {
        "query": {"match_all": {}},
        "filter": esfilter
    }}
    terms_facet = {"terms": {"field": id_field_name, "size": max_terms}}

    result = es.search({
        "query": filtered_query,
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"default": terms_facet}
    })
    terms = result.facets.default.terms

    # A full facet means there may be more terms than were returned
    if len(terms) >= max_terms:
        Log.error("Can not handle more than 200K bugs changed")

    pending = Multiset(terms, key_field="term", count_field="count")
    Log.note("Source has {{num}} records for updating", {
        "num": len(pending)
    })
    return pending