Exemplo n.º 1
0
def es_deepop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es_query_template(query_path)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for f, w in zip_longest(es_filters, wheres):
        script = ES52[AndOp(w)].partial_eval().to_esfilter(schema)
        set_default(f, script)

    if not wheres[1]:
        # INCLUDE DOCS WITH NO NESTED DOCS
        more_filter = {
            "bool": {
                "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)],
                "must_not": {
                    "nested": {
                        "path": query_path,
                        "query": MATCH_ALL
                    }
                }
            }
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)

    map_to_es_columns = schema.map_to_es()
    query_for_es = query.map(map_to_es_columns)
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.stored_fields = []

    is_list = is_list_(query.select)
    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    put_index = 0
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            leaves = schema.leaves(select.value.term.var)
            col_names = set()
            for c in leaves:
                if c.nested_path[0] == ".":
                    if c.jx_type == NESTED:
                        continue
                    es_query.stored_fields += [c.es_column]
                c_name = untype_path(relative_field(c.name, query_path))
                col_names.add(c_name)
                new_select.append({
                    "name": concat_field(select.name, c_name),
                    "nested_path": c.nested_path[0],
                    "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."},
                    "pull": get_pull_function(c)
                })
                put_index += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.put.name = n.name = n.name.lstrip(".")
                    col_names.add(n.name)
        elif is_op(select.value, Variable):
            net_columns = schema.leaves(select.value.var)
            if not net_columns:
                new_select.append({
                    "name": select.name,
                    "nested_path": ".",
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": NULL
                })
            else:
                for n in net_columns:
                    pull = get_pull_function(n)
                    if n.nested_path[0] == ".":
                        if n.jx_type == NESTED:
                            continue
                        es_query.stored_fields += [n.es_column]

                    # WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child
                    for np in n.nested_path:
                        c_name = untype_path(relative_field(n.name, np))
                        if startswith_field(c_name, select.value.var):
                            # PREFER THE MOST-RELATIVE NAME
                            child = relative_field(c_name, select.value.var)
                            break
                    else:
                        continue

                    new_select.append({
                        "name": select.name,
                        "pull": pull,
                        "nested_path": n.nested_path[0],
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": child
                        }
                    })
            put_index += 1
        else:
            expr = select.value
            for v in expr.vars():
                for c in schema[v.var]:
                    if c.nested_path[0] == ".":
                        es_query.stored_fields += [c.es_column]
                    # else:
                    #     Log.error("deep field not expected")

            pull_name = EXPRESSION_PREFIX + select.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local))

            new_select.append({
                "name": select.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    es_query.stored_fields = sorted(es_query.stored_fields)

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        more.append(es_post(
            es,
            Data(
                query=more_filter,
                stored_fields=es_query.stored_fields
            ),
            query.limit
        ))
    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # EACH A HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    # </COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Exemplo n.º 2
0
def es_setop(es, query):
    schema = query.frum.schema

    es_query, filters = es_query_template(schema.query_path[0])
    nested_filter = None
    set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.stored_fields = FlatList()

    selects = wrap([s.copy() for s in listwrap(query.select)])
    new_select = FlatList()
    schema = query.frum.schema
    # columns = schema.columns
    # nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")

    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(select.value, LeavesOp) and isinstance(
                select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            for c in leaves:
                full_name = concat_field(
                    select.name,
                    relative_field(untype_path(c.names["."]), term.var))
                if c.jx_type == NESTED:
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        },
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                elif c.nested_path[0] != ".":
                    pass  # THE NESTED PARENT WILL CAPTURE THIS
                else:
                    es_query.stored_fields += [c.es_column]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {
                            "name": literal_field(full_name),
                            "index": put_index,
                            "child": "."
                        }
                    })
                    put_index += 1
        elif isinstance(select.value, Variable):
            s_column = select.value.var
            # LEAVES OF OBJECT
            leaves = schema.leaves(s_column)
            nested_selects = {}
            if leaves:
                if s_column == '.':
                    # PULL ALL SOURCE
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": select.name,
                        "value": select.value,
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": "."
                        },
                        "pull": get_pull_source(".")
                    })
                elif any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    es_query.stored_fields = ["_source"]
                    for c in leaves:
                        if len(
                                c.nested_path
                        ) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRT LEVEL PROPERTIES
                            jx_name = untype_path(c.names["."])
                            new_select.append({
                                "name":
                                select.name,
                                "value":
                                Variable(c.es_column),
                                "put": {
                                    "name": select.name,
                                    "index": put_index,
                                    "child": relative_field(jx_name, s_column)
                                },
                                "pull":
                                get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        if len(c.nested_path) == 1:
                            jx_name = untype_path(c.names["."])
                            if c.jx_type == NESTED:
                                es_query.stored_fields = ["_source"]
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child":
                                        relative_field(jx_name, s_column)
                                    },
                                    "pull":
                                    get_pull_source(c.es_column)
                                })

                            else:
                                es_query.stored_fields += [c.es_column]
                                new_select.append({
                                    "name":
                                    select.name,
                                    "value":
                                    Variable(c.es_column),
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child":
                                        relative_field(jx_name, s_column)
                                    }
                                })
                        else:
                            if not nested_filter:
                                where = filters[0].copy()
                                nested_filter = [where]
                                for k in filters[0].keys():
                                    filters[0][k] = None
                                set_default(
                                    filters[0],
                                    es_and([where, es_or(nested_filter)]))

                            nested_path = c.nested_path[0]
                            if nested_path not in nested_selects:
                                where = nested_selects[nested_path] = Data()
                                nested_filter += [where]
                                where.nested.path = nested_path
                                where.nested.query.match_all = {}
                                where.nested.inner_hits._source = False
                                where.nested.inner_hits.stored_fields += [
                                    c.es_column
                                ]

                                child = relative_field(
                                    untype_path(c.names[schema.query_path[0]]),
                                    s_column)
                                pull = accumulate_nested_doc(
                                    nested_path,
                                    Variable(
                                        relative_field(
                                            s_column,
                                            unnest_path(nested_path))))
                                new_select.append({
                                    "name": select.name,
                                    "value": select.value,
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": child
                                    },
                                    "pull": pull
                                })
                            else:
                                nested_selects[
                                    nested_path].nested.inner_hits.stored_fields += [
                                        c.es_column
                                    ]
            else:
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {
                        "name": select.name,
                        "index": put_index,
                        "child": "."
                    }
                })
            put_index += 1
        else:
            painless = select.value.partial_eval().to_es_script(schema)
            es_query.script_fields[literal_field(select.name)] = es_script(
                painless.script(schema))
            new_select.append({
                "name":
                select.name,
                "pull":
                jx_expression_to_function("fields." +
                                          literal_field(select.name)),
                "put": {
                    "name": select.name,
                    "index": put_index,
                    "child": "."
                }
            })
            put_index += 1

    for n in new_select:
        if n.pull:
            continue
        elif isinstance(n.value, Variable):
            if es_query.stored_fields[0] == "_source":
                es_query.stored_fields = ["_source"]
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(
                    concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter"):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Exemplo n.º 3
0
def es_deepop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es_query_template(query_path)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for i, f in enumerate(es_filters):
        script = AndOp("and", wheres[i]).partial_eval().to_esfilter(schema)
        set_default(f, script)

    if not wheres[1]:
        more_filter = {
            "bool": {
                "filter": [AndOp("and", wheres[0]).partial_eval().to_esfilter(schema)],
                "must_not": {
                    "nested": {
                        "path": query_path,
                        "query": {
                            "match_all": {}
                        }
                    }
                }
            }
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)

    # es_query.sort = jx_sort_to_es_sort(query.sort)
    map_to_es_columns = schema.map_to_es()
    # {c.names["."]: c.es_column for c in schema.leaves(".")}
    query_for_es = query.map(map_to_es_columns)
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.stored_fields = []

    is_list = isinstance(query.select, list)
    new_select = FlatList()

    i = 0
    for s in listwrap(query.select):
        if isinstance(s.value, LeavesOp) and isinstance(s.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            leaves = schema.leaves(s.value.term.var)
            col_names = set()
            for c in leaves:
                if c.nested_path[0] == ".":
                    if c.jx_type == NESTED:
                        continue
                    es_query.stored_fields += [c.es_column]
                c_name = untype_path(c.names[query_path])
                col_names.add(c_name)
                new_select.append({
                    "name": concat_field(s.name, c_name),
                    "nested_path": c.nested_path[0],
                    "put": {"name": concat_field(s.name, literal_field(c_name)), "index": i, "child": "."},
                    "pull": get_pull_function(c)
                })
                i += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.put.name = n.name = n.name.lstrip(".")
                    col_names.add(n.name)
        elif isinstance(s.value, Variable):
            net_columns = schema.leaves(s.value.var)
            if not net_columns:
                new_select.append({
                    "name": s.name,
                    "nested_path": ".",
                    "put": {"name": s.name, "index": i, "child": "."},
                    "pull": NULL
                })
            else:
                for n in net_columns:
                    pull = get_pull_function(n)
                    if n.nested_path[0] == ".":
                        if n.jx_type == NESTED:
                            continue
                        es_query.stored_fields += [n.es_column]

                    # WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child
                    for np in n.nested_path:
                        c_name = untype_path(n.names[np])
                        if startswith_field(c_name, s.value.var):
                            child = relative_field(c_name, s.value.var)
                            break
                    else:
                        child = relative_field(untype_path(n.names[n.nested_path[0]]), s.value.var)

                    new_select.append({
                        "name": s.name,
                        "pull": pull,
                        "nested_path": n.nested_path[0],
                        "put": {
                            "name": s.name,
                            "index": i,
                            "child": child
                        }
                    })
            i += 1
        else:
            expr = s.value
            for v in expr.vars():
                for c in schema[v.var]:
                    if c.nested_path[0] == ".":
                        es_query.stored_fields += [c.es_column]
                    # else:
                    #     Log.error("deep field not expected")

            pull_name = EXPRESSION_PREFIX + s.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = compile_expression(expr.map(map_to_local).to_python())

            new_select.append({
                "name": s.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": s.name, "index": i, "child": "."}
            })
            i += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        more.append(es_post(
            es,
            Data(
                query=more_filter,
                stored_fields=es_query.stored_fields
            ),
            query.limit
        ))
    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # EACH A HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    #</COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Exemplo n.º 4
0
def es_setop(es, query):
    schema = query.frum.schema

    es_query, filters = es_query_template(schema.query_path[0])
    nested_filter = None
    set_default(filters[0], query.where.partial_eval().to_esfilter(schema))
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.stored_fields = FlatList()

    selects = wrap([s.copy() for s in listwrap(query.select)])
    new_select = FlatList()
    schema = query.frum.schema
    # columns = schema.columns
    # nested_columns = set(c.names["."] for c in columns if c.nested_path[0] != ".")

    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    put_index = 0
    for select in selects:
        # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
        if isinstance(select.value, LeavesOp) and isinstance(select.value.term, Variable):
            term = select.value.term
            leaves = schema.leaves(term.var)
            for c in leaves:
                full_name = concat_field(select.name, relative_field(untype_path(c.names["."]), term.var))
                if c.jx_type == NESTED:
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."},
                        "pull": get_pull_source(c.es_column)
                    })
                    put_index += 1
                elif c.nested_path[0] != ".":
                    pass  # THE NESTED PARENT WILL CAPTURE THIS
                else:
                    es_query.stored_fields += [c.es_column]
                    new_select.append({
                        "name": full_name,
                        "value": Variable(c.es_column),
                        "put": {"name": literal_field(full_name), "index": put_index, "child": "."}
                    })
                    put_index += 1
        elif isinstance(select.value, Variable):
            s_column = select.value.var
            # LEAVES OF OBJECT
            leaves = schema.leaves(s_column)
            nested_selects = {}
            if leaves:
                if s_column == '.':
                    # PULL ALL SOURCE
                    es_query.stored_fields = ["_source"]
                    new_select.append({
                        "name": select.name,
                        "value": select.value,
                        "put": {"name": select.name, "index": put_index, "child": "."},
                        "pull": get_pull_source(".")
                    })
                elif any(c.jx_type == NESTED for c in leaves):
                    # PULL WHOLE NESTED ARRAYS
                    es_query.stored_fields = ["_source"]
                    for c in leaves:
                        if len(c.nested_path) == 1:  # NESTED PROPERTIES ARE IGNORED, CAPTURED BY THESE FIRT LEVEL PROPERTIES
                            jx_name = untype_path(c.names["."])
                            new_select.append({
                                "name": select.name,
                                "value": Variable(c.es_column),
                                "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
                                "pull": get_pull_source(c.es_column)
                            })
                else:
                    # PULL ONLY WHAT'S NEEDED
                    for c in leaves:
                        if len(c.nested_path) == 1:
                            jx_name = untype_path(c.names["."])
                            if c.jx_type == NESTED:
                                es_query.stored_fields = ["_source"]
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)},
                                    "pull": get_pull_source(c.es_column)
                                })

                            else:
                                es_query.stored_fields += [c.es_column]
                                new_select.append({
                                    "name": select.name,
                                    "value": Variable(c.es_column),
                                    "put": {"name": select.name, "index": put_index, "child": relative_field(jx_name, s_column)}
                                })
                        else:
                            if not nested_filter:
                                where = filters[0].copy()
                                nested_filter = [where]
                                for k in filters[0].keys():
                                    filters[0][k] = None
                                set_default(
                                    filters[0],
                                    es_and([where, es_or(nested_filter)])
                                )

                            nested_path = c.nested_path[0]
                            if nested_path not in nested_selects:
                                where = nested_selects[nested_path] = Data()
                                nested_filter += [where]
                                where.nested.path = nested_path
                                where.nested.query.match_all = {}
                                where.nested.inner_hits._source = False
                                where.nested.inner_hits.stored_fields += [c.es_column]

                                child = relative_field(untype_path(c.names[schema.query_path[0]]), s_column)
                                pull = accumulate_nested_doc(nested_path, Variable(relative_field(s_column, unnest_path(nested_path))))
                                new_select.append({
                                    "name": select.name,
                                    "value": select.value,
                                    "put": {
                                        "name": select.name,
                                        "index": put_index,
                                        "child": child
                                    },
                                    "pull": pull
                                })
                            else:
                                nested_selects[nested_path].nested.inner_hits.stored_fields += [c.es_column]
            else:
                new_select.append({
                    "name": select.name,
                    "value": Variable("$dummy"),
                    "put": {"name": select.name, "index": put_index, "child": "."}
                })
            put_index += 1
        else:
            painless = select.value.partial_eval().to_es_script(schema)
            es_query.script_fields[literal_field(select.name)] = es_script(painless.script(schema))
            new_select.append({
                "name": select.name,
                "pull": jx_expression_to_function("fields." + literal_field(select.name)),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    for n in new_select:
        if n.pull:
            continue
        elif isinstance(n.value, Variable):
            if es_query.stored_fields[0] == "_source":
                es_query.stored_fields = ["_source"]
                n.pull = get_pull_source(n.value.var)
            elif n.value == "_id":
                n.pull = jx_expression_to_function("_id")
            else:
                n.pull = jx_expression_to_function(concat_field("fields", literal_field(n.value.var)))
        else:
            Log.error("Do not know what to do")

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    T = data.hits.hits

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        with Timer("formatter"):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)