Пример #1
0
    def __init__(self, edge, query, limit):
        """
        Set up the decoder state for `edge`.

        :param edge: the edge to decode; its `domain` is adopted (and its limit is overwritten)
        :param query: the query; `query.limit` is a fallback for the domain limit
        :param limit: passed straight through to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        # BOOKKEEPING FILLED IN WHILE ROWS ARE PROCESSED
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

        # DOMAIN LIMIT: coalesce() OF (DOMAIN LIMIT, QUERY LIMIT, 10), NEVER MORE THAN MAX_LIMIT
        self.domain = edge.domain
        domain = self.domain
        requested_limit = coalesce(domain.limit, query.limit, 10)
        domain.limit = Math.min(requested_limit, MAX_LIMIT)

        # COMPILE THE EDGE VALUE, AND DERIVE THE missing/exists EXPRESSIONS FROM IT
        simplified_value = self.edge.value.partial_eval()
        self.script = simplified_value.to_es14_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        direction_names = {1: "asc", -1: "desc"}
        matching_sorts = [s for s in self.query.sort if s.value == self.edge.value]
        self.es_order = (
            {"_term": direction_names[matching_sorts[0].sort]}
            if matching_sorts
            else None
        )
Пример #2
0
    def append_query(self, es_query, start):
        """
        Nest `es_query` under a "filters" aggregation with one filter per domain partition.

        The filter for a partition is its `where` clause AND-ed with the negation of the
        `where` clause of every partition that came before it.

        :param es_query: aggregation supplied to set_default() as the defaults
        :param start: recorded on self.start
        :return: wrapped {"aggs": {"_match": ..., "_missing": ...}}; `_missing` is None
                 unless the edge allows nulls
        """
        self.start = start

        partition_filters = []
        earlier_negated = []  # NOT(where) OF EVERY PARTITION ALREADY VISITED
        for partition in self.edge.domain.partitions:
            condition = partition.where
            # A NEW LIST IS BUILT EACH TIME, SO LATER APPENDS DO NOT LEAK INTO THIS AndOp
            exclusive = AndOp("and", [condition] + earlier_negated)
            partition_filters.append(exclusive.to_es14_filter(self.schema))
            earlier_negated.append(NotOp("not", condition))

        # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        if self.edge.allowNulls:
            # MISSING MEANS: NONE OF THE PARTITIONS' where CLAUSES ACCEPTED
            unmatched = AndOp("and", earlier_negated).to_es14_filter(self.schema)
            missing_filter = set_default({"filter": unmatched}, es_query)
        else:
            missing_filter = None

        match = set_default({"filters": {"filters": partition_filters}}, es_query)
        return wrap({"aggs": {"_match": match, "_missing": missing_filter}})
Пример #3
0
def _range_composer(edge, domain, es_query, to_float, schema):
    """
    Compose a "range" aggregation with one range per partition of `domain`.

    :param edge: the edge; `edge.value` and `edge.allowNulls` are read
    :param domain: provides `min`, `max` and `partitions` (each partition has `min` and `max`)
    :param es_query: aggregation supplied to set_default() as the defaults
    :param to_float: callable applied to every boundary before it is sent to ES
    :param schema: used to find the ES column of a Variable, or to compile a script
    :return: wrapped {"aggs": {"_match": ..., "_missing": ...}}; `_missing` is None
             unless the edge allows nulls
    """
    # USE RANGES
    lower = coalesce(domain.min, MIN(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    missing_filter = None
    if edge.allowNulls:
        # MISSING MEANS: NOT (EXISTS AND lower <= value < upper)
        in_range = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lower))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(upper))])
        ])
        out_of_range = NotOp("not", in_range.partial_eval())
        missing_filter = set_default(
            {"filter": out_of_range.to_esfilter(schema)},
            es_query
        )

    # A PLAIN VARIABLE IS AGGREGATED BY FIELD, ANYTHING ELSE BY SCRIPT
    if isinstance(edge.value, Variable):
        source = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        source = {"script": edge.value.to_es_script(schema).script(schema)}

    ranges = [
        {"from": to_float(p.min), "to": to_float(p.max)}
        for p in domain.partitions
    ]
    match = set_default(
        {"range": source},
        {"range": {"ranges": ranges}},
        es_query
    )
    return wrap({"aggs": {"_match": match, "_missing": missing_filter}})
Пример #4
0
    def append_query(self, es_query, start):
        """
        Nest `es_query` under a "terms" aggregation restricted to the known domain keys.

        :param es_query: aggregation supplied to set_default() as the defaults
        :param start: recorded on self.start
        :return: wrapped {"aggs": {"_match": ..., "_missing": ...}}; `_missing` is None
                 unless the edge allows nulls
        """
        self.start = start
        domain = self.domain
        key_name = domain.key

        def as_number(v):
            # NUMBERS ARE NORMALIZED TO float; EVERYTHING ELSE IS LEFT ALONE
            return float(v) if isinstance(v, (int, float)) else v

        def as_text(v):
            return text_type(float(v)) if isinstance(v, (int, float)) else v

        partition_keys = (p[key_name] for p in domain.partitions)
        # NOTE(review): text_include IS NEVER READ BELOW - KEPT ONLY TO PRESERVE BEHAVIOUR
        include, text_include = transpose(
            *((as_number(k), as_text(k)) for k in partition_keys)
        )

        value = self.edge.value
        in_domain = InOp("in", [value, Literal("literal", include)])
        exists = AndOp("and", [value.exists(), in_domain]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            # ALREADY CHECKED THERE IS ONLY ONE
            es_field = first(self.query.frum.schema.leaves(value.var)).es_column
            terms_spec = {
                "field": es_field,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }
        else:
            terms_spec = {
                "script": value.to_es14_script(self.schema).script(self.schema),
                "size": limit
            }
        terms = set_default({"terms": terms_spec}, es_query)

        missing = (
            set_default(
                {"filter": NotOp("not", exists).to_es14_filter(self.schema)},
                es_query
            )
            if self.edge.allowNulls
            else None
        )

        match = {
            "filter": exists.to_es14_filter(self.schema),
            "aggs": {"_filter": terms}
        }
        return wrap({"aggs": {"_match": match, "_missing": missing}})
Пример #5
0
    def append_query(self, es_query, start):
        """
        Nest `es_query` under a "terms" aggregation on the edge value.

        A non-Variable edge value is aggregated by script, behind an "exists" filter.
        A Variable is aggregated by field, ordered by term only when the query sorts
        on that same variable.

        :param es_query: aggregation supplied to set_default() as the defaults
        :param start: recorded on self.start
        :return: wrapped {"aggs": {"_match": ..., "_missing": ...}}
        """
        self.start = start

        value = self.edge.value.partial_eval()
        script = value.to_ruby(self.schema)
        exists = NotOp("not", script.miss).partial_eval()

        if not isinstance(self.edge.value, Variable):
            # COMPUTED VALUE: FILTER ON EXISTENCE, THEN BUCKET BY SCRIPT
            exists_filter = exists.to_esfilter(self.schema)
            script_terms = set_default(
                {"terms": {
                    "script": script.expr,
                    "size": self.domain.limit,
                    "order": {"_term": self.sorted} if self.sorted else None
                }},
                es_query
            )
            match = {"filter": exists_filter, "aggs": {"_filter": script_terms}}
        else:
            # PLAIN VARIABLE: BUCKET BY FIELD
            edge_var = self.edge.value.var
            term_order = None
            if edge_var in [s.value.var for s in self.query.sort]:
                sort_dir = [s.sort for s in self.query.sort if s.value.var == edge_var][0]
                term_order = {"_term": "asc" if sort_dir == 1 else "desc"}

            field_terms = {
                "field": self.schema.leaves(edge_var)[0].es_column,
                "size": self.domain.limit
            }
            if term_order is not None:
                # THE "order" KEY IS ONLY EMITTED WHEN THE QUERY SORTS ON THIS VARIABLE
                field_terms["order"] = term_order
            match = set_default({"terms": field_terms}, es_query)

        missing = set_default(
            {"filter": NotOp("not", exists).to_esfilter(self.schema)},
            es_query
        )
        return wrap({"aggs": {"_match": match, "_missing": missing}})
Пример #6
0
    def __init__(self, edge, query, limit):
        """
        Set up the decoder state for `edge`.

        :param edge: the edge to decode; its `domain` is adopted (and its limit is overwritten)
        :param query: the query; `query.limit` is a fallback for the domain limit
        :param limit: passed straight through to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        # BOOKKEEPING FILLED IN WHILE ROWS ARE PROCESSED
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

        # DOMAIN LIMIT: coalesce() OF (DOMAIN LIMIT, QUERY LIMIT, 10), NEVER MORE THAN MAX_LIMIT
        self.domain = edge.domain
        domain = self.domain
        requested_limit = coalesce(domain.limit, query.limit, 10)
        domain.limit = Math.min(requested_limit, MAX_LIMIT)

        # COMPILE THE EDGE VALUE, AND DERIVE THE missing/exists EXPRESSIONS FROM IT
        simplified_value = self.edge.value.partial_eval()
        self.script = simplified_value.to_es_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        direction_names = {1: "asc", -1: "desc"}
        matching_sorts = [
            s
            for s in self.query.sort
            if s.value == self.edge.value
        ]
        if not matching_sorts:
            self.es_order = None
        else:
            self.es_order = {"_term": direction_names[matching_sorts[0].sort]}
Пример #7
0
    def append_query(self, es_query, start):
        """
        Wrap `es_query` in one "terms" aggregation per entry of self.fields.

        Each field adds a layer around the previous result, so the last field ends up
        outermost. When the domain has a `where` clause the whole thing is wrapped once
        more in a filter aggregation.

        :param es_query: innermost aggregation, supplied to set_default() as the defaults
        :param start: recorded on self.start
        :return: the nested aggregation (a plain dict when the domain `where` is applied)
        """
        # TODO: USE "reverse_nested" QUERY TO PULL THESE
        self.start = start

        for field in self.fields:
            exists = field.exists().partial_eval()
            exists_filter = exists.to_es14_filter(self.schema)
            es_column = first(self.schema.leaves(field.var)).es_column
            terms = set_default(
                {"terms": {"field": es_column, "size": self.domain.limit}},
                es_query
            )
            nest = wrap({
                "aggs": {
                    "_match": {
                        "filter": exists_filter,
                        "aggs": {"_filter": terms}
                    }
                }
            })
            nest.aggs._missing = set_default(
                {"filter": NotOp("not", exists).to_es14_filter(self.schema)},
                es_query
            )
            # THIS LAYER BECOMES THE INNER QUERY OF THE NEXT FIELD
            es_query = nest

        if not self.domain.where:
            return es_query

        domain_filter = self.domain.where.partial_eval().to_es14_filter(self.schema)
        return {
            "aggs": {
                "_filter": set_default({"filter": domain_filter}, es_query)
            }
        }
Пример #8
0
class DefaultDecoder(SetDecoder):
    """
    FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)
    """

    def __init__(self, edge, query, limit):
        """
        :param edge: the edge to decode; its `domain` is adopted (and its limit is overwritten)
        :param query: the query; `query.limit` is a fallback for the domain limit
        :param limit: passed straight through to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        # BOOKKEEPING FILLED IN BY count() AND get_index()
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

        # DOMAIN LIMIT: coalesce() OF (DOMAIN LIMIT, QUERY LIMIT, 10), NEVER MORE THAN MAX_LIMIT
        self.domain = edge.domain
        domain = self.domain
        requested_limit = coalesce(domain.limit, query.limit, 10)
        domain.limit = Math.min(requested_limit, MAX_LIMIT)

        # COMPILE THE EDGE VALUE, AND DERIVE THE missing/exists EXPRESSIONS FROM IT
        simplified_value = self.edge.value.partial_eval()
        self.script = simplified_value.to_es14_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        direction_names = {1: "asc", -1: "desc"}
        matching_sorts = [s for s in self.query.sort if s.value == self.edge.value]
        self.es_order = (
            {"_term": direction_names[matching_sorts[0].sort]}
            if matching_sorts
            else None
        )

    def append_query(self, es_query, start):
        """
        Nest `es_query` under a "terms" aggregation on the edge value.

        :param es_query: aggregation supplied to set_default() as the defaults
        :param start: recorded on self.start
        :return: wrapped {"aggs": {...}} holding `_match`, plus `_missing` except when
                 the value is a script whose exists-expression is TRUE
        """
        self.start = start

        if isinstance(self.edge.value, Variable):
            # PLAIN VARIABLE: BUCKET BY FIELD
            es_column = first(self.schema.leaves(self.edge.value.var)).es_column
            field_terms = {
                "field": es_column,
                "size": self.domain.limit,
                "order": self.es_order
            }
            match = set_default({"terms": field_terms}, es_query)
            missing = set_default(
                {"filter": self.missing.to_es14_filter(self.schema)},
                es_query
            )
            return wrap({"aggs": {"_match": match, "_missing": missing}})

        def script_terms():
            # A FRESH DICT EACH CALL, BECAUSE set_default() RECEIVES IT AS ITS FIRST ARGUMENT
            return {
                "terms": {
                    "script": self.script.expr,
                    "size": self.domain.limit,
                    "order": self.es_order
                }
            }

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            return wrap({"aggs": {"_match": set_default(script_terms(), es_query)}})

        # _match AND _filter REVERSED SO _match LINES UP WITH _missing
        exists_filter = self.exists.to_es14_filter(self.schema)
        filtered_terms = set_default(script_terms(), es_query)
        missing = set_default(
            {"filter": self.missing.to_es14_filter(self.schema)},
            es_query
        )
        return wrap({
            "aggs": {
                "_match": {
                    "filter": exists_filter,
                    "aggs": {"_filter": filtered_terms}
                },
                "_missing": missing
            }
        })

    def count(self, row):
        """
        Record the key found at row[self.start], provided the bucket has documents.

        A bucket without a key switches on `allowNulls` for the edge instead.
        """
        part = row[self.start]
        if not part['doc_count']:
            return

        key = part.get('key')
        # `!= None` IS KEPT ON PURPOSE (NOT `is not None`); PRESUMABLY NULL-LIKE OBJECTS
        # THAT COMPARE EQUAL TO None MUST COUNT AS MISSING - TODO CONFIRM
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        Replace the domain (on both the edge and this decoder) with a SimpleSetDomain
        built from the distinct, sorted keys gathered by count().
        """
        distinct_keys = set(self.parts)
        computed = SimpleSetDomain(partitions=jx.sort(distinct_keys))
        self.edge.domain = computed
        self.domain = computed
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        Return the index for the key found at row[self.start].

        With a computed domain the index comes from the domain; otherwise indexes are
        handed out in order of first appearance and remembered in self.key2index.
        Any failure is reported through Log.error.
        """
        use_domain = self.computed_domain
        try:
            part = row[self.start]
            key = self.pull(part.get('key'))
            if use_domain:
                return self.domain.getIndexByKey(key)

            index = self.key2index.get(key)
            if index is None:
                # FIRST TIME THIS KEY IS SEEN: IT GETS THE NEXT FREE INDEX
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """This decoder always reports one column."""
        return 1
Пример #9
0
class DefaultDecoder(SetDecoder):
    """
    FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)
    """

    def __init__(self, edge, query, limit):
        """
        :param edge: the edge to decode; its `domain` is adopted (and its limit is overwritten)
        :param query: the query; `query.limit` is a fallback for the domain limit
        :param limit: passed straight through to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        # BOOKKEEPING FILLED IN BY count() AND get_index()
        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

        # DOMAIN LIMIT: coalesce() OF (DOMAIN LIMIT, QUERY LIMIT, 10), NEVER MORE THAN MAX_LIMIT
        self.domain = edge.domain
        domain = self.domain
        requested_limit = coalesce(domain.limit, query.limit, 10)
        domain.limit = Math.min(requested_limit, MAX_LIMIT)

        # COMPILE THE EDGE VALUE, AND DERIVE THE missing/exists EXPRESSIONS FROM IT
        simplified_value = self.edge.value.partial_eval()
        self.script = simplified_value.to_es_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        direction_names = {1: "asc", -1: "desc"}
        matching_sorts = [
            s
            for s in self.query.sort
            if s.value == self.edge.value
        ]
        if not matching_sorts:
            self.es_order = None
        else:
            self.es_order = {"_term": direction_names[matching_sorts[0].sort]}

    def append_query(self, es_query, start):
        """
        Nest `es_query` under a "terms" aggregation on the edge value.

        :param es_query: aggregation supplied to set_default() as the defaults
        :param start: recorded on self.start
        :return: wrapped {"aggs": {...}} holding `_match`, plus `_missing` except when
                 the value is a script whose exists-expression is TRUE
        """
        self.start = start

        if isinstance(self.edge.value, Variable):
            # PLAIN VARIABLE: BUCKET BY FIELD
            es_column = self.schema.leaves(self.edge.value.var)[0].es_column
            field_terms = {
                "field": es_column,
                "size": self.domain.limit,
                "order": self.es_order
            }
            match = set_default({"terms": field_terms}, es_query)
            missing = set_default(
                {"filter": self.missing.to_esfilter(self.schema)},
                es_query
            )
            return wrap({"aggs": {"_match": match, "_missing": missing}})

        def script_terms():
            # A FRESH DICT EACH CALL, BECAUSE set_default() RECEIVES IT AS ITS FIRST ARGUMENT
            return {"terms": {
                "script": self.script.expr,
                "size": self.domain.limit,
                "order": self.es_order
            }}

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            return wrap({"aggs": {"_match": set_default(script_terms(), es_query)}})

        # _match AND _filter REVERSED SO _match LINES UP WITH _missing
        exists_filter = self.exists.to_esfilter(self.schema)
        filtered_terms = set_default(script_terms(), es_query)
        missing = set_default(
            {"filter": self.missing.to_esfilter(self.schema)},
            es_query
        )
        return wrap({"aggs": {
            "_match": {
                "filter": exists_filter,
                "aggs": {"_filter": filtered_terms}
            },
            "_missing": missing
        }})

    def count(self, row):
        """
        Record the key found at row[self.start], provided the bucket has documents.

        A bucket without a key switches on `allowNulls` for the edge instead.
        """
        part = row[self.start]
        if not part['doc_count']:
            return

        key = part.get('key')
        # `!= None` IS KEPT ON PURPOSE (NOT `is not None`); PRESUMABLY NULL-LIKE OBJECTS
        # THAT COMPARE EQUAL TO None MUST COUNT AS MISSING - TODO CONFIRM
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        Replace the domain (on both the edge and this decoder) with a SimpleSetDomain
        built from the distinct, sorted keys gathered by count().
        """
        distinct_keys = set(self.parts)
        computed = SimpleSetDomain(partitions=jx.sort(distinct_keys))
        self.edge.domain = computed
        self.domain = computed
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        Return the index for the key found at row[self.start].

        With a computed domain the index comes from the domain; otherwise indexes are
        handed out in order of first appearance and remembered in self.key2index.
        Any failure is reported through Log.error.
        """
        use_domain = self.computed_domain
        try:
            part = row[self.start]
            key = self.pull(part.get('key'))
            if use_domain:
                return self.domain.getIndexByKey(key)

            index = self.key2index.get(key)
            if index is None:
                # FIRST TIME THIS KEY IS SEEN: IT GETS THE NEXT FREE INDEX
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """This decoder always reports one column."""
        return 1