def _range_composer(self, edge, domain, es_query, to_float, schema):
    """
    Build the aggregations for an edge whose domain is a list of min/max partitions.

    A range aggregation named "_match" is always added, with one from/to
    entry per partition.  When `edge.allowNulls` is truthy, a filter
    aggregation named "_missing" is added first; it selects everything that
    is NOT (value exists and lower bound <= value < upper bound).

    :param edge: edge being decoded; `value` and `allowNulls` are read
    :param domain: supplies `partitions` (each with `min`/`max`) and optional `min`/`max`
    :param es_query: aggregation nested under both "_match" and "_missing"
    :param to_float: applied to every boundary before it is placed in the query
    :param schema: used to find the column, or to generate the script
    :return: the result of the final `add()` on the `Aggs` container
    """
    # OVERALL BOUNDS: EXPLICIT DOMAIN LIMITS WIN, OTHERWISE DERIVED FROM THE PARTITIONS
    lower = coalesce(domain.min, MIN(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    aggs = Aggs()
    if edge.allowNulls:
        in_domain = AndOp([
            edge.value.exists(),
            GteOp([edge.value, Literal(to_float(lower))]),
            LtOp([edge.value, Literal(to_float(upper))]),
        ]).partial_eval()
        missing = FilterAggs("_missing", NotOp(in_domain), self)
        aggs.add(missing.add(es_query))

    # A PLAIN VARIABLE IS AGGREGATED BY FIELD, ANYTHING ELSE BY SCRIPT
    if is_op(edge.value, Variable):
        leaf = first(schema.leaves(edge.value.var))
        range_spec = {"field": leaf.es_column}
    else:
        script = Painless[edge.value].to_es_script(schema)
        range_spec = {"script": text_type(script)}
    range_spec["ranges"] = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]

    match = RangeAggs("_match", range_spec, self)
    return aggs.add(match.add(es_query))
def _range_composer(edge, domain, es_query, to_float, schema):
    """
    Build the `aggs` clause for an edge whose domain is a list of min/max partitions.

    The result always has a "_match" range aggregation with one from/to entry
    per partition.  "_missing" is a filter aggregation selecting everything
    that is NOT (value exists and lower bound <= value < upper bound) when
    `edge.allowNulls` is truthy, otherwise it is None.  `es_query` is passed
    to `set_default` as the fallback for both aggregations.

    :param edge: edge being decoded; `value` and `allowNulls` are read
    :param domain: supplies `partitions` (each with `min`/`max`) and optional `min`/`max`
    :param es_query: defaults merged into both aggregations
    :param to_float: applied to every boundary before it is placed in the query
    :param schema: used to find the column, build the filter, or generate the script
    :return: `wrap()` of {"aggs": {"_match": ..., "_missing": ...}}
    """
    # OVERALL BOUNDS: EXPLICIT DOMAIN LIMITS WIN, OTHERWISE DERIVED FROM THE PARTITIONS
    lower = coalesce(domain.min, MIN(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    missing_agg = None
    if edge.allowNulls:
        in_domain = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lower))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(upper))]),
        ]).partial_eval()
        missing_agg = set_default(
            {"filter": NotOp("not", in_domain).to_esfilter(schema)},
            es_query,
        )

    # A PLAIN VARIABLE IS AGGREGATED BY FIELD, ANYTHING ELSE BY SCRIPT
    if isinstance(edge.value, Variable):
        range_source = {"field": schema.leaves(edge.value.var)[0].es_column}
    else:
        range_source = {"script": edge.value.to_painless(schema).script(schema)}

    ranges = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]
    match_agg = set_default(
        {"range": range_source},
        {"range": {"ranges": ranges}},
        es_query,
    )

    return wrap({"aggs": {
        "_match": match_agg,
        "_missing": missing_agg,
    }})
def append_query(self, query_path, es_query):
    """
    Add one filter aggregation per domain partition for a range edge.

    Partition number i becomes a filter aggregation named "_match<i>" that
    selects records with `edge.range.min` <= partition min < `edge.range.max`.
    Only the `min` of each partition is used.  `es_query` is nested under
    every filter.

    :param query_path: not used by this method
    :param es_query: aggregation nested under each partition filter
    :return: `Aggs` container holding all the partition filters
    """
    span = self.edge.range
    partitions = self.edge.domain.partitions

    result = Aggs()
    for index, part in enumerate(partitions):
        starts_by = LteOp([span.min, Literal(self.to_float(part.min))])
        ends_after = GtOp([span.max, Literal(self.to_float(part.min))])
        covers = AndOp([starts_by, ends_after])

        bucket = FilterAggs("_match" + text(index), covers, self)
        result.add(bucket.add(es_query))

    return result
def append_query(self, query_path, es_query):
    """
    Add one filter aggregation per domain partition for a range edge.

    Partition number i becomes a filter aggregation named "_match<i>" that
    selects records with `edge.range.min` <= partition min < `edge.range.max`.
    Only the `min` of each partition is used.  `es_query` is nested under
    every filter.

    :param query_path: not used by this method
    :param es_query: aggregation nested under each partition filter
    :return: `Aggs` container holding all the partition filters
    """
    span = self.edge.range
    partitions = self.edge.domain.partitions

    result = Aggs()
    for index, part in enumerate(partitions):
        starts_by = InequalityOp(
            "lte", [span.min, Literal("literal", self.to_float(part.min))]
        )
        ends_after = InequalityOp(
            "gt", [span.max, Literal("literal", self.to_float(part.min))]
        )
        covers = AndOp("and", [starts_by, ends_after])

        bucket = FilterAggs("_match" + text_type(index), covers, self)
        result.add(bucket.add(es_query))

    return result
def append_query(self, query_path, es_query):
    """
    Add the aggregations for an edge over an explicit set of domain values.

    A "_filter" filter aggregation (value is one of the domain keys) wraps a
    "_match" terms aggregation, which in turn wraps `es_query`.  When the
    edge allows nulls, one more aggregation is added for every path in
    `self.schema.query_path`: "_missing0" for the path equal to
    `query_path`, "_missing1" for every other path.

    :param query_path: the nested path the query is running at
    :param es_query: aggregation nested under the match and missing aggregations
    :return: `Aggs` container with everything added
    """
    domain = self.domain
    key_name = domain.key
    value = self.edge.value
    convert = pull_functions[value.type]
    allowed = tuple(convert(part[key_name]) for part in domain.partitions)

    exists = AndOp("and", [
        InOp("in", [value, Literal("literal", allowed)])
    ]).partial_eval()

    limit = coalesce(self.limit, len(domain.partitions))

    if isinstance(value, Variable):
        # ALREADY CHECKED THERE IS ONLY ONE
        es_field = first(self.query.frum.schema.leaves(value.var)).es_column
        terms_spec = {
            "field": es_field,
            "size": limit,
            "order": {"_term": self.sorted} if self.sorted else None,
        }
    else:
        terms_spec = {
            "script": {
                "lang": "painless",
                "inline": value.to_es_script(self.schema).script(self.schema),
            },
            "size": limit,
        }
    match = TermsAggs("_match", terms_spec, self)

    in_domain = FilterAggs("_filter", exists, None)
    output = Aggs().add(in_domain.add(match.add(es_query)))

    if not self.edge.allowNulls:
        return output

    # FIND NULLS AT EACH NESTED LEVEL
    for path in self.schema.query_path:
        if path == query_path:
            # MISSING AT THE QUERY DEPTH
            nested = NestedAggs(path)
            not_in_domain = FilterAggs("_missing0", NotOp(None, exists), self)
            output.add(nested.add(not_in_domain.add(es_query)))
        else:
            # PARENT HAS NO CHILDREN, SO MISSING
            # THE COLUMN IS LOOKED UP WITH query_path, NOT WITH THE LOOP PATH
            column = first(self.schema.values(query_path, (OBJECT, EXISTS)))
            nested = NestedAggs(column.nested_path[0])
            exists_column = Variable(
                column.es_column.replace(NESTED_TYPE, EXISTS_TYPE)
            )
            no_children = FilterAggs(
                "_missing1",
                NotOp(None, ExistsOp(None, exists_column)),
                self,
            )
            output.add(nested.add(no_children.add(es_query)))

    return output
def append_query(self, es_query, start):
    """
    Build the `aggs` clause for a range edge, one filter per domain partition.

    Partition number i becomes an entry named "_join_<i>" whose filter
    selects records with `edge.range.min` <= partition min < `edge.range.max`.
    Only the `min` of each partition is used.  `es_query` is passed to
    `set_default` as the fallback for every entry.

    :param es_query: defaults merged into each partition aggregation
    :param start: stored on `self.start`
    :return: `wrap()` of {"aggs": {...}}
    """
    self.start = start
    span = self.edge.range
    partitions = self.edge.domain.partitions

    buckets = {}
    for index, part in enumerate(partitions):
        starts_by = InequalityOp(
            "lte", [span.min, Literal("literal", self.to_float(part.min))]
        )
        ends_after = InequalityOp(
            "gt", [span.max, Literal("literal", self.to_float(part.min))]
        )
        covers = AndOp("and", [starts_by, ends_after])

        name = "_join_" + text_type(index)
        buckets[name] = set_default(
            {"filter": covers.to_esfilter(self.schema)},
            es_query,
        )

    return wrap({"aggs": buckets})
def append_query(self, es_query, start):
    """
    Build the `aggs` clause for an edge over an explicit set of domain values.

    "_match" is a filter (value exists and is one of the domain keys)
    holding a "_filter" terms aggregation; `es_query` is passed to
    `set_default` as the fallback for that terms aggregation.  "_missing" is
    the negated filter, with `es_query` as its fallback, when the edge
    allows nulls, otherwise None.

    :param es_query: defaults merged into the terms and missing aggregations
    :param start: stored on `self.start`
    :return: `wrap()` of {"aggs": {"_match": ..., "_missing": ...}}
    """
    self.start = start
    domain = self.domain
    domain_key = domain.key

    # INT AND FLOAT KEYS ARE NORMALISED TO FLOAT, EVERYTHING ELSE IS KEPT AS-IS.
    # BUILT WITH tuple() RATHER THAN BY UNPACKING zip(*...), SO AN EMPTY
    # PARTITION LIST GIVES AN EMPTY TUPLE INSTEAD OF AN UNPACKING ERROR
    include = tuple(
        float(v) if isinstance(v, (int, float)) else v
        for v in (p[domain_key] for p in domain.partitions)
    )

    value = self.edge.value
    exists = AndOp("and", [
        value.exists(),
        InOp("in", [value, Literal("literal", include)])
    ]).partial_eval()

    limit = coalesce(self.limit, len(domain.partitions))

    if isinstance(value, Variable):
        # ALREADY CHECKED THERE IS ONLY ONE
        es_field = self.query.frum.schema.leaves(value.var)[0].es_column
        terms = set_default({"terms": {
            "field": es_field,
            "size": limit,
            "order": {"_term": self.sorted} if self.sorted else None
        }}, es_query)
    else:
        terms = set_default({"terms": {
            "script": {
                "lang": "painless",
                "inline": value.to_painless(self.schema).script(self.schema)
            },
            "size": limit
        }}, es_query)

    if self.edge.allowNulls:
        missing = set_default(
            {"filter": NotOp("not", exists).to_esfilter(self.schema)},
            es_query
        )
    else:
        missing = None

    return wrap({"aggs": {
        "_match": {
            "filter": exists.to_esfilter(self.schema),
            "aggs": {
                "_filter": terms
            }
        },
        "_missing": missing
    }})
def _range_composer(self, edge, domain, es_query, to_float, schema):
    """
    Build the aggregations for an edge whose domain is a list of min/max partitions.

    A range aggregation named "_match" is always added, with one from/to
    entry per partition.  When `edge.allowNulls` is truthy, a filter
    aggregation named "_missing" is added first; it selects everything that
    is NOT (value exists and lower bound <= value < upper bound).

    :param edge: edge being decoded; `value` and `allowNulls` are read
    :param domain: supplies `partitions` (each with `min`/`max`) and optional `min`/`max`
    :param es_query: aggregation nested under both "_match" and "_missing"
    :param to_float: applied to every boundary before it is placed in the query
    :param schema: used to find the column, or to generate the script
    :return: the result of the final `add()` on the `Aggs` container
    """
    # OVERALL BOUNDS: EXPLICIT DOMAIN LIMITS WIN, OTHERWISE DERIVED FROM THE PARTITIONS
    lower = coalesce(domain.min, MIN(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    aggs = Aggs()
    if edge.allowNulls:
        in_domain = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lower))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(upper))]),
        ]).partial_eval()
        missing = FilterAggs("_missing", NotOp("not", in_domain), self)
        aggs.add(missing.add(es_query))

    # A PLAIN VARIABLE IS AGGREGATED BY FIELD, ANYTHING ELSE BY SCRIPT
    if isinstance(edge.value, Variable):
        leaf = first(schema.leaves(edge.value.var))
        range_spec = {"field": leaf.es_column}
    else:
        range_spec = {"script": edge.value.to_es_script(schema).script(schema)}
    range_spec["ranges"] = [
        {"from": to_float(part.min), "to": to_float(part.max)}
        for part in domain.partitions
    ]

    match = RangeAggs("_match", range_spec, self)
    return aggs.add(match.add(es_query))
def append_query(self, query_path, es_query):
    """
    Add the aggregations for an edge over an explicit set of domain values.

    A "_filter" filter aggregation (value is one of the domain keys) wraps a
    "_match" terms aggregation, which in turn wraps `es_query`.  When the
    edge allows nulls, the negated filter is split with `split_expression`
    and, for each resulting term, the nests whose `where` is not TRUE are
    wrapped around `es_query` as nested "_missing<i>" filters.

    :param query_path: not used by this method
    :param es_query: aggregation nested under the match and missing aggregations
    :return: `Aggs` container with everything added
    """
    domain = self.domain
    key_name = domain.key
    value = self.edge.value
    convert = pull_functions[value.type]
    allowed = tuple(convert(part[key_name]) for part in domain.partitions)

    schema = self.schema
    exists = InOp([value, Literal(allowed)]).partial_eval()

    limit = coalesce(self.limit, len(domain.partitions))

    if is_op(value, Variable):
        # ALREADY CHECKED THERE IS ONLY ONE
        es_field = first(schema.leaves(value.var)).es_column
        terms_spec = {
            "field": es_field,
            "size": limit,
            "order": {"_term": self.sorted} if self.sorted else None,
        }
    else:
        terms_spec = {
            "script": text(Painless[value].to_es_script(schema)),
            "size": limit,
        }
    match = TermsAggs("_match", terms_spec, self)

    in_domain = FilterAggs("_filter", exists, None)
    output = Aggs().add(in_domain.add(match.add(es_query)))

    if not self.edge.allowNulls:
        return output

    # IF ALL NESTED COLUMNS ARE NULL, DOES THE FILTER PASS?
    # MISSING AT THE QUERY DEPTH
    concat_inner = split_expression(NotOp(exists), self.query)
    for index, term in enumerate(concat_inner.terms):
        # EACH QUALIFYING NEST WRAPS WHAT HAS BEEN BUILT SO FAR
        wrapped = es_query
        for nest in term.nests:
            if nest.where is TRUE:
                continue
            missing = FilterAggs("_missing" + text(index), nest.where, self)
            wrapped = NestedAggs(nest.path.var).add(missing.add(wrapped))
        output.add(wrapped)

    return output
def append_query(self, query_path, es_query):
    """
    Add the aggregations for an edge over an explicit set of domain values.

    A "_filter" filter aggregation (value is one of the domain keys) wraps a
    "_match" terms aggregation, which in turn wraps `es_query`.  When the
    edge allows nulls, the negated filter is split by nested path with
    `split_expression_by_path`; each path that has a truthy expression gets
    a nested "_missing<i>" filter aggregation wrapping `es_query`, where i
    is the position of the path in descending sort order.

    :param query_path: not used by this method
    :param es_query: aggregation nested under the match and missing aggregations
    :return: `Aggs` container with everything added
    """
    domain = self.domain
    key_name = domain.key
    value = Painless[self.edge.value]
    convert = pull_functions[value.type]
    allowed = tuple(convert(part[key_name]) for part in domain.partitions)

    schema = self.schema
    exists = Painless[AndOp([InOp([value, Literal(allowed)])])].partial_eval()

    limit = coalesce(self.limit, len(domain.partitions))

    if is_op(value, Variable):
        # ALREADY CHECKED THERE IS ONLY ONE
        es_field = first(schema.leaves(value.var)).es_column
        terms_spec = {
            "field": es_field,
            "size": limit,
            "order": {"_term": self.sorted} if self.sorted else None,
        }
    else:
        terms_spec = {
            "script": text(value.to_es_script(schema)),
            "size": limit,
        }
    match = TermsAggs("_match", terms_spec, self)

    in_domain = FilterAggs("_filter", exists, None)
    output = Aggs().add(in_domain.add(match.add(es_query)))

    if not self.edge.allowNulls:
        return output

    # IF ALL NESTED COLUMNS ARE NULL, DOES THE FILTER PASS?
    # MISSING AT THE QUERY DEPTH
    _, by_path = split_expression_by_path(NotOp(exists), schema)
    for index, path in enumerate(sorted(by_path.keys(), reverse=True)):
        expr = by_path.get(path)
        if not expr:
            # THE INDEX STILL ADVANCES FOR PATHS WITH NOTHING TO FILTER
            continue
        missing = FilterAggs("_missing" + text(index), expr, self)
        output.add(NestedAggs(path).add(missing.add(es_query)))

    return output