def __init__(self, edge, query, limit):
    """
    Set up the decoder state for one edge.

    :param edge: the edge being decoded; its domain object is kept as self.domain
    :param query: the owning query; supplies a fallback limit (query.limit)
    :param limit: passed through, unchanged, to AggsDecoder.__init__
    """
    AggsDecoder.__init__(self, edge, query, limit)

    self.domain = edge.domain
    # THE CAPPED LIMIT IS WRITTEN BACK ONTO THE DOMAIN OBJECT SHARED WITH THE EDGE
    requested_limit = coalesce(self.domain.limit, query.limit, 10)
    self.domain.limit = Math.min(requested_limit, MAX_LIMIT)

    self.parts = list()
    self.key2index = {}
    self.computed_domain = False

    simplified_value = self.edge.value.partial_eval()
    self.script = simplified_value.to_es14_script(self.schema)
    self.pull = pull_functions[self.script.data_type]
    self.missing = self.script.miss.partial_eval()
    self.exists = NotOp("not", self.missing).partial_eval()

    # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
    term_direction = {1: "asc", -1: "desc"}
    matching_sorts = [s for s in self.query.sort if s.value == self.edge.value]
    if not matching_sorts:
        self.es_order = None
    else:
        # ONLY THE FIRST MATCHING SORT CLAUSE DECIDES THE DIRECTION
        self.es_order = {"_term": term_direction[matching_sorts[0].sort]}
def append_query(self, es_query, start):
    """
    Build a "filters" aggregation with one filter per partition of the edge's domain.

    Each partition's filter is its own `where` ANDed with the negation of every
    earlier partition's `where`. The "_missing" filter (only when the edge allows
    nulls) is the AND of all the negations.

    :param es_query: combined into each generated aggregation through set_default
    :param start: recorded on self.start
    :return: wrap()-ed {"aggs": {"_match": ..., "_missing": ...}}
    """
    self.start = start

    partition_filters = []
    negated_so_far = []  # NOT(where) FOR EVERY PARTITION ALREADY VISITED
    for partition in self.edge.domain.partitions:
        where = partition.where
        exclusive = AndOp("and", [where] + negated_so_far)
        partition_filters.append(exclusive.to_es14_filter(self.schema))
        negated_so_far.append(NotOp("not", where))

    if self.edge.allowNulls:
        # TODO: Use Expression.missing().esfilter() TO GET OPTIMIZED FILTER
        matched_nothing = AndOp("and", negated_so_far).to_es14_filter(self.schema)
        missing_filter = set_default({"filter": matched_nothing}, es_query)
    else:
        missing_filter = None

    match_agg = set_default({"filters": {"filters": partition_filters}}, es_query)
    return wrap({
        "aggs": {
            "_match": match_agg,
            "_missing": missing_filter
        }
    })
def _range_composer(edge, domain, es_query, to_float, schema):
    """
    Compose a "range" aggregation over the partitions of `domain`.

    :param edge: edge whose value is bucketed; edge.allowNulls enables the "_missing" filter
    :param domain: supplies min/max (falling back to the partitions' min/max) and the partitions
    :param es_query: combined into the generated aggregations through set_default
    :param to_float: applied to every boundary before it is placed in the request
    :param schema: used to resolve the ES column, or to render the script and filter
    :return: wrap()-ed {"aggs": {"_match": ..., "_missing": ...}}
    """
    # USE RANGES
    lower = coalesce(domain.min, MIN(domain.partitions.min))
    upper = coalesce(domain.max, MAX(domain.partitions.max))

    missing_filter = None
    if edge.allowNulls:
        # "IN RANGE" MEANS: VALUE EXISTS AND lower <= VALUE < upper
        in_range = AndOp("and", [
            edge.value.exists(),
            InequalityOp("gte", [edge.value, Literal(None, to_float(lower))]),
            InequalityOp("lt", [edge.value, Literal(None, to_float(upper))])
        ]).partial_eval()
        out_of_range = NotOp("not", in_range).to_esfilter(schema)
        missing_filter = set_default({"filter": out_of_range}, es_query)

    if not isinstance(edge.value, Variable):
        calc = {"script": edge.value.to_es_script(schema).script(schema)}
    else:
        calc = {"field": schema.leaves(edge.value.var)[0].es_column}

    ranges = [
        {"from": to_float(p.min), "to": to_float(p.max)}
        for p in domain.partitions
    ]
    match_agg = set_default(
        {"range": calc},
        {"range": {"ranges": ranges}},
        es_query
    )
    return wrap({"aggs": {
        "_match": match_agg,
        "_missing": missing_filter
    }})
def append_query(self, es_query, start):
    """
    Build a "terms" aggregation restricted to the keys of the domain's partitions.

    :param es_query: combined into the generated aggregations through set_default
    :param start: recorded on self.start
    :return: wrap()-ed {"aggs": {"_match": ..., "_missing": ...}}; "_missing" is
             None unless the edge allows nulls
    """
    self.start = start
    domain = self.domain
    domain_key = domain.key

    # ONE (value, text) PAIR PER PARTITION KEY; int/float KEYS ARE NORMALIZED TO float
    pairs = []
    for partition in domain.partitions:
        key = partition[domain_key]
        if isinstance(key, (int, float)):
            as_float = float(key)
            pairs.append((as_float, text_type(as_float)))
        else:
            pairs.append((key, key))
    # text_include IS NOT USED BELOW; THE UNPACKING IS KEPT AS ORIGINALLY WRITTEN
    include, text_include = transpose(*pairs)

    value = self.edge.value
    exists = AndOp(
        "and",
        [value.exists(), InOp("in", [value, Literal("literal", include)])]
    ).partial_eval()

    limit = coalesce(self.limit, len(domain.partitions))

    if not isinstance(value, Variable):
        script = value.to_es14_script(self.schema).script(self.schema)
        terms = set_default(
            {"terms": {"script": script, "size": limit}},
            es_query
        )
    else:
        # ALREADY CHECKED THERE IS ONLY ONE
        es_field = first(self.query.frum.schema.leaves(value.var)).es_column
        terms = set_default(
            {"terms": {
                "field": es_field,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }},
            es_query
        )

    missing = None
    if self.edge.allowNulls:
        not_exists = NotOp("not", exists).to_es14_filter(self.schema)
        missing = set_default({"filter": not_exists}, es_query)

    return wrap({
        "aggs": {
            "_match": {
                "filter": exists.to_es14_filter(self.schema),
                "aggs": {"_filter": terms}
            },
            "_missing": missing
        }
    })
def append_query(self, es_query, start):
    """
    Build the "terms" aggregation for this edge, plus a "_missing" filter.

    Three shapes are produced:
      * edge value is not a Variable: script-based terms, nested under an "exists" filter
      * edge value is a Variable that also appears in the query sort: field-based
        terms ordered by term, direction taken from the first matching sort clause
      * otherwise: field-based terms with no explicit order

    :param es_query: combined into the generated aggregations through set_default
    :param start: recorded on self.start
    :return: wrap()-ed {"aggs": {"_match": ..., "_missing": ...}}
    """
    self.start = start

    value = self.edge.value.partial_eval()
    script = value.to_ruby(self.schema)
    exists = NotOp("not", script.miss).partial_eval()

    if not isinstance(self.edge.value, Variable):
        match_agg = {
            "filter": exists.to_esfilter(self.schema),
            "aggs": {
                "_filter": set_default(
                    {"terms": {
                        "script": script.expr,
                        "size": self.domain.limit,
                        "order": {"_term": self.sorted} if self.sorted else None
                    }},
                    es_query
                )
            }
        }
    else:
        edge_var = self.edge.value.var
        # SORT DIRECTIONS OF EVERY SORT CLAUSE ON THE SAME VARIABLE AS THE EDGE
        directions = [s.sort for s in self.query.sort if s.value.var == edge_var]
        terms = {
            "field": self.schema.leaves(edge_var)[0].es_column,
            "size": self.domain.limit
        }
        if directions:
            terms["order"] = {"_term": "asc" if directions[0] == 1 else "desc"}
        match_agg = set_default({"terms": terms}, es_query)

    # THE _missing FILTER IS THE SAME IN ALL THREE SHAPES
    missing_agg = set_default(
        {"filter": NotOp("not", exists).to_esfilter(self.schema)},
        es_query
    )
    return wrap({"aggs": {
        "_match": match_agg,
        "_missing": missing_agg
    }})
def __init__(self, edge, query, limit):
    """
    Set up the decoder state for one edge.

    :param edge: the edge being decoded; its domain object is kept as self.domain
    :param query: the owning query; supplies a fallback limit (query.limit)
    :param limit: passed through, unchanged, to AggsDecoder.__init__
    """
    AggsDecoder.__init__(self, edge, query, limit)

    domain = edge.domain
    self.domain = domain
    # NOTE: SAME OBJECT AS edge.domain, SO THE CAPPED LIMIT IS VISIBLE THROUGH THE EDGE TOO
    domain.limit = Math.min(coalesce(domain.limit, query.limit, 10), MAX_LIMIT)

    self.parts = list()
    self.key2index = {}
    self.computed_domain = False

    script = self.edge.value.partial_eval().to_es_script(self.schema)
    self.script = script
    self.pull = pull_functions[script.data_type]
    self.missing = script.miss.partial_eval()
    self.exists = NotOp("not", self.missing).partial_eval()

    # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
    directions = {1: "asc", -1: "desc"}
    matches = [s for s in self.query.sort if s.value == self.edge.value]
    # ONLY THE FIRST MATCHING SORT CLAUSE IS CONSULTED
    self.es_order = {"_term": directions[matches[0].sort]} if matches else None
def append_query(self, es_query, start):
    """
    Nest one "terms" aggregation per entry of self.fields around es_query.

    Each pass wraps the query built so far, so the last field ends up outermost.
    When the domain has a `where`, the result is wrapped once more in a filter.

    :param es_query: innermost query; combined into each level through set_default
    :param start: recorded on self.start
    :return: the nested query; a plain dict (not wrap()-ed) when domain.where applies
    """
    # TODO: USE "reverse_nested" QUERY TO PULL THESE
    self.start = start

    for field in self.fields:
        exists = field.exists().partial_eval()
        exists_filter = exists.to_es14_filter(self.schema)
        terms_agg = set_default(
            {
                "terms": {
                    "field": first(self.schema.leaves(field.var)).es_column,
                    "size": self.domain.limit
                }
            },
            es_query
        )
        nest = wrap({
            "aggs": {
                "_match": {
                    "filter": exists_filter,
                    "aggs": {"_filter": terms_agg}
                }
            }
        })
        nest.aggs._missing = set_default(
            {"filter": NotOp("not", exists).to_es14_filter(self.schema)},
            es_query
        )
        es_query = nest

    where = self.domain.where
    if not where:
        return es_query

    filter_ = where.partial_eval().to_es14_filter(self.schema)
    return {
        "aggs": {
            "_filter": set_default({"filter": filter_}, es_query)
        }
    }
class DefaultDecoder(SetDecoder):
    # FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)

    def __init__(self, edge, query, limit):
        """
        Set up the decoder state for one edge.

        :param edge: the edge being decoded; its domain object is kept as self.domain
        :param query: the owning query; supplies a fallback limit (query.limit)
        :param limit: passed through, unchanged, to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        self.domain = edge.domain
        # THE CAPPED LIMIT IS WRITTEN BACK ONTO THE DOMAIN OBJECT SHARED WITH THE EDGE
        requested_limit = coalesce(self.domain.limit, query.limit, 10)
        self.domain.limit = Math.min(requested_limit, MAX_LIMIT)

        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

        simplified_value = self.edge.value.partial_eval()
        self.script = simplified_value.to_es14_script(self.schema)
        self.pull = pull_functions[self.script.data_type]
        self.missing = self.script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        term_direction = {1: "asc", -1: "desc"}
        matching_sorts = [s for s in self.query.sort if s.value == self.edge.value]
        if not matching_sorts:
            self.es_order = None
        else:
            self.es_order = {"_term": term_direction[matching_sorts[0].sort]}

    def append_query(self, es_query, start):
        """
        Build the "terms" aggregation for this edge.

        :param es_query: combined into the generated aggregations through set_default
        :param start: recorded on self.start
        :return: wrap()-ed {"aggs": {...}} holding "_match" and, usually, "_missing"
        """
        self.start = start

        if isinstance(self.edge.value, Variable):
            # PLAIN VARIABLE: AGGREGATE DIRECTLY ON THE ES COLUMN
            field_terms = set_default(
                {
                    "terms": {
                        "field": first(self.schema.leaves(self.edge.value.var)).es_column,
                        "size": self.domain.limit,
                        "order": self.es_order
                    }
                },
                es_query
            )
            missing_agg = set_default(
                {"filter": self.missing.to_es14_filter(self.schema)},
                es_query
            )
            return wrap({
                "aggs": {
                    "_match": field_terms,
                    "_missing": missing_agg
                }
            })

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            script_terms = set_default(
                {
                    "terms": {
                        "script": self.script.expr,
                        "size": self.domain.limit,
                        "order": self.es_order
                    }
                },
                es_query
            )
            return wrap({"aggs": {"_match": script_terms}})

        # _match AND _filter REVERSED SO _match LINES UP WITH _missing
        exists_filter = self.exists.to_es14_filter(self.schema)
        script_terms = set_default(
            {
                "terms": {
                    "script": self.script.expr,
                    "size": self.domain.limit,
                    "order": self.es_order
                }
            },
            es_query
        )
        missing_agg = set_default(
            {"filter": self.missing.to_es14_filter(self.schema)},
            es_query
        )
        return wrap({
            "aggs": {
                "_match": {
                    "filter": exists_filter,
                    "aggs": {"_filter": script_terms}
                },
                "_missing": missing_agg
            }
        })

    def count(self, row):
        """
        Record the key found at row[self.start], if that bucket holds any documents.

        A populated bucket without a key switches on edge.allowNulls instead.
        """
        part = row[self.start]
        if not part['doc_count']:
            return

        key = part.get('key')
        # `!= None` IS KEPT AS WRITTEN (NOT `is not None`)
        # NOTE(review): PRESUMABLY SO NULL-LIKE WRAPPER OBJECTS ALSO COUNT AS MISSING - CONFIRM
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        Replace the domain (on both the edge and this decoder) with a SimpleSetDomain
        built from the distinct, sorted keys gathered by count().
        """
        distinct_sorted = jx.sort(set(self.parts))
        computed = SimpleSetDomain(partitions=distinct_sorted)
        self.edge.domain = computed
        self.domain = computed
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        Return the partition index for the key found at row[self.start].

        With a computed domain the index comes from the domain; otherwise indexes
        are handed out in order of first appearance and remembered in key2index.
        Any failure is reported through Log.error with the original exception as cause.
        """
        use_domain = self.computed_domain
        try:
            part = row[self.start]
            key = self.pull(part.get('key'))
            if use_domain:
                return self.domain.getIndexByKey(key)

            index = self.key2index.get(key)
            if index is None:
                # FIRST TIME THIS KEY IS SEEN: APPEND A NEW PART AND REMEMBER ITS POSITION
                index = len(self.parts)
                self.parts.append({"key": key, "dataIndex": index})
                self.key2index[key] = index
            return index
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """This decoder always reports a single column."""
        return 1
class DefaultDecoder(SetDecoder):
    # FOR DECODING THE default DOMAIN TYPE (UNKNOWN-AT-QUERY-TIME SET OF VALUES)

    def __init__(self, edge, query, limit):
        """
        Set up the decoder state for one edge.

        :param edge: the edge being decoded; its domain object is kept as self.domain
        :param query: the owning query; supplies a fallback limit (query.limit)
        :param limit: passed through, unchanged, to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        domain = edge.domain
        self.domain = domain
        # NOTE: SAME OBJECT AS edge.domain, SO THE CAPPED LIMIT IS VISIBLE THROUGH THE EDGE TOO
        domain.limit = Math.min(coalesce(domain.limit, query.limit, 10), MAX_LIMIT)

        self.parts = list()
        self.key2index = {}
        self.computed_domain = False

        script = self.edge.value.partial_eval().to_es_script(self.schema)
        self.script = script
        self.pull = pull_functions[script.data_type]
        self.missing = script.miss.partial_eval()
        self.exists = NotOp("not", self.missing).partial_eval()

        # WHEN SORT VALUE AND EDGE VALUE MATCHES, WE SORT BY TERM
        directions = {1: "asc", -1: "desc"}
        matches = [s for s in self.query.sort if s.value == self.edge.value]
        self.es_order = {"_term": directions[matches[0].sort]} if matches else None

    def append_query(self, es_query, start):
        """
        Build the "terms" aggregation for this edge.

        :param es_query: combined into the generated aggregations through set_default
        :param start: recorded on self.start
        :return: wrap()-ed {"aggs": {...}} holding "_match" and, usually, "_missing"
        """
        self.start = start

        if isinstance(self.edge.value, Variable):
            # PLAIN VARIABLE: AGGREGATE DIRECTLY ON THE ES COLUMN
            match_agg = set_default(
                {"terms": {
                    "field": self.schema.leaves(self.edge.value.var)[0].es_column,
                    "size": self.domain.limit,
                    "order": self.es_order
                }},
                es_query
            )
            missing_agg = set_default(
                {"filter": self.missing.to_esfilter(self.schema)},
                es_query
            )
            return wrap({"aggs": {
                "_match": match_agg,
                "_missing": missing_agg
            }})

        if self.exists is TRUE:
            # IF True THEN WE DO NOT NEED THE _filter OR THE _missing (THIS RARELY HAPPENS THOUGH)
            match_agg = set_default(
                {"terms": {
                    "script": self.script.expr,
                    "size": self.domain.limit,
                    "order": self.es_order
                }},
                es_query
            )
            return wrap({"aggs": {"_match": match_agg}})

        # _match AND _filter REVERSED SO _match LINES UP WITH _missing
        exists_filter = self.exists.to_esfilter(self.schema)
        inner_terms = set_default(
            {"terms": {
                "script": self.script.expr,
                "size": self.domain.limit,
                "order": self.es_order
            }},
            es_query
        )
        missing_agg = set_default(
            {"filter": self.missing.to_esfilter(self.schema)},
            es_query
        )
        return wrap({"aggs": {
            "_match": {
                "filter": exists_filter,
                "aggs": {"_filter": inner_terms}
            },
            "_missing": missing_agg
        }})

    def count(self, row):
        """
        Record the key found at row[self.start], if that bucket holds any documents.

        A populated bucket without a key switches on edge.allowNulls instead.
        """
        part = row[self.start]
        if not part['doc_count']:
            return

        key = part.get('key')
        # `!= None` IS KEPT AS WRITTEN (NOT `is not None`)
        # NOTE(review): PRESUMABLY SO NULL-LIKE WRAPPER OBJECTS ALSO COUNT AS MISSING - CONFIRM
        if key != None:
            self.parts.append(self.pull(key))
        else:
            self.edge.allowNulls = True  # OK! WE WILL ALLOW NULLS

    def done_count(self):
        """
        Replace the domain (on both the edge and this decoder) with a SimpleSetDomain
        built from the distinct, sorted keys gathered by count().
        """
        new_domain = SimpleSetDomain(
            partitions=jx.sort(set(self.parts))
        )
        self.edge.domain = new_domain
        self.domain = new_domain
        self.parts = None
        self.computed_domain = True

    def get_index(self, row):
        """
        Return the partition index for the key found at row[self.start].

        With a computed domain the index comes from the domain; otherwise indexes
        are handed out in order of first appearance and remembered in key2index.
        Any failure is reported through Log.error with the original exception as cause.
        """
        domain_is_final = self.computed_domain
        try:
            bucket = row[self.start]
            key = self.pull(bucket.get('key'))
            if domain_is_final:
                return self.domain.getIndexByKey(key)

            position = self.key2index.get(key)
            if position is None:
                # FIRST TIME THIS KEY IS SEEN: APPEND A NEW PART AND REMEMBER ITS POSITION
                position = len(self.parts)
                self.parts.append({"key": key, "dataIndex": position})
                self.key2index[key] = position
            return position
        except Exception as e:
            Log.error("problem", cause=e)

    @property
    def num_columns(self):
        """This decoder always reports a single column."""
        return 1