def _map_term_using_schema(master, path, term, schema_edges):
    """
    IF THE WHERE CLAUSE REFERS TO FIELDS IN THE SCHEMA, THEN EXPAND THEM

    master - TOP LEVEL WHERE; ONLY HANDED DOWN TO THE RECURSIVE CALL
    path - LIST OF KEYS WALKED SO FAR; EXTENDED WITH k ON RECURSION
    term - MAPPING OF NAME TO VALUE (THE BODY OF A {"term": ...} CLAUSE)
    schema_edges - LOOKUP FROM NAME TO Dimension (OR TO A NESTED LOOKUP)

    RETURNS {"and": [...]} HOLDING THE ES FILTERS MADE FROM EVERY KEY
    """
    output = FlatList()
    for k, v in term.items():
        dimension = schema_edges[k]
        if isinstance(dimension, Dimension):
            domain = dimension.getDomain()
            fields = dimension.fields
            if fields:
                if isinstance(fields, Mapping):
                    # fields MAPS LOCAL NAME -> ES FIELD; v IS INDEXED BY LOCAL NAME
                    for local_field, es_field in fields.items():
                        local_value = v[local_field]
                        # KEPT AS == (NOT is); PRESUMABLY NULL-LIKE OBJECTS MUST MATCH TOO - CONFIRM
                        if local_value == None:
                            output.append({"missing": {"field": es_field}})
                        else:
                            output.append({"term": {es_field: local_value}})
                    continue

                if len(fields) == 1 and is_variable_name(fields[0]):
                    # SIMPLE SINGLE-VALUED FIELD
                    if domain.getPartByKey(v) is domain.NULL:
                        output.append({"missing": {"field": fields[0]}})
                    else:
                        output.append({"term": {fields[0]: v}})
                    continue

                if AND(is_variable_name(f) for f in fields):
                    # EXPECTING A TUPLE, ONE ELEMENT PER FIELD
                    if not isinstance(v, tuple):
                        Log.error("expecting {{name}}={{value}} to be a tuple", name=k, value=v)
                    for i, f in enumerate(fields):
                        vv = v[i]
                        if vv == None:
                            output.append({"missing": {"field": f}})
                        else:
                            output.append({"term": {f: vv}})
                    continue

            # A SECOND COPY OF THE SINGLE-FIELD TEST USED TO SIT HERE; IT COULD
            # NEVER MATCH (THE SAME TEST ABOVE ALREADY continue'D) SO IT IS GONE

            if domain.partitions:
                part = domain.getPartByKey(v)
                if part is domain.NULL or not part.esfilter:
                    Log.error("not expected to get NULL")
                output.append(part.esfilter)
                continue
            else:
                Log.error("not expected")
        elif isinstance(v, Mapping):
            # NESTED STRUCTURE: DESCEND USING THE MATCHING SUB-SCHEMA
            sub = _map_term_using_schema(master, path + [k], v, schema_edges[k])
            output.append(sub)
            continue

        # NOT A KNOWN DIMENSION: PASS THE TERM THROUGH UNCHANGED
        output.append({"term": {k: v}})
    return {"and": output}
def _map_term_using_schema(master, path, term, schema_edges):
    """
    IF THE WHERE CLAUSE REFERS TO FIELDS IN THE SCHEMA, THEN EXPAND THEM

    master - TOP LEVEL WHERE; NOT READ HERE, JUST FORWARDED WHEN RECURSING
    path - KEYS LEADING TO term; path + [k] IS FORWARDED WHEN RECURSING
    term - MAPPING OF NAME TO THE VALUE IT MUST EQUAL
    schema_edges - INDEXABLE BY NAME, GIVING A Dimension OR A NESTED LOOKUP

    RETURNS {"and": <FlatList OF ES FILTERS>}
    """

    def _term_or_missing(es_field, value):
        # A NULL-LIKE VALUE (== None) BECOMES A "missing" FILTER, ANYTHING ELSE A "term"
        if value == None:
            return {"missing": {"field": es_field}}
        return {"term": {es_field: value}}

    output = FlatList()
    for k, v in term.items():
        dimension = schema_edges[k]

        if not isinstance(dimension, Dimension):
            if isinstance(v, Mapping):
                # NESTED STRUCTURE: RECURSE WITH THE MATCHING SUB-SCHEMA
                output.append(_map_term_using_schema(master, path + [k], v, schema_edges[k]))
            else:
                # UNKNOWN NAME: PASS THROUGH AS A PLAIN TERM
                output.append({"term": {k: v}})
            continue

        domain = dimension.getDomain()
        fields = dimension.fields
        if fields:
            if isinstance(fields, Mapping):
                # fields IS {local_name: es_field}; v IS INDEXED BY local_name
                for local_field, es_field in fields.items():
                    output.append(_term_or_missing(es_field, v[local_field]))
                continue

            if len(fields) == 1 and is_variable_name(fields[0]):
                # SIMPLE SINGLE-VALUED FIELD
                if domain.getPartByKey(v) is domain.NULL:
                    output.append({"missing": {"field": fields[0]}})
                else:
                    output.append({"term": {fields[0]: v}})
                continue

            if AND(is_variable_name(f) for f in fields):
                # EXPECTING A TUPLE, POSITIONALLY ALIGNED WITH fields
                if not isinstance(v, tuple):
                    Log.error("expecting {{name}}={{value}} to be a tuple", name=k, value=v)
                for i, f in enumerate(fields):
                    output.append(_term_or_missing(f, v[i]))
                continue

        # (THE DUPLICATED, UNREACHABLE SINGLE-FIELD TEST THAT FOLLOWED HAS BEEN REMOVED)

        if domain.partitions:
            part = domain.getPartByKey(v)
            if part is domain.NULL or not part.esfilter:
                Log.error("not expected to get NULL")
            output.append(part.esfilter)
            continue

        Log.error("not expected")
        # ONLY REACHED IF Log.error RETURNS: FALL BACK TO A PLAIN TERM, AS BEFORE
        output.append({"term": {k: v}})
    return {"and": output}
def compileNullTest(edge):
    """
    RETURN A MVEL EXPRESSION THAT WILL EVALUATE TO true FOR OUT-OF-BOUNDS

    THE TEST IS value < domain.min AND/OR value >= domain.max, DEPENDING ON
    WHICH BOUNDS ARE SET.  WHEN NEITHER BOUND IS SET THE PYTHON BOOLEAN False
    (NOT A STRING) IS RETURNED.
    """
    if edge.domain.type not in domains.ALGEBRAIC:
        Log.error("can only translate time and duration domains")

    # A BARE VARIABLE NAME IS READ FROM THE DOCUMENT
    value = edge.value
    if is_variable_name(value):
        value = 'doc["' + value + '"].value'

    # IS THERE A LIMIT ON THE DOMAIN?
    upper = edge.domain.max
    lower = edge.domain.min

    if not upper and not lower:
        return False
    if not upper:
        # ONLY A LOWER BOUND
        return "" + value + "<" + value2MVEL(lower)
    if not lower:
        # ONLY AN UPPER BOUND
        return "" + value + ">=" + value2MVEL(upper)

    top = value2MVEL(upper)
    bot = value2MVEL(lower)
    return "(" + value + "<" + bot + ") or (" + value + ">=" + top + ")"
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    expr - LITERAL, VARIABLE NAME, Date, QueryOp, MAPPING OR list/set/tuple
    RETURNS expr WITH VARIABLE NAMES REPLACED BY self.dimensions[name] (WHEN
    THAT IS SET) AND CONTAINERS CONVERTED RECURSIVELY.  A STRING THAT IS NOT
    A VARIABLE NAME IS REPORTED THROUGH Log.error
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif Math.is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        return coalesce(self.dimensions[expr], expr)
    elif isinstance(expr, text_type):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif isinstance(expr, QueryOp):
        return self._convert_query(expr)
    elif isinstance(expr, Mapping):
        if expr["from"]:
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            converter = converter_map.get(k)
            if converter is None:
                # self._convert_bop IS ALREADY BOUND TO self; THE OLD CODE PASSED
                # self A SECOND TIME, SHIFTING EVERY ARGUMENT BY ONE
                return self._convert_bop(k, v)
            # ENTRIES OF THE MODULE-LEVEL converter_map ARE CALLED WITH self EXPLICITLY
            return converter(self, k, v)
    elif isinstance(expr, (list, set, tuple)):
        return wrap([self.convert(value) for value in expr])
    else:
        return expr
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value

    LITERALS (True/False/None-LIKE, NUMBERS, ".", Date) COME BACK UNCHANGED
    A VARIABLE NAME IS REPLACED BY self.dimensions[name] WHEN THAT IS SET
    ANY OTHER STRING IS REPORTED THROUGH Log.error
    QUERIES GO TO self._convert_query; CONTAINERS ARE CONVERTED ELEMENT-WISE
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif Math.is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        return coalesce(self.dimensions[expr], expr)
    elif isinstance(expr, basestring):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif isinstance(expr, QueryOp):
        return self._convert_query(expr)
    elif isinstance(expr, Mapping):
        if expr["from"]:
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            if k in converter_map:
                # MODULE-LEVEL CONVERTERS TAKE self AS AN EXPLICIT FIRST ARGUMENT
                return converter_map[k](self, k, v)
            # THE FALLBACK IS A BOUND METHOD: self MUST NOT BE PASSED AGAIN
            # (THE OLD CALL DID, WHICH SHIFTED EVERY ARGUMENT BY ONE)
            return self._convert_bop(k, v)
    elif isinstance(expr, (list, set, tuple)):
        return wrap([self.convert(value) for value in expr])
    else:
        return expr
def compileNullTest(edge):
    """
    RETURN A MVEL EXPRESSION THAT WILL EVALUATE TO true FOR OUT-OF-BOUNDS

    edge - MUST HAVE AN ALGEBRAIC DOMAIN (OTHERWISE Log.error IS CALLED)
    RETURNS THE EXPRESSION AS A STRING, OR THE PYTHON VALUE False WHEN THE
    DOMAIN HAS NEITHER min NOR max
    """
    domain = edge.domain
    if domain.type not in domains.ALGEBRAIC:
        Log.error("can only translate time and duration domains")

    value = edge.value
    if is_variable_name(value):
        # SIMPLE FIELD REFERENCE: READ IT FROM THE DOCUMENT
        value = "doc[\"" + value + "\"].value"

    # IS THERE A LIMIT ON THE DOMAIN?
    has_max = bool(domain.max)
    has_min = bool(domain.min)

    if has_max and has_min:
        top = value2MVEL(domain.max)
        bot = value2MVEL(domain.min)
        nullTest = "(" + value + "<" + bot + ") or (" + value + ">=" + top + ")"
    elif has_max:
        top = value2MVEL(domain.max)
        nullTest = "" + value + ">=" + top
    elif has_min:
        bot = value2MVEL(domain.min)
        nullTest = "" + value + "<" + bot
    else:
        # UNBOUNDED: NOTHING IS OUT OF RANGE
        return False

    return nullTest
def compileDuration2Term(edge):
    """
    RETURN MVEL CODE THAT MAPS A DURATION VALUE DOWN TO AN INTEGER PARTITION
    INDEX, AND THE PYTHON FUNCTION THAT TURNS THAT INTEGER BACK INTO A PARTITION

    OUT-OF-BOUNDS VALUES MAP TO THE INDEX len(edge.domain.partitions), WHICH
    fromTerm TRANSLATES TO edge.domain.NULL

    RETURNS Data(toTerm={"head": "", "body": <MVEL>}, fromTerm=<FUNCTION>)
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    ref = coalesce(edge.domain.min, edge.domain.max, durations.ZERO)

    nullTest = compileNullTest(edge)
    if nullTest is False:
        # compileNullTest GIVES PYTHON False FOR AN UNBOUNDED DOMAIN; THE
        # SCRIPT NEEDS THE MVEL LITERAL
        nullTest = "false"

    ms = edge.domain.interval.milli
    if edge.domain.interval.month > 0:
        # MONTH-BASED INTERVALS ARE SIZED AS A TWELFTH OF A YEAR PER MONTH
        ms = durations.YEAR.milli / 12 * edge.domain.interval.month

    # NUMBERS MUST BE CONVERTED EXPLICITLY: str + int RAISES TypeError
    partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + str(ms) + ")"
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def compileDuration2Term(edge):
    """
    COMPILE A DURATION EDGE TO A PAIR OF TRANSLATORS

    toTerm.body - MVEL THAT GIVES THE INTEGER PARTITION INDEX OF A VALUE, OR
                  THE NUMBER OF PARTITIONS WHEN THE VALUE IS OUT-OF-BOUNDS
    fromTerm    - PYTHON FUNCTION FROM THAT INTEGER BACK TO THE PARTITION
                  (edge.domain.NULL FOR THE OUT-OF-BOUNDS INDEX)
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    domain = edge.domain

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = 'doc["' + value + '"].value'

    ref = coalesce(domain.min, domain.max, durations.ZERO)

    # compileNullTest RETURNS PYTHON False WHEN THE DOMAIN IS UNBOUNDED;
    # THAT CAN NOT BE CONCATENATED, SO USE THE MVEL LITERAL INSTEAD
    nullTest = compileNullTest(edge)
    if nullTest is False:
        nullTest = "false"

    if domain.interval.month > 0:
        # ONE MONTH IS TAKEN TO BE A TWELFTH OF A YEAR
        ms = durations.YEAR.milli / 12 * domain.interval.month
    else:
        ms = domain.interval.milli

    # ms AND numPartitions ARE NUMBERS; WITHOUT str() THE CONCATENATION RAISES TypeError
    in_bounds = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + str(ms) + ")"
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + in_bounds + ")"

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return domain.NULL
        return domain.getPartByKey(ref.add(domain.interval.multiply(value)))

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def _where_terms(master, where, schema): """ USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS) """ if isinstance(where, Mapping): if where.term: # MAP TERM try: output = _map_term_using_schema(master, [], where.term, schema.edges) return output except Exception as e: Log.error("programmer problem?", e) elif where.terms: # MAP TERM output = FlatList() for k, v in where.terms.items(): if not isinstance(v, (list, set)): Log.error("terms filter expects list of values") edge = schema.edges[k] if not edge: output.append({"terms": {k: v}}) else: if isinstance(edge, basestring): # DIRECT FIELD REFERENCE return {"terms": {edge: v}} try: domain = edge.getDomain() except Exception as e: Log.error("programmer error", e) fields = domain.dimension.fields if isinstance(fields, Mapping): or_agg = [] for vv in v: and_agg = [] for local_field, es_field in fields.items(): vvv = vv[local_field] if vvv != None: and_agg.append({"term": {es_field: vvv}}) or_agg.append({"and": and_agg}) output.append({"or": or_agg}) elif isinstance(fields, list) and len(fields) == 1 and is_variable_name(fields[0]): output.append({"terms": {fields[0]: v}}) elif domain.partitions: output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]}) return {"and": output} elif where["or"]: return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]} elif where["and"]: return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]} elif where["not"]: return {"not": unwrap(_where_terms(master, where["not"], schema))} return where
def _where_terms(master, where, schema): """ USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS) """ if isinstance(where, Mapping): if where.term: # MAP TERM try: output = _map_term_using_schema(master, [], where.term, schema.edges) return output except Exception as e: Log.error("programmer problem?", e) elif where.terms: # MAP TERM output = FlatList() for k, v in where.terms.items(): if not isinstance(v, (list, set)): Log.error("terms filter expects list of values") edge = schema.edges[k] if not edge: output.append({"terms": {k: v}}) else: if isinstance(edge, text_type): # DIRECT FIELD REFERENCE return {"terms": {edge: v}} try: domain = edge.getDomain() except Exception as e: Log.error("programmer error", e) fields = domain.dimension.fields if isinstance(fields, Mapping): or_agg = [] for vv in v: and_agg = [] for local_field, es_field in fields.items(): vvv = vv[local_field] if vvv != None: and_agg.append({"term": {es_field: vvv}}) or_agg.append({"and": and_agg}) output.append({"or": or_agg}) elif isinstance(fields, list) and len(fields) == 1 and is_variable_name(fields[0]): output.append({"terms": {fields[0]: v}}) elif domain.partitions: output.append({"or": [domain.getPartByKey(vv).esfilter for vv in v]}) return {"and": output} elif where["or"]: return {"or": [unwrap(_where_terms(master, vv, schema)) for vv in where["or"]]} elif where["and"]: return {"and": [unwrap(_where_terms(master, vv, schema)) for vv in where["and"]]} elif where["not"]: return {"not": unwrap(_where_terms(master, where["not"], schema))} return where
def compileNumeric2Term(edge):
    """
    RETURN MVEL CODE THAT MAPS A NUMERIC (OR count) VALUE DOWN TO AN INTEGER
    PARTITION INDEX, AND THE PYTHON FUNCTION THAT TURNS THE INTEGER BACK INTO
    A PARTITION

    OUT-OF-BOUNDS VALUES MAP TO THE INDEX len(edge.domain.partitions), WHICH
    fromTerm TRANSLATES TO edge.domain.NULL

    RETURNS Data(toTerm={"head": "", "body": <MVEL>}, fromTerm=<FUNCTION>)
    """
    if edge.script:
        Log.error("edge script not supported yet")

    if edge.domain.type != "numeric" and edge.domain.type != "count":
        Log.error("can only translate numeric domains")

    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    interval = value2MVEL(edge.domain.interval)

    if not edge.domain.max:
        if not edge.domain.min:
            # UNBOUNDED DOMAIN: COUNT INTERVALS FROM ZERO, NOTHING IS OUT-OF-BOUNDS
            ref = 0
            # WAS "Math.floor(" + value + ")/" + ... + ")" WHICH LEFT A DANGLING ")"
            partition2int = "Math.floor((" + value + ")/" + interval + ")"
            nullTest = "false"
        else:
            ref = value2MVEL(edge.domain.min)
            partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
            nullTest = "" + value + "<" + ref
    elif not edge.domain.min:
        ref = value2MVEL(edge.domain.max)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
        nullTest = "" + value + ">=" + ref
    else:
        top = value2MVEL(edge.domain.max)
        ref = value2MVEL(edge.domain.min)
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + interval + ")"
        nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")"

    # numPartitions IS AN int; CONCATENATING IT DIRECTLY RAISES TypeError
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"
    offset = convert.value2int(ref)

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return edge.domain.NULL
        return edge.domain.getPartByKey((value * edge.domain.interval) + offset)

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def compileString2Term(edge):
    """
    COMPILE A STRING EDGE: THE TERM IS THE DOCUMENT VALUE ITSELF

    ONLY SIMPLE VARIABLE NAMES ARE HANDLED; ANYTHING ELSE GOES TO Log.error
    RETURNS Data(toTerm={"head": "", "body": <MVEL>}, fromTerm=<FUNCTION>)
    WHERE fromTerm LOOKS THE TERM UP WITH edge.domain.getPartByKey
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    body = edge.value
    if not is_variable_name(body):
        Log.error("not handled")
    else:
        body = strings.expand_template("getDocValue({{path}})", {"path": quote(body)})

    def fromTerm(value):
        # THE TERM IS THE PARTITION KEY
        return edge.domain.getPartByKey(value)

    toTerm = {"head": "", "body": body}
    return Data(toTerm=toTerm, fromTerm=fromTerm)
def compileTime2Term(edge):
    """
    RETURN MVEL CODE THAT MAPS TIME AND DURATION DOMAINS DOWN TO AN INTEGER
    AND THE PYTHON FUNCTION THAT WILL TURN THAT INTEGER BACK INTO A PARTITION
    (INCLUDING NULLS)

    MONTH-BASED INTERVALS USE milli2Month() AND RESERVE 0 FOR OUT-OF-BOUNDS
    ALL OTHER INTERVALS USE THE PARTITION INDEX AND RESERVE
    len(edge.domain.partitions) FOR OUT-OF-BOUNDS

    RETURNS Data(toTerm={"head": "", "body": <MVEL>}, fromTerm=<FUNCTION>)
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(edge.domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = "doc[\"" + value + "\"].value"

    nullTest = compileNullTest(edge)
    if nullTest is False:
        # compileNullTest GIVES PYTHON False FOR AN UNBOUNDED DOMAIN; THE
        # SCRIPT NEEDS THE MVEL LITERAL
        nullTest = "false"

    ref = coalesce(edge.domain.min, edge.domain.max, datetime(2000, 1, 1))

    if edge.domain.interval.month > 0:
        offset = ref.subtract(ref.floorMonth(), durations.DAY).milli
        if offset > durations.DAY.milli * 28:
            # CLOSER TO THE NEXT MONTH: MEASURE FROM THERE INSTEAD
            offset = ref.subtract(ref.ceilingMonth(), durations.DAY).milli
        partition2int = "milli2Month(" + value + ", " + value2MVEL(offset) + ")"
        partition2int = "((" + nullTest + ") ? 0 : " + partition2int + ")"

        def int2Partition(value):
            if Math.round(value) == 0:
                return edge.domain.NULL

            # FIRST FOUR CHARACTERS ARE TAKEN AS THE YEAR, LAST TWO AS THE MONTH
            # NOTE(review): datetime IS HANDED STRINGS HERE - CONFIRM THE
            # CONSTRUCTOR IN SCOPE ACCEPTS THEM
            d = datetime(str(value)[:4:], str(value)[-2:], 1)
            d = d.addMilli(offset)
            return edge.domain.getPartByKey(d)
    else:
        # NUMBERS MUST BE CONVERTED EXPLICITLY: str + int RAISES TypeError
        partition2int = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + str(edge.domain.interval.milli) + ")"
        partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"

        def int2Partition(value):
            if Math.round(value) == numPartitions:
                return edge.domain.NULL
            return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def is_fieldop(query):
    """
    RETURN True IF query CAN BE ANSWERED BY PLAIN FIELD RETRIEVAL

    WITHOUT EDGES: THE SOURCE MUST NOT BE NESTED, AND EVERY SELECT MUST BE
    "*" OR A VARIABLE NAME WITH aggregate == "none"
    WITH EDGES: EVERY EDGE MUST BE ALGEBRAIC WITH interval == "none"
    """
    select = listwrap(query.select)

    if query.edges:
        # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)
        isSmooth = AND(
            (e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none")
            for e in query.edges
        )
        return True if isSmooth else False

    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isDeep = len(split_field(query.frum.name)) > 1
    isSimple = AND(
        s.value != None and (s.value == "*" or is_variable_name(s.value))
        for s in select
    )
    noAgg = AND(s.aggregate == "none" for s in select)

    if isDeep:
        return False
    return True if (isSimple and noAgg) else False
def compileString2Term(edge):
    """
    RETURN THE TERM TRANSLATORS FOR A STRING-VALUED EDGE

    toTerm.body - getDocValue(<quoted field name>) FOR A SIMPLE VARIABLE NAME
                  (OTHER VALUES ARE REPORTED WITH Log.error("not handled"))
    fromTerm    - MAPS A TERM BACK TO ITS PARTITION VIA edge.domain.getPartByKey
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    value = edge.value
    simple = is_variable_name(value)
    if simple:
        params = {"path": quote(value)}
        value = strings.expand_template("getDocValue({{path}})", params)
    else:
        Log.error("not handled")

    def fromTerm(value):
        return edge.domain.getPartByKey(value)

    return Data(
        fromTerm=fromTerm,
        toTerm={"head": "", "body": value}
    )
def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER

    SEARCHES FOR THE MATCHING DOCUMENTS (AT MOST 200000), THEN POSTS ONE
    _bulk REQUEST CARRYING EVERY (DOCUMENT, ASSIGNMENT) PAIR
    """
    command = wrap(command)
    schema = self._es.get_schema()

    # GET IDS OF DOCUMENTS
    routing_fields = listwrap(schema._routing.path)
    es_filter = jx_expression(command.where).to_esfilter()
    results = self._es.search({
        "fields": routing_fields,
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": es_filter
        }},
        "size": 200000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = FlatList()
    for k, v in command.set.items():
        if not is_variable_name(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            action = {"doc": v.doc}
        else:
            action = {"script": "ctx._source." + k + " = " + jx_expression(v).to_ruby()}
        scripts.append(action)

    hits = results.hits.hits
    if not hits:
        # NOTHING MATCHED, NOTHING TO SEND
        return

    # BULK FORMAT: A HEADER LINE, THEN THE ACTION, FOR EVERY (HIT, SCRIPT) PAIR
    updates = []
    for h in hits:
        for s in scripts:
            header = {"update": {
                "_id": h._id,
                "_routing": unwraplist(h.fields[literal_field(schema._routing.path)])
            }}
            updates.extend((header, s))

    content = ("\n".join(convert.value2json(c) for c in updates) + "\n").encode('utf-8')
    response = self._es.cluster.post(
        self._es.path + "/_bulk",
        data=content,
        headers={"Content-Type": "application/json"},
        timeout=self.settings.timeout,
        params={"consistency": self.settings.consistency}
    )
    if response.errors:
        failures = [
            e.error
            for i in response["items"]
            for e in i.values()
            if e.status not in (200, 201)
        ]
        Log.error("could not update: {{error}}", error=failures)
def update(self, command):
    """
    EXPECTING command == {"set":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS AN ES FILTER

    SEARCHES FOR THE MATCHING DOCUMENTS (AT MOST 10000), THEN POSTS ONE
    _bulk REQUEST CARRYING EVERY (DOCUMENT, ASSIGNMENT) PAIR
    """
    command = wrap(command)
    schema = self._es.get_properties()

    # GET IDS OF DOCUMENTS
    stored_fields = listwrap(schema._routing.path)
    es_filter = jx_expression(command.where).to_esfilter(Null)
    results = self._es.search({
        "stored_fields": stored_fields,
        "query": {"bool": {
            "filter": es_filter
        }},
        "size": 10000
    })

    # SCRIPT IS SAME FOR ALL (CAN ONLY HANDLE ASSIGNMENT TO CONSTANT)
    scripts = FlatList()
    for k, v in command.set.items():
        if not is_variable_name(k):
            Log.error("Only support simple paths for now")
        if isinstance(v, Mapping) and v.doc:
            scripts.append({"doc": v.doc})
            continue
        v = scrub(v)
        assignment = "ctx._source." + k + " = " + jx_expression(v).to_painless(schema).script(schema)
        scripts.append({"script": assignment})

    hits = results.hits.hits
    if hits:
        # BULK FORMAT: A HEADER LINE, THEN THE ACTION, FOR EVERY (HIT, SCRIPT) PAIR
        updates = []
        for h in hits:
            for s in scripts:
                routing = unwraplist(h.fields[literal_field(schema._routing.path)])
                updates.append({"update": {"_id": h._id, "_routing": routing}})
                updates.append(s)

        lines = "\n".join(convert.value2json(c) for c in updates)
        content = (lines + "\n").encode('utf-8')
        response = self._es.cluster.post(
            self._es.path + "/_bulk",
            data=content,
            headers={"Content-Type": "application/json"},
            timeout=self.settings.timeout,
            params={"wait_for_active_shards": self.settings.wait_for_active_shards}
        )
        if response.errors:
            Log.error(
                "could not update: {{error}}",
                error=[e.error for i in response["items"] for e in i.values() if e.status not in (200, 201)]
            )
def compileTime2Term(edge):
    """
    RETURN MVEL CODE THAT MAPS TIME AND DURATION DOMAINS DOWN TO AN INTEGER
    AND THE PYTHON FUNCTION THAT WILL TURN THAT INTEGER BACK INTO A PARTITION
    (INCLUDING NULLS)

    toTerm.body - THE MVEL EXPRESSION
    fromTerm    - THE INVERSE; GIVES edge.domain.NULL FOR THE RESERVED
                  OUT-OF-BOUNDS INTEGER (0 FOR MONTH INTERVALS, OTHERWISE THE
                  NUMBER OF PARTITIONS)
    """
    if edge.esscript:
        Log.error("edge script not supported yet")

    domain = edge.domain

    # IS THERE A LIMIT ON THE DOMAIN?
    numPartitions = len(domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = 'doc["' + value + '"].value'

    # compileNullTest RETURNS PYTHON False WHEN THE DOMAIN IS UNBOUNDED;
    # THAT CAN NOT BE CONCATENATED, SO USE THE MVEL LITERAL INSTEAD
    nullTest = compileNullTest(edge)
    if nullTest is False:
        nullTest = "false"

    ref = coalesce(domain.min, domain.max, datetime(2000, 1, 1))

    if domain.interval.month > 0:
        offset = ref.subtract(ref.floorMonth(), durations.DAY).milli
        if offset > durations.DAY.milli * 28:
            # CLOSER TO THE NEXT MONTH: MEASURE FROM THERE INSTEAD
            offset = ref.subtract(ref.ceilingMonth(), durations.DAY).milli
        month_index = "milli2Month(" + value + ", " + value2MVEL(offset) + ")"
        partition2int = "((" + nullTest + ") ? 0 : " + month_index + ")"

        def int2Partition(value):
            if Math.round(value) == 0:
                return domain.NULL

            # FIRST FOUR CHARACTERS ARE TAKEN AS THE YEAR, LAST TWO AS THE MONTH
            # NOTE(review): datetime IS HANDED STRINGS HERE - CONFIRM THE
            # CONSTRUCTOR IN SCOPE ACCEPTS THEM
            d = datetime(str(value)[:4:], str(value)[-2:], 1)
            d = d.addMilli(offset)
            return domain.getPartByKey(d)
    else:
        # interval.milli AND numPartitions ARE NUMBERS; WITHOUT str() THE
        # CONCATENATION RAISES TypeError
        part_index = "Math.floor((" + value + "-" + value2MVEL(ref) + ")/" + str(domain.interval.milli) + ")"
        partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + part_index + ")"

        def int2Partition(value):
            if Math.round(value) == numPartitions:
                return domain.NULL
            return domain.getPartByKey(ref.add(domain.interval.multiply(value)))

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def compileNumeric2Term(edge):
    """
    COMPILE A NUMERIC (OR count) EDGE TO A PAIR OF TRANSLATORS

    toTerm.body - MVEL THAT GIVES THE INTEGER PARTITION INDEX OF A VALUE, OR
                  THE NUMBER OF PARTITIONS WHEN THE VALUE IS OUT-OF-BOUNDS
    fromTerm    - PYTHON FUNCTION FROM THAT INTEGER BACK TO THE PARTITION
                  (edge.domain.NULL FOR THE OUT-OF-BOUNDS INDEX)
    """
    if edge.script:
        Log.error("edge script not supported yet")

    domain = edge.domain
    if domain.type != "numeric" and domain.type != "count":
        Log.error("can only translate numeric domains")

    numPartitions = len(domain.partitions)
    value = edge.value
    if is_variable_name(value):
        value = 'doc["' + value + '"].value'

    step = value2MVEL(domain.interval)
    has_max = bool(domain.max)
    has_min = bool(domain.min)

    if not has_max and not has_min:
        # UNBOUNDED: INDEX FROM ZERO AND NEVER REPORT OUT-OF-BOUNDS
        # (THE OLD TEMPLATE OPENED ONE "(" BUT CLOSED TWO)
        ref = 0
        partition2int = "Math.floor((" + value + ")/" + step + ")"
        nullTest = "false"
    else:
        if has_max and has_min:
            top = value2MVEL(domain.max)
            ref = value2MVEL(domain.min)
            nullTest = "(" + value + "<" + ref + ") or (" + value + ">=" + top + ")"
        elif has_min:
            ref = value2MVEL(domain.min)
            nullTest = "" + value + "<" + ref
        else:
            ref = value2MVEL(domain.max)
            nullTest = "" + value + ">=" + ref
        partition2int = "Math.floor((" + value + "-" + ref + ")/" + step + ")"

    # numPartitions IS AN int; WITHOUT str() THE CONCATENATION RAISES TypeError
    partition2int = "((" + nullTest + ") ? " + str(numPartitions) + " : " + partition2int + ")"
    offset = convert.value2int(ref)

    def int2Partition(value):
        if Math.round(value) == numPartitions:
            return domain.NULL
        return domain.getPartByKey((value * domain.interval) + offset)

    return Data(toTerm={"head": "", "body": partition2int}, fromTerm=int2Partition)
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT

    ONE terms FACET IS REQUESTED PER SELECT; THE FACET total BECOMES THE
    SINGLE VALUE OF THAT SELECT'S Matrix.  mvel IS NOT USED HERE
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    for s in select:
        if not is_variable_name(s.value):
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            facet = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(s.value, query),
                    "size": 200000
                }
            }
        else:
            # PLAIN FIELD: ONLY DOCUMENTS HAVING THE FIELD TAKE PART
            facet = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {"exists": {"field": s.value}}
            }
        FromES.facets[s.name] = facet

    data = es_post(es, FromES, query.limit)

    matricies = {}
    for s in select:
        total = data.hits.facets[s.name].total
        matricies[s.name] = Matrix(value=total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
def is_fieldop(query):
    """
    DECIDE IF query IS A SIMPLE FIELD OPERATION

    NO EDGES - True WHEN THE SOURCE NAME HAS A SINGLE PATH STEP, EVERY SELECT
               VALUE IS "*" OR A VARIABLE NAME, AND NOTHING IS AGGREGATED
    EDGES    - True WHEN ALL EDGES ARE ALGEBRAIC WITH interval == "none"
    """
    # THESE SMOOTH EDGES REQUIRE ALL DATA (SETOP)
    select = listwrap(query.select)

    def _plain(s):
        return s.value != None and (s.value == "*" or is_variable_name(s.value))

    def _smooth(e):
        return e.domain.type in domains.ALGEBRAIC and e.domain.interval == "none"

    if not query.edges:
        # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        isDeep = len(split_field(query.frum.name)) > 1
        isSimple = AND(_plain(s) for s in select)
        noAgg = AND(s.aggregate == "none" for s in select)
        if not isDeep and isSimple and noAgg:
            return True
    elif AND(_smooth(e) for e in query.edges):
        return True

    return False
def convert(self, expr):
    """
    ADD THE ".$value" SUFFIX TO ALL VARIABLES

    Expression INSTANCES ARE RENAMED THROUGH expr.map(); LITERALS AND Date
    COME BACK UNCHANGED; QUERIES GO TO self._convert_query; CONTAINERS ARE
    CONVERTED ELEMENT-WISE.  ANY OTHER TYPE GIVES None
    """
    if isinstance(expr, Expression):
        rename = {}
        for v in expr.vars():
            rename[v] = concat_field(v, "$value")
        return expr.map(rename)

    if expr is True or expr == None or expr is False:
        return expr
    if Math.is_number(expr):
        return expr
    if expr == ".":
        return "."
    if is_variable_name(expr):
        # TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
        return expr + ".$value"
    if isinstance(expr, basestring):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
        return None  # ONLY REACHED IF Log.error RETURNS
    if isinstance(expr, Date):
        return expr
    if isinstance(expr, QueryOp):
        return self._convert_query(expr)
    if isinstance(expr, Mapping):
        if expr["from"]:
            return self._convert_query(expr)
        if len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.items()})
        # ASSUME SINGLE-CLAUSE EXPRESSION
        k, v = expr.items()[0]
        handler = self.converter_map.get(k, self._convert_bop)
        return handler(k, v)
    if isinstance(expr, (list, set, tuple)):
        return wrap([self.convert(value) for value in expr])
    return None
def convert(self, expr):
    """
    ADD THE ".$value" SUFFIX TO ALL VARIABLES

    expr - Expression, LITERAL, VARIABLE NAME, Date, QueryOp, MAPPING OR
           list/set/tuple
    RETURNS THE CONVERTED FORM; None WHEN expr IS NONE OF THE ABOVE
    """
    if isinstance(expr, Expression):
        # RENAME EVERY VARIABLE THE EXPRESSION USES
        suffixed = {name: concat_field(name, "$value") for name in expr.vars()}
        return expr.map(suffixed)

    if expr is True or expr == None or expr is False:
        result = expr
    elif Math.is_number(expr):
        result = expr
    elif expr == ".":
        result = "."
    elif is_variable_name(expr):
        # TODO: LOOKUP SCHEMA AND ADD ALL COLUMNS WITH THIS PREFIX
        result = expr + ".$value"
    elif isinstance(expr, basestring):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
        result = None
    elif isinstance(expr, Date):
        result = expr
    elif isinstance(expr, QueryOp):
        result = self._convert_query(expr)
    elif isinstance(expr, Mapping):
        if expr["from"]:
            result = self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            result = wrap({name: self.convert(value) for name, value in expr.items()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            result = self.converter_map.get(k, self._convert_bop)(k, v)
    elif isinstance(expr, (list, set, tuple)):
        result = wrap([self.convert(value) for value in expr])
    else:
        result = None
    return result
def test_dash_var(self):
    # A DASH INSIDE A NAME MUST BE ACCEPTED
    accepted = is_variable_name("a-b")
    self.assertTrue(accepted, "That's a good variable name!")
def compileEdges2Term(mvel_compiler, edges, constants):
    """
    TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)

    GIVE MVEL CODE THAT REDUCES A UNIQUE TUPLE OF PARTITIONS DOWN TO A UNIQUE TERM
    GIVE FUNCTION THAT WILL CONVERT THE TERM BACK INTO THE TUPLE
    CAN USE THE constants (name, value pairs)

    RETURNS Data WITH term2parts (TERM -> FlatList OF PARTS) AND EITHER
    field (PLAIN ES FIELD NAME) OR expression (COMPILED MVEL)
    """
    # IF THE QUERY IS SIMPLE ENOUGH, THEN DO NOT USE TERM PACKING
    edge0 = edges[0]

    if len(edges) == 1 and edge0.domain.type in ["set", "default"]:
        # THE TERM RETURNED WILL BE A MEMBER OF THE GIVEN SET
        def single_term2parts(term):
            return FlatList([edge0.domain.getPartByKey(term)])

        if edge0.value and is_variable_name(edge0.value):
            return Data(
                field=edge0.value,
                term2parts=single_term2parts
            )
        elif COUNT(edge0.domain.dimension.fields) == 1:
            return Data(
                field=edge0.domain.dimension.fields[0],
                term2parts=single_term2parts
            )
        elif not edge0.value and edge0.domain.partitions:
            script = mvel_compiler.Parts2TermScript(edge0.domain)
            return Data(
                expression=script,
                term2parts=single_term2parts
            )
        else:
            return Data(
                expression=mvel_compiler.compile_expression(edge0.value, constants),
                term2parts=single_term2parts
            )

    mvel_terms = []     # MVEL SNIPPETS THAT PACK EACH EDGE INTO A TERM
    fromTerm2Part = []  # MATCHING FUNCTIONS TO UNPACK TERMS BACK TO PARTS
    for e in edges:
        domain = e.domain
        fields = domain.dimension.fields

        if not e.value and fields:
            code, decode = mvel_compiler.Parts2Term(e.domain)
            t = Data(
                toTerm=code,
                fromTerm=decode
            )
        elif fields:
            Log.error("not expected")
        elif e.domain.type == "time":
            t = compileTime2Term(e)
        elif e.domain.type == "duration":
            t = compileDuration2Term(e)
        elif e.domain.type in domains.ALGEBRAIC:
            t = compileNumeric2Term(e)
        elif e.domain.type == "set" and not fields:
            # (AN UNUSED NESTED fromTerm CLOSURE OVER e WAS DEFINED HERE; REMOVED)
            code, decode = mvel_compiler.Parts2Term(e.domain)
            t = Data(
                toTerm=code,
                fromTerm=decode
            )
        else:
            t = compileString2Term(e)

        if not t.toTerm.body:
            mvel_compiler.Parts2Term(e.domain)
            Log.unexpected("what?")

        fromTerm2Part.append(t.fromTerm)
        mvel_terms.append(t.toTerm.body)

    # REGISTER THE DECODE FUNCTION
    def packed_term2parts(term):
        # THE PACKED TERM IS THE PER-EDGE TERMS JOINED WITH "|", IN EDGE ORDER
        terms = term.split('|')
        return FlatList([t2p(piece) for piece, t2p in zip(terms, fromTerm2Part)])

    return Data(
        expression=mvel_compiler.compile_expression("+'|'+".join(mvel_terms), constants),
        term2parts=packed_term2parts
    )
def __init__(self, var):
    """
    var - MUST PASS is_variable_name (OTHERWISE Log.error IS CALLED)
    THE PROPERTY NAME DERIVED FROM var IS KEPT AS self.var
    """
    Expression.__init__(self, "", None)
    valid = is_variable_name(var)
    if not valid:
        Log.error("Expecting a variable name")
    self.var = get_property_name(var)
def es_terms_stats(esq, mvel, query):
    """
    ANSWER query WITH terms_stats FACETS AND RETURN THE RESULT AS A Cube

    EDGES ARE SPLIT INTO facetEdges (ONE FACET PER COMBINATION OF THEIR PARTS)
    AND termsEdges (PACKED INTO THE FACET'S TERM BY compileEdges2Term).
    ONE FACET IS BUILT PER (select, COMBINATION), THE REQUEST IS POSTED WITH
    es_post, AND THE RETURNED STATS ARE COPIED INTO ONE Matrix PER select

    esq - USED FOR esq.query() (COUNTING REAL COMBINATIONS) AND esq.es
    mvel - EXPRESSION COMPILER HANDED TO compileEdges2Term / buildCondition
    query - THE QUERY; select, edges, where, limit AND frum ARE READ
    """
    select = listwrap(query.select)
    facetEdges = []  # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = FlatList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            # REMEMBER EACH PART'S POSITION; IT BECOMES THE CUBE COORDINATE
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM
            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error(
                    "There is more than one open-ended edge: self can not be handled"
                )
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        # NOTE(review): IF NO EDGE QUALIFIED ABOVE, special_index IS STILL -1
        # AND specialEdge IS STILL None, SO THIS POPS THE LAST FACET EDGE AND
        # APPENDS None - CONFIRM THAT CAN NOT HAPPEN
        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {
                "aggregate": "count"
            },
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            # KEEP ONLY THE COMBINATIONS THAT HAVE A TRUTHY COUNT
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note(
            "{{theory_count}} theoretical combinations, {{real_count}} actual combos found",
            real_count=len(esFacets),
            theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [
                len(e.domain.partitions) + (1 if e.allowNulls else 0)
                for e in query.edges
            ]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube
    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, FlatList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error(
            "not implemented yet"
        )  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = FlatList()
            constants = FlatList()
            # FACET NAME IS "<select name>,<dataIndex>,<dataIndex>,..." AND IS
            # SPLIT ON "," AGAIN WHEN THE CUBE IS FILLED BELOW
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({
                    "name": fedge.domain.name,
                    "value": parts[f]
                })
            condition.append(query.where)
            name = ",".join(name)

            # NOTE(review): constants IS COLLECTED BUT NOT READ AGAIN IN THIS FUNCTION
            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_variable_name(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter(
                    {"and": condition})

    data = es_post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = FlatList()
        # NOTE(review): map IS A PLAIN dict (AND SHADOWS THE BUILTIN); map[stats]
        # ON A KEY THAT IS NOT PRESENT RAISES KeyError - CONFIRM THIS BRANCH WORKS
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map[stats]:
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [
        len(e.domain.partitions) + (1 if e.allowNulls else 0)
        for e in query.edges
    ]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        # UNDO THE FACET NAME: FIRST PIECE IS THE SELECT, THE REST ARE COORDINATES
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # SPLICE THE SPECIAL EDGE'S INDEX INTO ITS POSITION
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (
                    special.dataIndex,
                ) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT

    Builds the ES filter that selects the documents falling into
    `partition` of `edge`.

    :param mvel: MVEL compiler; only used (via compile_expression) when a
                 range bound of the edge is an expression, not a variable name
    :param edge: query edge; reads .domain, .range, .value and .name
    :param partition: one part of edge.domain; reads .where, .min, .max,
                      .value and .esfilter
    :return: ES filter as a dict (or whatever partition.esfilter /
             simplify_esfilter give back for the facet branches)
    """
    output = {}

    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(coalesce(partition.where, {"and": []}))

        # NOTE(review): truthiness test skips the range when min or max is 0 - confirm intended
        if partition.min and partition.max and is_variable_name(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return simplify_esfilter(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if is_variable_name(edge.range.min):
                    output["and"].append({"range": {
                        edge.range.min: {"lt": es09.expressions.value2value(partition.max)}
                    }})
                else:
                    # WHOA!!  SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)
                    )}})

                if is_variable_name(edge.range.max):
                    # THE FIELD NAME MUST BE A dict KEY; A COMMA HERE MAKES A set
                    # LITERAL HOLDING A dict, WHICH RAISES TypeError
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {
                            edge.range.max: {"gt": es09.expressions.value2value(partition.min)}
                        }}
                    ]})
                else:
                    # WHOA!!  SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + es09.expressions.value2MVEL(partition.min)
                    )}})
            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if is_variable_name(edge.range.min):
                    output["and"].append({"range": {
                        edge.range.min: {"lte": es09.expressions.value2value(partition.min)}
                    }})
                else:
                    # WHOA!!  SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)
                    )}})

                if is_variable_name(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {
                            edge.range.max: {"gte": es09.expressions.value2value(partition.min)}
                        }}
                    ]})
                else:
                    # WHOA!!  SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif is_variable_name(edge.value):
        # USE FAST ES SYNTAX
        # output IS A PLAIN dict, SO IT IS FILLED BY KEY (ATTRIBUTE ASSIGNMENT RAISES AttributeError)
        if edge.domain.type in domains.ALGEBRAIC:
            output["range"] = {edge.value: {
                "gte": es09.expressions.value2query(partition.min),
                "lt": es09.expressions.value2query(partition.max)
            }}
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output["term"] = {edge.value: partition.value}
            else:
                output["term"] = {edge.value: edge.domain.getKey(partition)}
        elif edge.domain.type == "default":
            output["term"] = {edge.value: partition.value}
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            script = (
                edge.value + ">=" + es09.expressions.value2MVEL(partition.min) +
                " and " +
                edge.value + "<" + es09.expressions.value2MVEL(partition.max)
            )
        else:
            script = "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)

        # addFunctions RETURNS THE SCRIPT SPLIT INTO head AND body; SEND BOTH
        code = es09.expressions.addFunctions(script)
        output["script"] = {"script": code.head + code.body}
        return output
def jx_expression(expr):
    """
    WRAP A JSON EXPRESSION WITH OBJECT REPRESENTATION

    :param expr: JSON-style value: a primitive, a string holding a variable
                 name, a list/tuple of expressions, or a mapping of
                 {operator: term}
    :return: Literal for primitives, Variable for variable names, TupleOp
             for sequences, NullOp for an empty mapping, otherwise an
             instance of the class registered in `operators` for the
             operator found in the mapping
    """
    if isinstance(expr, Expression):
        Log.error("Expecting JSON, not expression")

    if expr in (True, False, None) or expr == None or isinstance(expr, (float, int, Decimal, Date)):
        return Literal(None, expr)
    elif isinstance(expr, text_type):
        if is_variable_name(expr):
            return Variable(expr)
        elif not expr.strip():
            Log.error("expression is empty")
        else:
            Log.error("expression is not recognized: {{expr}}", expr=expr)
    elif isinstance(expr, (list, tuple)):
        # FORMALIZE; A REAL LIST IS BUILT BECAUSE map() IS A ONE-SHOT ITERATOR ON PYTHON 3
        return TupleOp("tuple", [jx_expression(e) for e in expr])

    expr = wrap(expr)
    if expr.date:
        return DateOp("date", expr)

    try:
        items = expr.items()
    except Exception as e:
        Log.error("programmer error expr = {{value|quote}}", value=expr, cause=e)

    # THE FIRST KEY THAT NAMES A KNOWN OPERATOR DECIDES THE EXPRESSION CLASS
    for item in items:
        op, term = item
        class_ = operators.get(op)
        if class_:
            term, clauses = class_.preprocess(op, expr)
            break
    else:
        if not items:
            return NullOp()
        raise Log.error("{{operator|quote}} is not a known operator", operator=op)

    if class_ is Literal:
        return class_(op, term)
    elif class_ is ScriptOp:
        if ALLOW_SCRIPTING:
            Log.warning("Scripting has been activated:  This has known security holes!!\nscript = {{script|quote}}", script=term)
            return class_(op, term)
        else:
            Log.error("scripting is disabled")
    elif term == None:
        return class_(op, [], **clauses)
    elif isinstance(term, list):
        terms = [jx_expression(t) for t in term]
        return class_(op, terms, **clauses)
    elif isinstance(term, Mapping):
        # list() SO THE PAIRS CAN BE INDEXED EVEN WHEN items() IS A VIEW
        items = list(term.items())
        if class_.has_simple_form:
            if len(items) == 1:
                k, v = items[0]
                return class_(op, [Variable(k), Literal(None, v)], **clauses)
            else:
                return class_(op, {k: Literal(None, v) for k, v in items}, **clauses)
        else:
            return class_(op, jx_expression(term), **clauses)
    else:
        if op in ["literal", "date", "offset"]:
            return class_(op, term, **clauses)
        else:
            return class_(op, jx_expression(term), **clauses)
def es_terms_stats(esq, mvel, query):
    """
    Run `query` as a set of ES terms_stats facets and return the result as a Cube.

    One edge (the "special" edge) is sent as the terms_stats key field; every
    combination of parts of the remaining (facet) edges gets its own facet,
    named "<select name>,<dataIndex>,<dataIndex>,...".

    :param esq: query runner; reads esq.es and calls esq.query() to count
                real combinations when there are more than 100 theoretical facets
    :param mvel: MVEL compiler, used for value scripts and edge conditions
    :param query: the query; reads .select, .edges, .where, .limit and .frum
    :return: Cube over query.edges, with one Matrix per select
    """
    select = listwrap(query.select)
    facetEdges = []    # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = FlatList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM
            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        if specialEdge is None:
            # WITHOUT THIS CHECK pop(-1) WOULD SILENTLY DROP THE LAST FACET EDGE
            # AND None WOULD BE USED AS THE TERMS EDGE
            Log.error("Can not find an edge that can be collapsed to a term")

        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            # KEEP ONLY THE COMBINATIONS THAT HAVE A NON-ZERO COUNT
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found", real_count= len(esFacets), theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, FlatList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        Log.error("not implemented yet")

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = FlatList()
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {"terms_stats": {
                "key_field": calcTerm.field,
                "value_field": s.value if is_variable_name(s.value) else None,
                "value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None,
                "size": coalesce(query.limit, 200000)
            }}
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es_post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = FlatList()
        part_by_term = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                # KEY BY THE TERM: THAT IS WHAT term2parts(stats.term) LOOKS UP
                # WHEN THE CUBE IS FILLED BELOW
                term = stats.term
                if term not in part_by_term:
                    part = {"value": term, "name": term}
                    partitions.append(part)
                    part_by_term[term] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = part_by_term
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        # FACET NAME IS "<select name>,<facet edge dataIndex>,..."
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # SPLICE THE SPECIAL EDGE'S INDEX BACK INTO ITS POSITION
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
def compileEdges2Term(mvel_compiler, edges, constants):
    """
    TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)

    GIVE MVEL CODE THAT REDUCES A UNIQUE TUPLE OF PARTITIONS DOWN TO A UNIQUE TERM
    GIVE FUNCTION THAT WILL CONVERT THE TERM BACK INTO THE TUPLE

    :param mvel_compiler: provides Parts2Term, Parts2TermScript and compile_expression
    :param edges: the edges to pack into a single term
    :param constants: (name, value pairs) passed through to compile_expression
    :return: Data with a `term2parts` function (term -> FlatList of parts) and
             either `field` (plain ES field name; single set/default edge
             only) or `expression` (compiled MVEL script)
    """

    # IF THE QUERY IS SIMPLE ENOUGH, THEN DO NOT USE TERM PACKING
    edge0 = edges[0]

    if len(edges) == 1 and edge0.domain.type in ["set", "default"]:
        # THE TERM RETURNED WILL BE A MEMBER OF THE GIVEN SET
        def temp(term):
            return FlatList([edge0.domain.getPartByKey(term)])

        if edge0.value and is_variable_name(edge0.value):
            return Data(field=edge0.value, term2parts=temp)
        elif COUNT(edge0.domain.dimension.fields) == 1:
            return Data(field=edge0.domain.dimension.fields[0], term2parts=temp)
        elif not edge0.value and edge0.domain.partitions:
            script = mvel_compiler.Parts2TermScript(edge0.domain)
            return Data(expression=script, term2parts=temp)
        else:
            return Data(expression=mvel_compiler.compile_expression(edge0.value, constants), term2parts=temp)

    mvel_terms = []     # FUNCTION TO PACK TERMS
    fromTerm2Part = []   # UNPACK TERMS BACK TO PARTS
    for e in edges:
        domain = e.domain
        fields = domain.dimension.fields

        if not e.value and fields:
            code, decode = mvel_compiler.Parts2Term(e.domain)
            t = Data(toTerm=code, fromTerm=decode)
        elif fields:
            Log.error("not expected")
        elif e.domain.type == "time":
            t = compileTime2Term(e)
        elif e.domain.type == "duration":
            t = compileDuration2Term(e)
        elif e.domain.type in domains.ALGEBRAIC:
            t = compileNumeric2Term(e)
        elif e.domain.type == "set" and not fields:
            code, decode = mvel_compiler.Parts2Term(e.domain)
            t = Data(toTerm=code, fromTerm=decode)
        else:
            t = compileString2Term(e)

        if not t.toTerm.body:
            mvel_compiler.Parts2Term(e.domain)
            Log.unexpected("what?")

        fromTerm2Part.append(t.fromTerm)
        mvel_terms.append(t.toTerm.body)

    # REGISTER THE DECODE FUNCTION
    def temp(term):
        # SPLIT THE PACKED TERM AND DECODE EACH PIECE WITH ITS EDGE'S DECODER
        terms = term.split('|')
        output = FlatList([t2p(t) for t, t2p in zip(terms, fromTerm2Part)])
        return output

    return Data(
        expression=mvel_compiler.compile_expression("+'|'+".join(mvel_terms), constants),
        term2parts=temp
    )
def test_error_on_bad_var(self):
    """A function-call expression must be rejected by is_variable_name()."""
    candidate = u'coalesce(rows[rownum+1].timestamp, Date.eod())'
    accepted = is_variable_name(candidate)
    self.assertFalse(accepted, "That's not a valid variable name!!")
def test_good_var(self):
    """A dotted path of underscore-prefixed names must be accepted by is_variable_name()."""
    candidate = u'_a._b'
    accepted = is_variable_name(candidate)
    self.assertTrue(accepted, "That's a good variable name!")
def buildCondition(mvel, edge, partition):
    """
    RETURN AN ES FILTER OBJECT

    Builds the ES filter that selects the documents falling into
    `partition` of `edge`.

    :param mvel: MVEL compiler; only used (via compile_expression) when a
                 range bound of the edge is an expression, not a variable name
    :param edge: query edge; reads .domain, .range, .value and .name
    :param partition: one part of edge.domain; reads .where, .min, .max,
                      .value and .esfilter
    :return: ES filter as a dict (or whatever partition.esfilter /
             simplify_esfilter give back for the facet branches)
    """
    output = {}

    if edge.domain.isFacet:
        # MUST USE THIS' esFacet
        condition = wrap(coalesce(partition.where, {"and": []}))

        # NOTE(review): truthiness test skips the range when min or max is 0 - confirm intended
        if partition.min and partition.max and is_variable_name(edge.value):
            condition["and"].append({
                "range": {edge.value: {"gte": partition.min, "lt": partition.max}}
            })

        # ES WILL FREAK OUT IF WE SEND {"not":{"and":x}} (OR SOMETHING LIKE THAT)
        return simplify_esfilter(condition)
    elif edge.range:
        # THESE REALLY NEED FACETS TO PERFORM THE JOIN-TO-DOMAIN
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            output = {"and": []}

            if edge.range.mode and edge.range.mode == "inclusive":
                # IF THE range AND THE partition OVERLAP, THEN MATCH IS MADE
                if is_variable_name(edge.range.min):
                    output["and"].append({"range": {
                        edge.range.min: {"lt": es09.expressions.value2value(partition.max)}
                    }})
                else:
                    # WHOA!!  SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + " < " + es09.expressions.value2MVEL(partition.max)
                    )}})

                if is_variable_name(edge.range.max):
                    # THE FIELD NAME MUST BE A dict KEY; A COMMA HERE MAKES A set
                    # LITERAL HOLDING A dict, WHICH RAISES TypeError
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {
                            edge.range.max: {"gt": es09.expressions.value2value(partition.min)}
                        }}
                    ]})
                else:
                    # WHOA!!  SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.max + " > " + es09.expressions.value2MVEL(partition.min)
                    )}})
            else:
                # SNAPSHOT - IF range INCLUDES partition.min, THEN MATCH IS MADE
                if is_variable_name(edge.range.min):
                    output["and"].append({"range": {
                        edge.range.min: {"lte": es09.expressions.value2value(partition.min)}
                    }})
                else:
                    # WHOA!!  SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        edge.range.min + "<=" + es09.expressions.value2MVEL(partition.min)
                    )}})

                if is_variable_name(edge.range.max):
                    output["and"].append({"or": [
                        {"missing": {"field": edge.range.max}},
                        {"range": {
                            edge.range.max: {"gte": es09.expressions.value2value(partition.min)}
                        }}
                    ]})
                else:
                    # WHOA!!  SUPER SLOW!!
                    output["and"].append({"script": {"script": mvel.compile_expression(
                        es09.expressions.value2MVEL(partition.min) + " <= " + edge.range.max
                    )}})
            return output
        else:
            Log.error("Do not know how to handle range query on non-continuous domain")

    elif not edge.value:
        # MUST USE THIS' esFacet, AND NOT(ALL THOSE ABOVE)
        return partition.esfilter
    elif is_variable_name(edge.value):
        # USE FAST ES SYNTAX
        # output IS A PLAIN dict, SO IT IS FILLED BY KEY (ATTRIBUTE ASSIGNMENT RAISES AttributeError)
        if edge.domain.type in domains.ALGEBRAIC:
            output["range"] = {edge.value: {
                "gte": es09.expressions.value2query(partition.min),
                "lt": es09.expressions.value2query(partition.max)
            }}
        elif edge.domain.type == "set":
            if partition.value:
                if partition.value != edge.domain.getKey(partition):
                    Log.error("please ensure the key attribute of the domain matches the value attribute of all partitions, if only because we are now using the former")
                # DEFAULT TO USING THE .value ATTRIBUTE, IF ONLY BECAUSE OF LEGACY REASONS
                output["term"] = {edge.value: partition.value}
            else:
                output["term"] = {edge.value: edge.domain.getKey(partition)}
        elif edge.domain.type == "default":
            output["term"] = {edge.value: partition.value}
        else:
            Log.error("Edge \"" + edge.name + "\" is not supported")

        return output
    else:
        # USE MVEL CODE
        if edge.domain.type in domains.ALGEBRAIC:
            script = (
                edge.value + ">=" + es09.expressions.value2MVEL(partition.min) +
                " and " +
                edge.value + "<" + es09.expressions.value2MVEL(partition.max)
            )
        else:
            script = "( " + edge.value + " ) ==" + es09.expressions.value2MVEL(partition.value)

        # addFunctions RETURNS THE SCRIPT SPLIT INTO head AND body; SEND BOTH
        code = es09.expressions.addFunctions(script)
        output["script"] = {"script": code.head + code.body}
        return output