def to_ruby(self, schema):
    """
    Render the inequality of `self.lhs` and `self.rhs` as a Ruby script.

    The comparison is expressed as a case expression: when one side is
    missing the answer is "the other side is not missing"; otherwise it is
    the negation of the basic equality of both sides.

    :param schema: handed to `to_ruby` of the simplified expression
    :return: whatever `to_ruby(schema)` returns for the simplified case
    """
    # LHS IS MISSING: DIFFERENT ONLY IF RHS EXISTS
    when_lhs_missing = WhenOp(
        "when",
        self.lhs.missing(),
        **{"then": NotOp("not", self.rhs.missing())}
    )
    # RHS IS MISSING: DIFFERENT ONLY IF LHS EXISTS
    when_rhs_missing = WhenOp(
        "when",
        self.rhs.missing(),
        **{"then": NotOp("not", self.lhs.missing())}
    )
    # FALL-THROUGH: PLAIN NEGATED EQUALITY
    otherwise = NotOp("not", BasicEqOp("eq", [self.lhs, self.rhs]))

    case = CaseOp("case", [when_lhs_missing, when_rhs_missing, otherwise])
    return case.partial_eval().to_ruby(schema)
def to_painless(self, schema):
    """
    Render the inequality of `self.lhs` and `self.rhs` as a Painless script.

    Built as a case expression: a missing side yields "the other side is not
    missing"; the fall-through is the negated basic equality.

    :param schema: handed to `to_painless` of the simplified expression
    :return: whatever `to_painless(schema)` returns for the simplified case
    """
    # LHS IS MISSING: DIFFERENT ONLY IF RHS EXISTS
    when_lhs_missing = WhenOp(
        "when",
        self.lhs.missing(),
        **{"then": NotOp("not", self.rhs.missing())}
    )
    # RHS IS MISSING: DIFFERENT ONLY IF LHS EXISTS
    when_rhs_missing = WhenOp(
        "when",
        self.rhs.missing(),
        **{"then": NotOp("not", self.lhs.missing())}
    )
    # FALL-THROUGH: PLAIN NEGATED EQUALITY
    otherwise = NotOp("not", BasicEqOp("eq", [self.lhs, self.rhs]))

    case = CaseOp("case", [when_lhs_missing, when_rhs_missing, otherwise])
    simplified = case.partial_eval()
    return simplified.to_painless(schema)
def outer_to_inner(expr, paths_to_cols):
    """
    Rewrite an outer-join expression as a concatenation of inner joins.

    :param expr: NULL, a ConcatOp or an OuterJoinOp; any other expression is
                 reported through Log.error
    :param paths_to_cols: indexed by nested path; each entry is iterated for
                          columns carrying an `es_column`
    :return: NULL when `expr` is NULL, otherwise a ConcatOp
    """
    # JSON QUERY EXPRESSIONS ASSUME OUTER JOIN
    # ES ONLY HAS INNER JOIN
    # ACCOUNT FOR WHEN NESTED RECORDS ARE MISSING
    if expr is NULL:
        return NULL

    if is_op(expr, ConcatOp):
        # CONVERT EACH TERM, THEN FLATTEN THE RESULTING TERMS INTO ONE CONCAT
        return ConcatOp([
            inner
            for outer in expr.terms
            for inner in outer_to_inner(outer, paths_to_cols).terms
        ])

    if is_op(expr, OuterJoinOp):
        # THE MAIN INNER JOIN
        joins = [InnerJoinOp(expr.frum, expr.nests)]

        # ALL THE OUTER JOIN RESIDUES
        for deepest in expr.nests[:-1]:  # LAST '.' NOT NEEDED
            deepest_path = deepest.path.var
            residue = InnerJoinOp(expr.frum, [])
            deeper_conditions = TRUE

            for nest in expr.nests:
                nest_path = nest.path.var
                if len(nest_path) < len(deepest_path):
                    # SHALLOWER NEST: KEEP IT, PLUS THE PENDING DEEPER CONDITION
                    residue.nests.append(
                        NestedOp(
                            path=nest.path,
                            select=nest.select,
                            where=AndOp([deeper_conditions, nest.where]),
                            sort=nest.sort,
                            limit=nest.limit,
                        )
                    )
                    deeper_conditions = TRUE
                elif nest_path == deepest_path:
                    # assume the deeper is null
                    set_null = {
                        col.es_column: NULL
                        for col in paths_to_cols[deepest_path]
                    }
                    set_null[deepest_path] = NULL
                    deeper_exists = nest.where.map(set_null).partial_eval()
                    if deeper_exists is FALSE:
                        # WHERE CAN NOT BE SATISFIED IF NESTED IS NULL
                        deeper_conditions = FALSE
                    else:
                        # ENSURE THIS IS NOT "OPTIMIZED" TO FALSE
                        deeper_conditions = NotOp(
                            NestedOp(
                                path=Variable(nest_path),
                                where=TRUE,
                                select=NULL,
                            )
                        )
                        deeper_conditions.simplified = True

            residue = residue.partial_eval()
            if residue.missing() is not TRUE:
                joins.append(residue)

        return ConcatOp(joins)

    Log.error("do not know what to do yet")
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Render the negation of `self.term` as SQL.

    The term is coerced to boolean, negated and simplified. If the simplified
    form is still a NotOp, its operand is rendered and prefixed with "NOT ";
    otherwise the simplified expression renders itself.

    :param schema: handed to the nested `to_sql` calls
    :param not_null: accepted but not used here
    :param boolean: accepted but not used here
    :return: wrapped single-entry list with a boolean ("b") sql, or the
             result of the simplified expression's `to_sql(schema)`
    """
    simplified = NotOp("not", BooleanOp("boolean", self.term)).partial_eval()
    if not isinstance(simplified, NotOp):
        # THE NEGATION WAS SIMPLIFIED AWAY
        return simplified.to_sql(schema)

    operand_sql = simplified.term.to_sql(schema)[0].sql.b
    return wrap([
        {"name": ".", "sql": {"b": "NOT " + sql_iso(operand_sql)}}
    ])
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Render the negation of `self.term` as SQL.

    The term is coerced to boolean, negated and simplified. If the simplified
    form is still a NotOp, its operand is rendered and prefixed with "NOT ";
    otherwise the simplified expression renders itself.

    :param schema: handed to the nested `to_sql` calls
    :param not_null: accepted but not used here
    :param boolean: accepted but not used here
    :return: wrapped single-entry list with a boolean ("b") sql, or the
             result of the simplified expression's `to_sql(schema)`
    """
    not_expr = NotOp(BooleanOp(self.term)).partial_eval()
    # MUST TEST FOR NotOp: THIS BRANCH READS not_expr.term (THE NEGATED
    # OPERAND), AND A REMAINING NotOp SENT TO THE else BRANCH WOULD JUST
    # CALL to_sql ON A NotOp AGAIN
    if is_op(not_expr, NotOp):
        operand_sql = not_expr.term.to_sql(schema)[0].sql.b
        return wrap([
            {"name": ".", "sql": {"b": "NOT " + sql_iso(operand_sql)}}
        ])
    else:
        return not_expr.to_sql(schema)
def to_esfilter(self, schema):
    """
    Render this when/then/else as an ES filter.

    Expressed as OR(AND(when, boolean(then)), AND(NOT(when), boolean(else))),
    simplified before conversion.

    :param schema: handed to `to_esfilter` of the simplified expression
    :return: whatever `to_esfilter(schema)` returns for the simplified form
    """
    # CONDITION HOLDS: THE then VALUE DECIDES
    when_branch = AndOp("and", [self.when, BooleanOp("boolean", self.then)])
    # CONDITION FAILS: THE else VALUE DECIDES
    else_branch = AndOp(
        "and",
        [NotOp("not", self.when), BooleanOp("boolean", self.els_)]
    )
    simplified = OrOp("or", [when_branch, else_branch]).partial_eval()
    return simplified.to_esfilter(schema)
def to_es14_filter(self, schema):
    """
    Render the disjunction of `self.terms` as an ES 1.4 filter.

    :param schema: handed to each negated term's `to_es14_filter`
    :return: es_not(es_and(...)) over the negated, simplified terms
    """
    # OR(x) == NOT(AND(NOT(xi) for xi in x))
    negated_filters = []
    for term in self.terms:
        negated = NotOp("not", term).partial_eval()
        negated_filters.append(negated.to_es14_filter(schema))
    return es_not(es_and(negated_filters))
def to_ruby(self, schema):
    """
    Render "first non-missing of `self.terms`" as a Ruby script.

    Starts with the last term and walks the remaining terms right to left,
    wrapping the accumulated script in a ternary for each term that may or
    may not be missing.

    :param schema: handed to every nested `to_ruby` call
    :return: NULL's Ruby script when there are no terms, otherwise the
             accumulated Ruby script
    """
    if not self.terms:
        return NULL.to_ruby(schema)

    last = self.terms[-1]
    acc = FirstOp("first", last).partial_eval().to_ruby(schema)

    for term in reversed(self.terms[:-1]):
        missing = term.missing().partial_eval()
        exists = NotOp("not", missing).partial_eval().to_ruby(schema)
        candidate = FirstOp("first", term).partial_eval().to_ruby(schema)

        if candidate.miss is TRUE:
            # ALWAYS MISSING: CONTRIBUTES NOTHING
            continue
        if candidate.miss is FALSE:
            # NEVER MISSING: EVERYTHING TO THE RIGHT IS UNREACHABLE
            acc = candidate
            continue

        # PICK A TYPE THAT COVERS BOTH ALTERNATIVES
        if acc.type == candidate.type:
            new_type = candidate.type
        elif (acc.type, candidate.type) in ((NUMBER, INTEGER), (INTEGER, NUMBER)):
            new_type = NUMBER
        else:
            new_type = OBJECT

        combined_miss = AndOp("and", [acc.miss, missing]).partial_eval()
        ternary = "(" + exists.expr + ") ? (" + candidate.expr + ") : (" + acc.expr + ")"
        acc = Ruby(miss=combined_miss, type=new_type, expr=ternary, frum=self)

    return acc
def _split_expression(expr, schema, all_paths):
    """
    :param expr: JSON EXPRESSION
    :param schema: USED TO LOOK UP THE COLUMNS (AND THEIR nested_path) OF EACH VARIABLE
    :param all_paths: THE NESTED PATHS; EVERY RETURNED TUPLE HAS ONE LIST PER PATH
    :return: ARRAY INDEX BY (CONCAT, OUTER JOIN, AND)
    """
    expr = expr.partial_eval()

    if is_op(expr, AndOp):
        # CROSS PRODUCT: MERGE EVERY SPLIT OF EACH TERM INTO EVERY ACCUMULATED ROW
        acc = [tuple([] for _ in all_paths)]
        for term in expr.terms:
            acc = [
                tuple(n + an for n, an in zip(c, a))
                for c in _split_expression(term, schema, all_paths)
                for a in acc
            ]
        return acc

    if is_op(expr, OrOp):
        # EACH TERM IS SPLIT TOGETHER WITH THE NEGATION OF ALL EARLIER TERMS
        output = []
        exclude = []
        for term in expr.terms:
            guarded = AndOp([AndOp(exclude), term])
            output.extend(_split_expression(guarded, schema, all_paths))
            exclude.append(NotOp(term))
        return output

    if is_op(expr, NestedOp):
        # THE where BELONGS TO THE SLOT OF THE NESTED PATH
        row = tuple(
            [expr.where] if path == expr.path.var else []
            for path in all_paths
        )
        return [row]

    if is_op(expr, NotOp):
        # SPLIT THE OPERAND, THEN NEGATE EVERY EXPRESSION IN EVERY SLOT
        negated = []
        for row in _split_expression(expr.term, schema, all_paths):
            negated.append(
                tuple([NotOp(a) for a in slot] for slot in row)
            )
        return negated

    # ANYTHING ELSE: FIND THE NESTED PATHS ITS VARIABLES LIVE IN
    all_nests = list({
        c.nested_path[0]
        for v in expr.vars()
        for c in schema.values(v.var)
    })

    if len(all_nests) > 1:
        Log.error("do not know how to handle")
    else:
        # NO NESTED COLUMNS MEANS THE EXPRESSION GOES IN THE "." SLOT
        home = all_nests[0] if all_nests else "."
        return [tuple([expr] if path == home else [] for path in all_paths)]
def to_painless(self, schema):
    """
    Render the boolean coercion of `self.term` as a Painless script.

    :param schema: handed to every nested `to_painless` call
    :return: for a multi-valued script, the boolean of its element [0];
             for a BOOLEAN script, "when missing then FALSE else value";
             for anything else, "value is not missing"
    """
    value = self.term.to_painless(schema)

    if value.many:
        # COERCE THE FIRST ELEMENT INSTEAD OF THE WHOLE LIST
        first = Painless(
            miss=value.miss,
            type=value.type,
            expr="(" + value.expr + ")[0]",
            frum=value.frum
        )
        return BooleanOp("boolean", first).to_painless(schema)

    if value.type == BOOLEAN:
        # MISSING IS HANDLED BY THE when, SO THE VALUE ITSELF IS MARKED NEVER-MISSING
        miss = value.miss
        value.miss = FALSE
        guarded = WhenOp("when", miss, **{"then": FALSE, "else": value})
        return guarded.partial_eval().to_painless(schema)

    return NotOp("not", value.miss).partial_eval().to_painless(schema)
def to_es14_script(self, schema, not_null=False, boolean=False, many=True):
    """
    Render the boolean coercion of `self.term` as an ES 1.4 script.

    :param schema: handed to every nested `to_es14_script` call
    :param not_null: accepted but not used here
    :param boolean: accepted but not used here
    :param many: accepted but not used here
    :return: for a multi-valued script, the boolean of its element [0];
             for a BOOLEAN script, "when missing then FALSE else value";
             for anything else, "value is not missing"
    """
    value = self.term.to_es14_script(schema)

    if value.many:
        # COERCE THE FIRST ELEMENT INSTEAD OF THE WHOLE LIST
        first = EsScript(
            miss=value.miss,
            type=value.type,
            expr="(" + value.expr + ")[0]",
            frum=value.frum
        )
        return BooleanOp("boolean", first).to_es14_script(schema)

    if value.type == BOOLEAN:
        # MISSING IS HANDLED BY THE when, SO THE VALUE ITSELF IS MARKED NEVER-MISSING
        miss = value.miss
        value.miss = FALSE
        guarded = WhenOp("when", miss, **{"then": FALSE, "else": value})
        return guarded.partial_eval().to_es14_script(schema)

    return NotOp("not", value.miss).partial_eval().to_es14_script(schema)
def to_esfilter(self, schema):
    """
    Render the disjunction of `self.terms` as an ES filter.

    :param schema: supplies the cluster version and is handed to each
                   term's `to_esfilter`
    :return: es_or over the terms, except for cluster versions starting
             with "5.", which get es_not(es_and(...)) over the negated terms
    """
    # TODO: REPLICATE THIS WHOLE expression.py SO IT IS CLEAR ES5 QUERIES ARE A BIT DIFFERENT
    version = schema.snowflake.namespace.es_cluster.version
    if not version.startswith("5."):
        # VERSION 6.2
        return es_or([
            term.partial_eval().to_esfilter(schema)
            for term in self.terms
        ])

    # VERSION 5.2.x
    # WE REQUIRE EXIT-EARLY SEMANTICS, OTHERWISE EVERY EXPRESSION IS A SCRIPT EXPRESSION
    # {"bool":{"should" :[a, b, c]}} RUNS IN PARALLEL
    # {"bool":{"must_not":[a, b, c]}} ALSO RUNS IN PARALLEL

    # OR(x) == NOT(AND(NOT(xi) for xi in x))
    negated_filters = [
        NotOp("not", term).partial_eval().to_esfilter(schema)
        for term in self.terms
    ]
    return es_not(es_and(negated_filters))
def split(expr):
    """
    :param expr: JSON EXPRESSION
    :return: ARRAY INDEX BY (CONCAT, OUTER JOIN, AND)
    """
    # NOTE: all_paths AND frum ARE NOT PARAMETERS; THEY COME FROM THE SURROUNDING SCOPE
    expr = expr.partial_eval()

    if is_op(expr, AndOp):
        # CROSS PRODUCT: MERGE EVERY SPLIT OF EACH TERM INTO EVERY ACCUMULATED ROW
        acc = [tuple([] for _ in all_paths)]
        for term in expr.terms:
            acc = [
                tuple(n + an for n, an in zip(c, a))
                for c in split(term)
                for a in acc
            ]
        return acc

    if is_op(expr, OrOp):
        # EACH TERM IS SPLIT TOGETHER WITH THE NEGATION OF ALL EARLIER TERMS
        output = []
        exclude = []
        for term in expr.terms:
            output.extend(split(AndOp([AndOp(exclude), term])))
            exclude.append(NotOp(term))
        return output

    # ANYTHING ELSE: FIND THE NESTED PATHS ITS VARIABLES LIVE IN
    all_nests = list({
        c.nested_path[0]
        for v in expr.vars()
        for c in frum.schema.values(v.var)
    })

    if len(all_nests) > 1:
        Log.error("do not know how to handle")
    else:
        # NO NESTED COLUMNS MEANS THE EXPRESSION GOES IN THE "." SLOT
        home = all_nests[0] if all_nests else "."
        return [tuple([expr] if path == home else [] for path in all_paths)]
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Render "lhs != rhs" as SQL by negating the equality of both sides.

    :param schema: forwarded to the negation's `to_sql`
    :param not_null: forwarded to the negation's `to_sql`
    :param boolean: forwarded to the negation's `to_sql`
    :return: result of `to_sql` on NOT(EQ(lhs, rhs))
    """
    equality = EqOp("eq", [self.lhs, self.rhs])
    negation = NotOp("not", equality)
    return negation.to_sql(schema, not_null, boolean)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Render "lhs != rhs" as SQL by negating the simplified equality.

    :param schema: handed to `to_sql` of the simplified negation
    :param not_null: accepted but not forwarded
    :param boolean: accepted but not forwarded
    :return: result of `to_sql(schema)` on the simplified NOT(EQ(lhs, rhs))
    """
    # SIMPLIFY THE EQUALITY FIRST, THEN THE NEGATION AROUND IT
    equality = EqOp([self.lhs, self.rhs]).partial_eval()
    negation = NotOp(equality).partial_eval()
    return negation.to_sql(schema)
def to_es14_script(self, schema, not_null=False, boolean=False, many=True):
    """
    Render the inequality of `self.lhs` and `self.rhs` as an ES 1.4 script.

    Built as a case expression: a missing side yields "the other side is not
    missing"; the fall-through is the negated basic equality.

    :param schema: handed to `to_es14_script` of the simplified expression
    :param not_null: accepted but not used here
    :param boolean: accepted but not used here
    :param many: accepted but not used here
    :return: whatever `to_es14_script(schema)` returns for the simplified case
    """
    # LHS IS MISSING: DIFFERENT ONLY IF RHS EXISTS
    when_lhs_missing = WhenOp(
        "when",
        self.lhs.missing(),
        **{"then": NotOp("not", self.rhs.missing())}
    )
    # RHS IS MISSING: DIFFERENT ONLY IF LHS EXISTS
    when_rhs_missing = WhenOp(
        "when",
        self.rhs.missing(),
        **{"then": NotOp("not", self.lhs.missing())}
    )
    # FALL-THROUGH: PLAIN NEGATED EQUALITY
    otherwise = NotOp("not", BasicEqOp("eq", [self.lhs, self.rhs]))

    case = CaseOp("case", [when_lhs_missing, when_rhs_missing, otherwise])
    return case.partial_eval().to_es14_script(schema)
def to_esfilter(self, schema):
    """
    Render this expression as an ES filter by negating its `missing()`.

    :param schema: handed to `to_esfilter` of the simplified negation
    :return: result of `to_esfilter(schema)` on the simplified NOT(missing)
    """
    not_missing = NotOp("not", self.missing()).partial_eval()
    return not_missing.to_esfilter(schema)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Render "not equal" over `self.terms` as SQL.

    :param schema: handed to `to_sql` of the simplified negation
    :param not_null: accepted but not forwarded
    :param boolean: accepted but not forwarded
    :return: result of `to_sql(schema)` on the simplified NOT(EQ(terms))
    """
    # SIMPLIFY THE EQUALITY FIRST, THEN THE NEGATION AROUND IT
    equality = EqOp('eq', self.terms).partial_eval()
    negation = NotOp('not', equality).partial_eval()
    return negation.to_sql(schema)