def named_tuple_as_json_object(expr, *, stype, env):
    """Return a pgast expression serializing a named tuple as a JSON object.

    Each element is extracted with the ``edgedb.row_getattr_by_num``
    backend helper and paired with its element name as the object key.
    """
    assert stype.is_tuple() and stype.named

    args = []
    for idx, (el_name, el_type) in enumerate(stype.iter_subtypes()):
        # The element name becomes the JSON object key.
        args.append(pgast.StringConstant(val=el_name))

        # A typed NULL sentinel communicates the element's SQL type
        # to row_getattr_by_num.
        sentinel = pgast.TypeCast(
            arg=pgast.NullConstant(),
            type_name=pgast.TypeName(
                name=pgtypes.pg_type_from_object(env.schema, el_type)))

        element = pgast.FuncCall(
            name=('edgedb', 'row_getattr_by_num'),
            args=[
                expr,
                # Postgres row attributes are 1-based.
                pgast.NumericConstant(val=str(idx + 1)),
                sentinel,
            ])

        if el_type.is_collection():
            # Nested collections need their own JSON conversion.
            element = coll_as_json_object(element, stype=el_type, env=env)

        args.append(element)

    return pgast.FuncCall(
        name=('jsonb_build_object',),
        args=args,
        null_safe=True,
        ser_safe=True,
        nullable=expr.nullable)
def serialize_expr_to_json(
        expr: pgast.Base, *,
        path_id: irast.PathId,
        nested: bool = False,
        env: context.Environment) -> pgast.Base:
    """Wrap *expr* in the appropriate JSON serialization construct."""
    if isinstance(expr, pgast.TupleVar):
        return tuple_var_as_json_object(expr, path_id=path_id, env=env)

    if isinstance(expr, (pgast.RowExpr, pgast.ImplicitRowExpr)):
        return pgast.FuncCall(
            name=('jsonb_build_array',),
            args=expr.args,
            null_safe=True,
            ser_safe=True,
        )

    if nested:
        # Nested expressions are serialized by the enclosing level.
        return expr

    if path_id.target.is_collection() and not expr.ser_safe:
        # Collections that are not already serialization-safe need
        # the element-aware conversion.
        return coll_as_json_object(expr, stype=path_id.target, env=env)

    return pgast.FuncCall(
        name=('to_jsonb',),
        args=[expr],
        null_safe=True,
        ser_safe=True)
def unnamed_tuple_as_json_object(expr, *, stype, env):
    """Return a pgast expression serializing an unnamed tuple as a JSON array.

    The conversion is delegated wholesale to the
    ``edgedb.row_to_jsonb_array`` backend function.
    """
    assert stype.is_tuple() and not stype.named

    # NOTE(review): the original body also built a list of per-element
    # conversions (via edgedb.row_getattr_by_num / coll_as_json_object)
    # that was never referenced by the return value — pure dead code with
    # no side effects, so it has been removed.  If per-element collection
    # serialization was intended here, that is a separate fix.
    return pgast.FuncCall(
        name=('edgedb', 'row_to_jsonb_array',),
        args=[expr],
        null_safe=True,
        ser_safe=True,
        nullable=expr.nullable)
def ensure_transient_identity_for_set(
        ir_set: irast.Set, stmt: pgast.Query, *,
        ctx: context.CompilerContextLevel, type='int') -> None:
    """Attach a synthetic identity variable to *stmt* for *ir_set*.

    The identity is a generated UUID when *type* is ``'uuid'``,
    otherwise a ``row_number()`` window value.
    """
    if type == 'uuid':
        id_expr = pgast.FuncCall(
            name=('edgedb', 'uuid_generate_v1mc',),
            args=[],
        )
    else:
        id_expr = pgast.FuncCall(
            name=('row_number',),
            args=[],
            over=pgast.WindowDef())

    pathctx.put_path_identity_var(
        stmt, ir_set.path_id, id_expr, force=True, env=ctx.env)
    pathctx.put_path_bond(stmt, ir_set.path_id)
def serialize_expr(
        expr: pgast.Base, *, nested: bool=False,
        env: context.Environment) -> pgast.Base:
    """Serialize *expr* according to the environment's output format."""
    fmt = env.output_format

    if fmt == context.OutputFormat.NATIVE:
        return output_as_value(expr, env=env)

    if fmt != context.OutputFormat.JSON:
        raise RuntimeError(f'unexpected output format: {fmt!r}')

    # JSON output.
    if isinstance(expr, pgast.TupleVar):
        return tuple_var_as_json_object(expr, env=env)

    if isinstance(expr, pgast.ImplicitRowExpr):
        return pgast.FuncCall(
            name=('jsonb_build_array',), args=expr.args, null_safe=True)

    if not nested:
        return pgast.FuncCall(
            name=('to_jsonb',), args=[expr], null_safe=True)

    # Nested expressions are serialized by the enclosing level.
    return expr
def tuple_var_as_json_object(tvar, *, env):
    """Serialize a TupleVar as a JSON array (unnamed) or object (named)."""
    if not tvar.named:
        return pgast.FuncCall(
            name=('jsonb_build_array',),
            args=[serialize_expr(el.val, nested=True, env=env)
                  for el in tvar.elements],
            null_safe=True, nullable=tvar.nullable)

    keyvals = []
    for element in tvar.elements:
        rptr = element.path_id.rptr()
        if rptr is None:
            # No pointer: fall back to the last path component's name.
            name = element.path_id[-1].name.name
        else:
            name = rptr.shortname.name
            if rptr.is_link_property():
                # Link properties are spelled with a leading '@'.
                name = '@' + name
        keyvals.append(pgast.Constant(val=name))

        if isinstance(element.val, pgast.TupleVar):
            # Nested tuples must be serialized recursively.
            keyvals.append(serialize_expr(element.val, env=env))
        else:
            keyvals.append(element.val)

    return pgast.FuncCall(
        name=('jsonb_build_object',),
        args=keyvals, null_safe=True, nullable=tvar.nullable)
def array_as_json_object(expr, *, stype, env):
    """Serialize an array expression to JSON.

    Arrays of tuples are unnested, converted element-by-element and
    re-aggregated; everything else is handled by ``to_jsonb()``.
    """
    el_type = stype.element_type

    if not el_type.is_tuple():
        return pgast.FuncCall(
            name=('to_jsonb',), args=[expr],
            null_safe=True, ser_safe=True)

    is_named = el_type.named
    json_args = []
    coldeflist = []

    for n, st in el_type.iter_subtypes():
        colname = env.aliases.get(str(n))
        if is_named:
            # Named tuples serialize as objects: emit the key first.
            json_args.append(pgast.StringConstant(val=n))

        val = pgast.ColumnRef(name=[colname])
        if st.is_collection():
            # Nested collections need their own JSON conversion.
            val = coll_as_json_object(val, stype=st, env=env)
        json_args.append(val)

        coldeflist.append(
            pgast.ColumnDef(
                name=colname,
                typename=pgast.TypeName(
                    name=pgtypes.pg_type_from_object(env.schema, st))))

    json_func = 'jsonb_build_object' if is_named else 'jsonb_build_array'

    # SELECT jsonb_agg(<json_func>(...)) FROM unnest(expr) AS q(<coldeflist>)
    return pgast.SelectStmt(
        target_list=[
            pgast.ResTarget(
                val=pgast.FuncCall(
                    name=('jsonb_agg',),
                    args=[
                        pgast.FuncCall(
                            name=(json_func,),
                            args=json_args,
                        )
                    ]),
                ser_safe=True,
            )
        ],
        from_clause=[
            pgast.RangeFunction(
                alias=pgast.Alias(
                    aliasname=env.aliases.get('q'),
                ),
                coldeflist=coldeflist,
                functions=[
                    pgast.FuncCall(
                        name=('unnest',),
                        args=[expr],
                    )
                ])
        ])
def compile_TypeCast(
        expr: irast.TypeCast, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile an IR type cast to a SQL cast or a cast function call."""
    pg_expr = dispatch.compile(expr.expr, ctx=ctx)

    if expr.sql_cast:
        # The cast maps directly to a native SQL cast.
        to_type = irutils.typeref_to_type(ctx.env.schema, expr.to_type)
        pg_type = pg_types.pg_type_from_object(ctx.env.schema, to_type)
        return pgast.TypeCast(
            arg=pg_expr,
            type_name=pgast.TypeName(name=pg_type))

    if expr.sql_function or expr.sql_expr:
        # The cast is implemented as a backend function.
        if expr.sql_expr:
            func_name = common.get_backend_cast_name(
                expr.cast_name, aspect='function')
        else:
            func_name = (expr.sql_function,)
        return pgast.FuncCall(name=func_name, args=[pg_expr])

    raise RuntimeError('cast not supported')
def fini_dml_stmt(
        ir_stmt: irast.MutatingStmt, wrapper: pgast.Query,
        dml_cte: pgast.CommonTableExpr, dml_rvar: pgast.BaseRangeVar, *,
        parent_ctx: context.CompilerContextLevel,
        ctx: context.CompilerContextLevel) -> pgast.Query:
    """Finalize compilation of a DML statement."""
    # Record the effect of this mutation in the relation overlay
    # context so that a RETURNING clause potentially referencing
    # this class yields the expected results.
    if isinstance(ir_stmt, irast.InsertStmt):
        dbobj.add_rel_overlay(
            ir_stmt.subject.scls, 'union', dml_cte, env=ctx.env)
    elif isinstance(ir_stmt, irast.DeleteStmt):
        dbobj.add_rel_overlay(
            ir_stmt.subject.scls, 'except', dml_cte, env=ctx.env)

    if parent_ctx.toplevel_stmt is wrapper:
        # Top-level DML statements return the count of affected objects.
        ret_ref = pathctx.get_path_identity_var(
            wrapper, ir_stmt.subject.path_id, env=parent_ctx.env)
        count = pgast.FuncCall(name=('count',), args=[ret_ref])
        wrapper.target_list = [pgast.ResTarget(val=count)]

    clauses.fini_stmt(wrapper, ctx, parent_ctx)

    return wrapper
def top_output_as_value(
        stmt: pgast.Query, *,
        env: context.Environment) -> pgast.Query:
    """Finalize output serialization on the top level."""
    if env.output_format != context.OutputFormat.JSON:
        return stmt

    # For JSON we aggregate the whole result set into one JSON array.
    subrvar = pgast.RangeSubselect(
        subquery=stmt,
        alias=pgast.Alias(aliasname=env.aliases.get('aggw')))

    stmt_res = stmt.target_list[0]
    if stmt_res.name is None:
        stmt_res.name = env.aliases.get('v')

    new_val = pgast.FuncCall(
        name=('json_agg',),
        args=[pgast.ColumnRef(name=[stmt_res.name])])

    # XXX: nullability introspection is not reliable,
    #      remove `True or` once it is.
    if True or stmt_res.val.nullable:
        # An empty result set must serialize as '[]', not NULL.
        new_val = pgast.CoalesceExpr(
            args=[new_val, pgast.Constant(val='[]')])

    result = pgast.SelectStmt(
        target_list=[pgast.ResTarget(val=new_val)],
        from_clause=[subrvar])

    # Hoist the CTEs onto the new top-level statement.
    result.ctes = stmt.ctes
    stmt.ctes = []

    return result
def compile_Array(
        expr: irast.Base, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile an array constructor expression."""
    compiled = [dispatch.compile(el, ctx=ctx) for el in expr.elements]
    array = pgast.ArrayExpr(elements=compiled)
    if any(el.nullable for el in compiled):
        # Delegate handling of possible NULL elements to the
        # backend helper.
        return pgast.FuncCall(
            name=('edgedb', '_nullif_array_nulls'), args=[array])
    return array
def compile_IndexIndirection(expr: irast.Base, *,
                             ctx: context.CompilerContextLevel) -> pgast.Base:
    # Handle Expr[Index], where Expr may be std::str or array<T>.
    # For strings we translate this into substr calls, whereas
    # for arrays the native slice syntax is used.
    is_string = False
    arg_type = _infer_type(expr.expr, ctx=ctx)

    with ctx.new() as subctx:
        subctx.expr_exposed = False
        subj = dispatch.compile(expr.expr, ctx=subctx)
        index = dispatch.compile(expr.index, ctx=subctx)

    if isinstance(arg_type, s_scalars.ScalarType):
        b = arg_type.get_topmost_concrete_base()
        is_string = b.name == 'std::str'

    one = pgast.Constant(val=1)
    zero = pgast.Constant(val=0)

    # Negative indexes count from the end: when index < 0, the
    # effective (1-based) position is upper_bound + (index + 1).
    when_cond = astutils.new_binop(lexpr=index, rexpr=zero, op=ast.ops.LT)
    index_plus_one = astutils.new_binop(
        lexpr=index, op=ast.ops.ADD, rexpr=one)

    if is_string:
        upper_bound = pgast.FuncCall(name=('char_length', ), args=[subj])
    else:
        upper_bound = pgast.FuncCall(name=('array_upper', ),
                                     args=[subj, one])

    neg_off = astutils.new_binop(
        lexpr=upper_bound, rexpr=index_plus_one, op=ast.ops.ADD)

    when_expr = pgast.CaseWhen(expr=when_cond, result=neg_off)
    # CASE WHEN index < 0 THEN upper_bound + index + 1
    #      ELSE index + 1 END  (Postgres positions are 1-based)
    index = pgast.CaseExpr(args=[when_expr], defresult=index_plus_one)

    if is_string:
        # substr() requires an int position.
        index = pgast.TypeCast(
            arg=index, type_name=pgast.TypeName(name=('int', )))
        result = pgast.FuncCall(name=('substr', ), args=[subj, index, one])
    else:
        indirection = pgast.Indices(ridx=index)
        result = pgast.Indirection(arg=subj, indirection=[indirection])

    return result
def safe_array_expr(
        elements: typing.List[pgast.Base], **kwargs) -> pgast.Base:
    """Build an ArrayExpr, guarding against possibly-NULL elements.

    When any element is nullable the array is routed through the
    ``edgedb._nullif_array_nulls`` backend helper.
    """
    array = pgast.ArrayExpr(elements=elements, **kwargs)
    if not any(el.nullable for el in elements):
        return array
    return pgast.FuncCall(
        name=('edgedb', '_nullif_array_nulls'),
        args=[array],
        **kwargs,
    )
def compile_TypeRef(
        expr: irast.Base, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile a type reference into a backend type-id lookup call."""
    if expr.subtypes:
        # Types with subtypes are not supported here yet.
        raise NotImplementedError()
    return pgast.FuncCall(
        name=('edgedb', '_resolve_type_id'),
        args=[pgast.Constant(val=expr.maintype)],
    )
def compile_TypeCheckOp(
        expr: irast.TypeCheckOp, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile IS / IS NOT type checks via edgedb.issubclass()."""
    with ctx.new() as newctx:
        newctx.expr_exposed = False
        left = dispatch.compile(expr.left, ctx=newctx)
        right = dispatch.compile(expr.right, ctx=newctx)

    check = pgast.FuncCall(
        name=('edgedb', 'issubclass'), args=[left, right])

    if expr.op == ast.ops.IS_NOT:
        # IS NOT is simply the negation of the subclass check.
        check = astutils.new_unop(ast.ops.NOT, check)

    return check
def prepare_tuple_for_aggregation(
        expr: pgast.Base, *,
        env: context.Environment) -> pgast.Base:
    """Make a tuple expression safe to pass to an aggregate function."""
    if env.output_format == context.OutputFormat.JSON:
        return expr

    # PostgreSQL sometimes "forgets" the structure of an anonymous
    # tuple type, and so any attempt to access it would fail with
    # "record type has not been registered".  To combat this, call
    # BlessTupleDesc() (exposed through the edgedb.bless_record()
    # function) to register the tuple description in the global cache.
    return pgast.FuncCall(
        name=('edgedb', 'bless_record'), args=[expr])
def compile_FunctionCall(
        expr: irast.Base, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile a function call in a simple (non-set) expression context."""
    if expr.typemod is ql_ft.TypeModifier.SET_OF:
        raise RuntimeError(
            'set returning functions are not supported in simple expressions')

    args = [dispatch.compile(a, ctx=ctx) for a in expr.args]

    if expr.has_empty_variadic:
        # Represent an absent variadic tail as an empty typed array.
        empty_var = pgast.TypeCast(
            arg=pgast.ArrayExpr(elements=[]),
            type_name=pgast.TypeName(
                name=pg_types.pg_type_from_object(
                    ctx.env.schema, expr.variadic_param_type)))
        args.append(pgast.VariadicArgument(expr=empty_var))

    if expr.func_sql_function:
        # An explicitly mapped SQL function.
        name = (expr.func_sql_function,)
    else:
        name = common.schema_name_to_pg_name(expr.func_shortname)

    result = pgast.FuncCall(name=name, args=args)

    if expr.force_return_cast:
        # The underlying function has a return value type different
        # from that of the EdgeQL function declaration, so add an
        # explicit cast.
        result = pgast.TypeCast(
            arg=result,
            type_name=pgast.TypeName(
                name=pg_types.pg_type_from_object(
                    ctx.env.schema, expr.stype)))

    return result
def compile_SliceIndirection(
        expr: irast.Base, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile Expr[start:stop] into an edgedb._slice() call.

    Expr may be std::str, array<T> or std::json; the backend helper
    handles all three uniformly.  A missing bound compiles to NULL.
    """
    with ctx.new() as subctx:
        subctx.expr_exposed = False
        subj = dispatch.compile(expr.expr, ctx=subctx)

        start = (pgast.NullConstant() if expr.start is None
                 else dispatch.compile(expr.start, ctx=subctx))
        stop = (pgast.NullConstant() if expr.stop is None
                else dispatch.compile(expr.stop, ctx=subctx))

    # Any integer indexes must be upcast into int to fit the helper
    # function's signature.
    start = pgast.TypeCast(
        arg=start, type_name=pgast.TypeName(name=('int',)))
    stop = pgast.TypeCast(
        arg=stop, type_name=pgast.TypeName(name=('int',)))

    return pgast.FuncCall(
        name=('edgedb', '_slice'),
        args=[subj, start, stop])
def compile_IndexIndirection(
        expr: irast.Base, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile Expr[index] into an edgedb._index() call.

    Expr may be std::str, array<T> or std::json.  The backend helper
    performs a boundary check and may raise an exception, so source
    context is passed along for the error message.
    """
    # Line, column and filename are captured here to be used in the
    # error message raised by the backend helper.
    srcctx = pgast.StringConstant(
        val=irutils.get_source_context_as_json(
            expr.index, errors.InvalidValueError))

    with ctx.new() as subctx:
        subctx.expr_exposed = False
        subj = dispatch.compile(expr.expr, ctx=subctx)
        index = dispatch.compile(expr.index, ctx=subctx)

    # If the index is some integer, cast it into int, because there's
    # no backend function that handles indexes larger than int.
    int_t = ctx.env.schema.get('std::anyint')
    if expr.index.stype.issubclass(ctx.env.schema, int_t):
        index = pgast.TypeCast(
            arg=index, type_name=pgast.TypeName(name=('int',)))

    return pgast.FuncCall(
        name=('edgedb', '_index'), args=[subj, index, srcctx])
def compile_FunctionCall(
        expr: irast.Base, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile a plain function call in a simple expression context."""
    funcobj = expr.func

    if funcobj.aggregate:
        raise RuntimeError(
            'aggregate functions are not supported in simple expressions')

    if funcobj.set_returning:
        raise RuntimeError(
            'set returning functions are not supported in simple expressions')

    args = [dispatch.compile(a, ctx=ctx) for a in expr.args]

    if funcobj.from_function:
        # The function maps to an explicitly named SQL function.
        name = (funcobj.from_function,)
    else:
        # Derive the backend name from the EdgeDB shortname.
        name = (
            common.edgedb_module_name_to_schema_name(
                funcobj.shortname.module),
            common.edgedb_name_to_pg_name(funcobj.shortname.name))

    return pgast.FuncCall(name=name, args=args)
def compile_SliceIndirection(expr: irast.Base, *,
                             ctx: context.CompilerContextLevel) -> pgast.Base:
    # Handle Expr[Start:End], where Expr may be std::str or array<T>.
    # For strings we translate this into substr calls, whereas
    # for arrays the native slice syntax is used.
    with ctx.new() as subctx:
        subctx.expr_exposed = False
        subj = dispatch.compile(expr.expr, ctx=subctx)
        start = dispatch.compile(expr.start, ctx=subctx)
        stop = dispatch.compile(expr.stop, ctx=subctx)

    one = pgast.Constant(val=1)
    zero = pgast.Constant(val=0)

    is_string = False
    arg_type = _infer_type(expr.expr, ctx=ctx)

    if isinstance(arg_type, s_scalars.ScalarType):
        b = arg_type.get_topmost_concrete_base()
        is_string = b.name == 'std::str'

    if is_string:
        upper_bound = pgast.FuncCall(name=('char_length', ), args=[subj])
    else:
        upper_bound = pgast.FuncCall(name=('array_upper', ),
                                     args=[subj, one])

    if astutils.is_null_const(start):
        # Omitted lower bound: slice from the beginning (1-based).
        lower = one
    else:
        lower = start
        # Negative lower bound counts from the end:
        # CASE WHEN lower < 0 THEN upper_bound + lower + 1
        #      ELSE lower + 1 END
        when_cond = astutils.new_binop(
            lexpr=lower, rexpr=zero, op=ast.ops.LT)
        lower_plus_one = astutils.new_binop(
            lexpr=lower, rexpr=one, op=ast.ops.ADD)
        neg_off = astutils.new_binop(
            lexpr=upper_bound, rexpr=lower_plus_one, op=ast.ops.ADD)
        when_expr = pgast.CaseWhen(expr=when_cond, result=neg_off)
        lower = pgast.CaseExpr(args=[when_expr], defresult=lower_plus_one)

    if astutils.is_null_const(stop):
        # Omitted upper bound: slice to the end.
        upper = upper_bound
    else:
        upper = stop
        # Negative upper bound counts from the end:
        # CASE WHEN upper < 0 THEN upper_bound + upper ELSE upper END
        when_cond = astutils.new_binop(
            lexpr=upper, rexpr=zero, op=ast.ops.LT)
        neg_off = astutils.new_binop(
            lexpr=upper_bound, rexpr=upper, op=ast.ops.ADD)
        when_expr = pgast.CaseWhen(expr=when_cond, result=neg_off)
        upper = pgast.CaseExpr(args=[when_expr], defresult=upper)

    if is_string:
        # substr(subj, lower [, length]) requires int arguments.
        lower = pgast.TypeCast(
            arg=lower, type_name=pgast.TypeName(name=('int', )))

        args = [subj, lower]

        if upper is not upper_bound:
            # A bounded slice: length = upper - lower + 1.
            for_length = astutils.new_binop(
                lexpr=upper, op=ast.ops.SUB, rexpr=lower)
            for_length = astutils.new_binop(
                lexpr=for_length, op=ast.ops.ADD, rexpr=one)
            for_length = pgast.TypeCast(
                arg=for_length, type_name=pgast.TypeName(name=('int', )))
            args.append(for_length)

        result = pgast.FuncCall(name=('substr', ), args=args)
    else:
        # Native array slice syntax: subj[lower:upper].
        indirection = pgast.Indices(lidx=lower, ridx=upper)
        result = pgast.Indirection(arg=subj, indirection=[indirection])

    return result
def cast(node: pgast.Base, *,
         source_type: s_obj.Object, target_type: s_obj.Object,
         force: bool = False,
         env: context.Environment) -> pgast.Base:
    # Compile a cast of *node* from *source_type* to *target_type*.
    # When *force* is False, a cast between identically named types
    # is a no-op.
    if source_type.name == target_type.name and not force:
        return node

    schema = env.schema
    real_t = schema.get('std::anyreal')
    int_t = schema.get('std::anyint')
    json_t = schema.get('std::json')
    str_t = schema.get('std::str')
    datetime_t = schema.get('std::datetime')
    bool_t = schema.get('std::bool')

    if isinstance(target_type, s_types.Collection):
        if target_type.schema_name == 'array':
            if source_type.issubclass(json_t):
                # If we are casting a jsonb array to array, we do the
                # following transformation:
                # EdgeQL: <array<T>>MAP_VALUE
                # SQL:
                #      SELECT array_agg(j::T)
                #      FROM jsonb_array_elements(MAP_VALUE) AS j
                inner_cast = cast(
                    pgast.ColumnRef(name=['j']),
                    source_type=source_type,
                    target_type=target_type.element_type,
                    env=env)
                return pgast.SelectStmt(
                    target_list=[
                        pgast.ResTarget(val=pgast.FuncCall(
                            name=('array_agg', ), args=[inner_cast]))
                    ],
                    from_clause=[
                        pgast.RangeFunction(
                            functions=[
                                pgast.FuncCall(
                                    name=('jsonb_array_elements', ),
                                    args=[node])
                            ],
                            alias=pgast.Alias(aliasname='j'))
                    ])
            else:
                # EdgeQL: <array<int64>>['1', '2']
                # to SQL: ARRAY['1', '2']::int[]
                elem_pgtype = pg_types.pg_type_from_object(
                    schema, target_type.element_type, topbase=True)
                return pgast.TypeCast(
                    arg=node,
                    type_name=pgast.TypeName(
                        name=elem_pgtype, array_bounds=[-1]))
    else:
        # `target_type` is not a collection.
        if (source_type.issubclass(datetime_t) and
                target_type.issubclass(str_t)):
            # Normalize datetime to text conversion to have the same
            # format as one would get by serializing to JSON.
            #
            # EdgeQL: <text><datetime>'2010-10-10';
            # To SQL: trim(to_json('2010-01-01'::timestamptz)::text, '"')
            return pgast.FuncCall(
                name=('trim', ),
                args=[
                    pgast.TypeCast(
                        arg=pgast.FuncCall(
                            name=('to_json', ), args=[node]),
                        type_name=pgast.TypeName(name=('text', ))),
                    pgast.Constant(val='"')
                ])

        elif source_type.issubclass(bool_t) and target_type.issubclass(int_t):
            # PostgreSQL 9.6 doesn't allow to cast 'boolean' to any
            # integer other than int32:
            #      SELECT 'true'::boolean::bigint;
            #      ERROR:  cannot cast type boolean to bigint
            # So we transform EdgeQL: <int64>BOOL
            # to SQL: BOOL::int::<targetint>
            return pgast.TypeCast(
                arg=pgast.TypeCast(
                    arg=node,
                    type_name=pgast.TypeName(name=('int', ))),
                type_name=pgast.TypeName(
                    name=pg_types.pg_type_from_scalar(schema, target_type)))

        elif source_type.issubclass(int_t) and target_type.issubclass(bool_t):
            # PostgreSQL 9.6 doesn't allow to cast any integer other
            # than int32 to 'boolean':
            #      SELECT 1::bigint::boolean;
            #      ERROR:  cannot cast type bigint to boolean
            # So we transform EdgeQL: <boolean>INT
            # to SQL: (INT != 0)
            return astutils.new_binop(
                node, pgast.Constant(val=0), op=ast.ops.NE)

        elif source_type.issubclass(json_t):
            if (target_type.issubclass(real_t) or
                    target_type.issubclass(bool_t)):
                # Simply cast to text and then to the target type.
                return cast(
                    cast(node,
                         source_type=source_type,
                         target_type=str_t,
                         env=env),
                    source_type=str_t,
                    target_type=target_type,
                    env=env)

            elif target_type.issubclass(str_t):
                # It's not possible to cast jsonb string to text
                # directly, so we do a trick:
                # EdgeQL: <str>JSONB_VAL
                # SQL: array_to_json(ARRAY[JSONB_VAL])->>0
                return astutils.new_binop(
                    pgast.FuncCall(
                        name=('array_to_json', ),
                        args=[pgast.ArrayExpr(elements=[node])]),
                    pgast.Constant(val=0),
                    op='->>')

            elif target_type.issubclass(json_t):
                return pgast.TypeCast(
                    arg=node,
                    type_name=pgast.TypeName(name=('jsonb', )))

        else:
            # Generic fallback: a direct SQL cast to the target's
            # base backend type.
            const_type = pg_types.pg_type_from_object(
                schema, target_type, topbase=True)
            return pgast.TypeCast(
                arg=node,
                type_name=pgast.TypeName(name=const_type))

    raise RuntimeError(
        f'could not cast {source_type.name} to {target_type.name}')
def compile_Coalesce(
        expr: irast.Base, *,
        ctx: context.CompilerContextLevel) -> pgast.Base:
    """Compile a coalescing expression into a SQL coalesce() call."""
    with ctx.new() as subctx:
        compiled_args = [
            dispatch.compile(arg, ctx=subctx) for arg in expr.args]
        return pgast.FuncCall(name=('coalesce',), args=compiled_args)
def compile_BinOp(expr: irast.Base, *,
                  ctx: context.CompilerContextLevel) -> pgast.Base:
    # Compile a binary operation, with special handling for string
    # concatenation, tuple comparison and NULL-aware boolean logic.
    with ctx.new() as newctx:
        newctx.expr_exposed = False
        op = expr.op
        is_bool_op = op in {ast.ops.AND, ast.ops.OR}
        left = dispatch.compile(expr.left, ctx=newctx)
        right = dispatch.compile(expr.right, ctx=newctx)

        if not isinstance(expr.left, irast.EmptySet):
            left_type = _infer_type(expr.left, ctx=ctx)
        else:
            left_type = None

        if not isinstance(expr.right, irast.EmptySet):
            right_type = _infer_type(expr.right, ctx=ctx)
        else:
            right_type = None

        if (not isinstance(expr.left, irast.EmptySet) and
                not isinstance(expr.right, irast.EmptySet)):
            left_pg_type = pg_types.pg_type_from_object(
                ctx.env.schema, left_type, True)
            right_pg_type = pg_types.pg_type_from_object(
                ctx.env.schema, right_type, True)

            # text + text is spelled '||' in SQL, not '+'.
            if (left_pg_type in {('text', ), ('varchar', )} and
                    right_pg_type in {('text', ), ('varchar', )} and
                    op == ast.ops.ADD):
                op = '||'

        # Tuples are compared as SQL row expressions.
        if isinstance(left_type, s_types.Tuple):
            left = _tuple_to_row_expr(expr.left, ctx=newctx)
            left_count = len(left.args)
        else:
            left_count = 0

        if isinstance(right_type, s_types.Tuple):
            right = _tuple_to_row_expr(expr.right, ctx=newctx)
            right_count = len(right.args)
        else:
            right_count = 0

        if left_count != right_count:
            # Postgres does not allow comparing rows with
            # unequal number of entries, but we want to allow
            # this.  Fortunately, we know that such comparison is
            # always False.
            result = pgast.Constant(val=False)
        else:
            if is_bool_op:
                # Transform logical operators to force
                # the correct behaviour with respect to NULLs.
                # See the OrFilterFunction comment for details.
                if ctx.clause == 'where':
                    if expr.op == ast.ops.OR:
                        result = pgast.FuncCall(
                            name=('edgedb', '_or'),
                            args=[left, right])
                    else:
                        # For the purposes of the WHERE clause,
                        # AND operator works correctly, as
                        # it will either return NULL or FALSE,
                        # which both will disqualify the row.
                        result = astutils.new_binop(left, right, op=op)
                else:
                    # For expressions outside WHERE, we
                    # always want the result to be NULL
                    # if either operand is NULL.
                    # Achieved by casting both operands to int,
                    # combining with a bitwise op (NULL-propagating),
                    # and casting back to bool.
                    bitop = '&' if expr.op == ast.ops.AND else '|'
                    bitcond = astutils.new_binop(
                        lexpr=pgast.TypeCast(
                            arg=left,
                            type_name=pgast.TypeName(name=('int', ))),
                        rexpr=pgast.TypeCast(
                            arg=right,
                            type_name=pgast.TypeName(name=('int', ))),
                        op=bitop)
                    bitcond = pgast.TypeCast(
                        arg=bitcond,
                        type_name=pgast.TypeName(name=('bool', )))
                    result = bitcond
            else:
                result = astutils.new_binop(left, right, op=op)

    return result
def compile_GroupStmt(stmt: irast.GroupStmt, *,
                      ctx: context.CompilerContextLevel) -> pgast.Query:
    # Compile a GROUP statement: group the subject set into a CTE
    # partitioned by the BY expressions, derive a CTE of distinct
    # group values, and compute the result expression over them.
    parent_ctx = ctx
    with parent_ctx.substmt() as ctx:
        clauses.init_stmt(stmt, ctx=ctx, parent_ctx=parent_ctx)

        group_path_id = stmt.group_path_id

        # Process the GROUP .. BY part into a subquery.
        with ctx.subrel() as gctx:
            gctx.expr_exposed = False
            gquery = gctx.rel
            pathctx.put_path_bond(gquery, group_path_id)
            if stmt.path_scope:
                ctx.path_scope.update(
                    {path_id: gquery for path_id in stmt.path_scope.paths})
            relctx.update_scope(stmt.subject, gquery, ctx=gctx)
            stmt.subject.path_scope = None
            clauses.compile_output(stmt.subject, ctx=gctx)
            subj_rvar = pathctx.get_path_rvar(
                gquery, stmt.subject.path_id,
                aspect='value', env=gctx.env)
            relctx.ensure_bond_for_expr(
                stmt.subject, subj_rvar.query, ctx=gctx)

            group_paths = set()

            part_clause = []

            for expr in stmt.groupby:
                with gctx.new() as subctx:
                    partexpr = dispatch.compile(expr, ctx=subctx)

                part_clause.append(partexpr)
                group_paths.add(expr.path_id)

            # Since we will be computing arbitrary expressions
            # based on the grouped sets, it is more efficient
            # to compute the "group bond" as a small unique
            # value than it is to use GROUP BY and aggregate
            # actual id values into an array.
            #
            # To achieve this we use the first_value() window
            # function while using the GROUP BY clause as
            # a partition clause.  We use the id of the first
            # object in each partition if GROUP BY input is
            # a ObjectType, otherwise we generate the id using
            # row_number().
            if isinstance(stmt.subject.scls, s_objtypes.ObjectType):
                first_val = pathctx.get_path_identity_var(
                    gquery, stmt.subject.path_id, env=ctx.env)
            else:
                # Non-object subject: wrap the group query so the
                # partition clause can reference its outputs by column.
                with ctx.subrel() as subctx:
                    wrapper = subctx.rel

                    gquery_rvar = dbobj.rvar_for_rel(gquery, env=ctx.env)
                    wrapper.from_clause = [gquery_rvar]
                    relctx.pull_path_namespace(
                        target=wrapper, source=gquery_rvar, ctx=subctx)

                    new_part_clause = []

                    for i, expr in enumerate(part_clause):
                        path_id = stmt.groupby[i].path_id
                        pathctx.put_path_value_var(
                            gquery, path_id, expr,
                            force=True, env=ctx.env)
                        output_ref = pathctx.get_path_value_output(
                            gquery, path_id, env=ctx.env)
                        new_part_clause.append(
                            dbobj.get_column(gquery_rvar, output_ref))

                    part_clause = new_part_clause

                    first_val = pathctx.get_rvar_path_identity_var(
                        gquery_rvar, stmt.subject.path_id, env=ctx.env)

                gquery = wrapper
                pathctx.put_path_bond(gquery, group_path_id)

            group_id = pgast.FuncCall(
                name=('first_value', ),
                args=[first_val],
                over=pgast.WindowDef(partition_clause=part_clause))

            pathctx.put_path_identity_var(
                gquery, group_path_id, group_id, env=ctx.env)
            pathctx.put_path_value_var(
                gquery, group_path_id, group_id, env=ctx.env)

        group_cte = pgast.CommonTableExpr(
            query=gquery, name=ctx.env.aliases.get('g'))

        group_cte_rvar = dbobj.rvar_for_rel(group_cte, env=ctx.env)

        # Generate another subquery containing distinct values of
        # path expressions in BY.
        with ctx.subrel() as gvctx:
            gvquery = gvctx.rel
            relctx.include_rvar(
                gvquery, group_cte_rvar,
                path_id=group_path_id, ctx=gvctx)

            pathctx.put_path_bond(gvquery, group_path_id)

            for group_set in stmt.groupby:
                dispatch.visit(group_set, ctx=gvctx)
                path_id = group_set.path_id
                if path_id.is_objtype_path():
                    pathctx.put_path_bond(gvquery, path_id)

            gvquery.distinct_clause = [
                pathctx.get_path_identity_var(
                    gvquery, group_path_id, env=ctx.env)
            ]

            # Drop rvar mappings that are not part of the BY set.
            for path_id, aspect in list(gvquery.path_rvar_map):
                if path_id not in group_paths and path_id != group_path_id:
                    gvquery.path_rvar_map.pop((path_id, aspect))

            # The BY paths now live in the group-values rel only.
            for path_id, aspect in list(gquery.path_rvar_map):
                if path_id in group_paths:
                    gquery.path_rvar_map.pop((path_id, aspect))
                    gquery.path_namespace.pop((path_id, aspect), None)
                    gquery.path_outputs.pop((path_id, aspect), None)

        groupval_cte = pgast.CommonTableExpr(
            query=gvquery, name=ctx.env.aliases.get('gv'))

        groupval_cte_rvar = dbobj.rvar_for_rel(groupval_cte, env=ctx.env)

        o_stmt = stmt.result.expr

        # process the result expression;
        with ctx.subrel() as selctx:
            selquery = selctx.rel
            outer_id = stmt.result.path_id
            inner_id = o_stmt.result.path_id

            relctx.include_specific_rvar(
                selquery, groupval_cte_rvar, group_path_id,
                aspect='identity', ctx=ctx)

            for path_id in group_paths:
                selctx.path_scope[path_id] = selquery
                pathctx.put_path_rvar(
                    selquery, path_id, groupval_cte_rvar,
                    aspect='value', env=ctx.env)

            selctx.group_by_rels = selctx.group_by_rels.copy()
            selctx.group_by_rels[group_path_id, stmt.subject.path_id] = \
                group_cte

            selquery.view_path_id_map = {outer_id: inner_id}

            selquery.ctes.append(group_cte)

            sortoutputs = []

            selquery.ctes.append(groupval_cte)

            clauses.compile_output(o_stmt.result, ctx=selctx)

            # The WHERE clause
            selquery.where_clause = astutils.extend_binop(
                selquery.where_clause,
                clauses.compile_filter_clause(o_stmt.where, ctx=selctx))

            # Compile ORDER BY expressions as extra outputs so the
            # outer query can sort by them.
            for ir_sortexpr in o_stmt.orderby:
                alias = ctx.env.aliases.get('s')
                sexpr = dispatch.compile(ir_sortexpr.expr, ctx=selctx)
                selquery.target_list.append(
                    pgast.ResTarget(val=sexpr, name=alias))
                sortoutputs.append(alias)

        if not gvquery.target_list:
            # No values were pulled from the group-values rel,
            # we must remove the DISTINCT clause to prevent
            # a syntax error.
            gvquery.distinct_clause[:] = []

        query = ctx.rel
        result_rvar = dbobj.rvar_for_rel(
            selquery, lateral=True, env=ctx.env)
        relctx.include_rvar(query, result_rvar, path_id=outer_id, ctx=ctx)

        for rt in selquery.target_list:
            if rt.name is None:
                rt.name = ctx.env.aliases.get('v')
            if rt.name not in sortoutputs:
                # Sort-only outputs are not part of the result proper.
                query.target_list.append(
                    pgast.ResTarget(
                        val=dbobj.get_column(result_rvar, rt.name),
                        name=rt.name))

        for i, expr in enumerate(o_stmt.orderby):
            sort_ref = dbobj.get_column(result_rvar, sortoutputs[i])
            sortexpr = pgast.SortBy(
                node=sort_ref,
                dir=expr.direction,
                nulls=expr.nones_order)
            query.sort_clause.append(sortexpr)

        # The OFFSET clause
        if o_stmt.offset:
            with ctx.new() as ctx1:
                ctx1.clause = 'offsetlimit'
                ctx1.expr_exposed = False
                query.limit_offset = dispatch.compile(
                    o_stmt.offset, ctx=ctx1)

        # The LIMIT clause
        if o_stmt.limit:
            with ctx.new() as ctx1:
                ctx1.clause = 'offsetlimit'
                ctx1.expr_exposed = False
                query.limit_count = dispatch.compile(
                    o_stmt.limit, ctx=ctx1)

        clauses.fini_stmt(query, ctx, parent_ctx)

    return query