def exact_aggregates(self, query):
    """
        Build the exact aggregate query that wraps a per-key subquery.

        The outer select list is a "keycount" expression followed by the
        named expressions of the incoming query.  The inner query selects
        every expression pushed into a fresh Scope and is then passed
        through either per_key_random or per_key_clamped.

        :param query: the parsed query to rewrite; its select, source,
            where and agg attributes are read
        :return: a new Query reading from the aliased per-key subquery
    """
    inner_scope = Scope()

    # With row privacy every row counts; otherwise count the distinct keys,
    # which requires the key column to be selected by the inner query.
    if not self.options.row_privacy:
        key_count = AggFunction("COUNT", "DISTINCT", Column(self.key_col(query)))
        inner_scope.push_name(key_count.expression)
    else:
        key_count = AggFunction("COUNT", None, AllColumns())

    for named in query.select.namedExpressions:
        inner_scope.push_name(named.expression)

    outer_columns = [NamedExpression("keycount", key_count)]
    outer_columns.extend(query.select.namedExpressions)
    outer_select = Select(None, Seq(outer_columns))

    inner_query = Query(
        inner_scope.select(), query.source, query.where, query.agg, None, None, None
    )

    use_reservoir = self.options.reservoir_sample and not self.options.row_privacy
    if use_reservoir:
        # Keep rows whose per-key row number is at most max_contrib.
        relation = AliasedRelation(self.per_key_random(inner_query), "per_key_random")
        max_contrib = self.options.max_contrib
        row_filter = Where(
            BooleanCompare(
                Column("per_key_random.row_num"),
                "<=",
                Literal(str(max_contrib), max_contrib),
            )
        )
    else:
        relation = AliasedRelation(self.per_key_clamped(inner_query), "per_key_all")
        row_filter = None

    return Query(outer_select, From([relation]), row_filter, query.agg, None, None, None)
def calculate_variance(self, exp, scope):
    """
        Rewrite a variance as (average of the squares) - (average squared).

        Both averages are produced by calculate_avg, so the sums and counts
        they need are pushed into the supplied scope.

        :param exp: expression whose .expression and .quantifier are read
        :param scope: scope passed through to calculate_avg
        :return: an ArithmeticExpression for the difference
    """
    operand, quantifier = exp.expression, exp.quantifier

    # The average of squares is computed first, then the plain average.
    squared_operand = ArithmeticExpression(operand, '*', operand)
    mean_of_squares = self.calculate_avg(
        AggFunction("AVG", quantifier, squared_operand), scope
    )
    mean = self.calculate_avg(AggFunction("AVG", quantifier, operand), scope)

    return ArithmeticExpression(
        mean_of_squares, "-", ArithmeticExpression(mean, '*', mean)
    )
def calculate_avg(self, exp, scope):
    """
        Rewrite a mean as a sum divided by a count.

        A SUM and a COUNT over the same operand and quantifier are pushed
        into the scope via push_sum_or_count, and the quotient of the two
        resulting expressions is returned wrapped in a NestedExpression.

        :param exp: expression whose .expression and .quantifier are read
        :param scope: scope that receives the SUM and COUNT aggregates
        :return: NestedExpression wrapping sum / count
    """
    operand = exp.expression
    quantifier = exp.quantifier

    # SUM is pushed first, then COUNT.
    numerator, denominator = [
        self.push_sum_or_count(AggFunction(func_name, quantifier, operand), scope)
        for func_name in ("SUM", "COUNT")
    ]

    return NestedExpression(ArithmeticExpression(numerator, "/", denominator))
def calculate_stddev(self, exp, scope):
    """
        Rewrite a standard deviation as the square root of the variance.

        :param exp: expression whose .quantifier and .expression are read
        :param scope: scope passed through to calculate_variance
        :return: MathFunction applying SQRT to the variance expression
    """
    std_call = AggFunction('STD', exp.quantifier, exp.expression)
    return MathFunction("SQRT", self.calculate_variance(std_call, scope))
def push_sum_or_count(self, exp, scope):
    """
        Push a copy of an aggregate into the scope and reference it by name.

        A new AggFunction is built from the name, quantifier and expression
        of exp and handed to scope.push_name; whatever push_name returns is
        used as the name of the returned Column.

        :param exp: aggregate whose .name, .quantifier and .expression are read
        :param scope: scope that receives the copied aggregate
        :return: Column built from the value returned by scope.push_name
    """
    pushed_copy = AggFunction(exp.name, exp.quantifier, exp.expression)
    return Column(scope.push_name(pushed_copy))