def test_agg_by_2(self):
    """Run ``agg_by`` with a combined aggregation list and with each aggregation alone.

    The input table is built from ``empty_table(10)`` and given three columns:
    ``grp_id`` (the grouping key), ``var`` (the aggregated value) and
    ``weights`` (used by the weighted average). Every call must yield a
    result table with at least one row.
    """
    test_table = empty_table(10)
    test_table = test_table.update(
        ["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"])

    aggs = [
        group(["aggGroup=var"]),
        avg(["aggAvg=var"]),
        count_("aggCount"),
        first(["aggFirst=var"]),
        last(["aggLast=var"]),
        max_(["aggMax=var"]),
        median(["aggMed=var"]),
        min_(["aggMin=var"]),
        pct(0.20, ["aggPct=var"]),
        std(["aggStd=var"]),
        sum_(["aggSum=var"]),
        abs_sum(["aggAbsSum=var"]),
        var(["aggVar=var"]),
        weighted_avg("var", ["weights"]),
    ]

    # All aggregations applied together, with the key passed as a list.
    result_table = test_table.agg_by(aggs, ["grp_id"])
    self.assertGreaterEqual(result_table.size, 1)

    # Each aggregation on its own, with the key passed as a bare string.
    # subTest keeps the loop going after a failure and reports which
    # aggregation broke, matching the pattern used in test_agg_all_by.
    for agg in aggs:
        with self.subTest(agg):
            result_table = test_table.agg_by(agg, "grp_id")
            self.assertGreaterEqual(result_table.size, 1)
def test_agg_all_by(self):
    """Run ``agg_all_by`` with column-less and column-named aggregations.

    Four phases, in order:
      1. aggregations created without column arguments, keyed by ``grp_id``;
      2. aggregations created with column arguments, keyed by ``grp_id``;
      3. ``count_`` must raise ``DHError`` whose root cause mentions
         "unsupported";
      4. the column-named aggregations again, without any key, where the
         result must have exactly one row.
    """
    source = empty_table(10).update(
        ["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"]
    )

    # Phase 1: aggregations constructed without column arguments.
    bare_aggs = [
        group(),
        avg(),
        first(),
        last(),
        max_(),
        median(),
        min_(),
        pct(0.20),
        std(),
        sum_(),
        abs_sum(),
        var(),
        weighted_avg("var"),
    ]
    for bare in bare_aggs:
        with self.subTest(bare):
            grouped = source.agg_all_by(bare, ["grp_id"])
            self.assertGreaterEqual(grouped.size, 1)

    # Phase 2: column names in the Aggregation are ignored
    named_aggs = [
        group(["aggGroup=var"]),
        avg(["aggAvg=var"]),
        pct(0.20, ["aggPct=var"]),
        std(["aggStd=var"]),
        sum_(["aggSum=var"]),
        abs_sum(["aggAbsSum=var"]),
        var(["aggVar=var"]),
        weighted_avg("var", ["weights"]),
    ]
    for named in named_aggs:
        with self.subTest(named):
            grouped = source.agg_all_by(named, ["grp_id"])
            self.assertGreaterEqual(grouped.size, 1)

    # Phase 3: count_ is expected to be rejected by agg_all_by.
    with self.assertRaises(DHError) as raised:
        source.agg_all_by(count_("aggCount"), "grp_id")
    self.assertIn("unsupported", raised.exception.root_cause)

    # Phase 4: no grouping key, so the result must have exactly one row.
    for named in named_aggs:
        with self.subTest(named):
            collapsed = source.agg_all_by(named)
            self.assertEqual(collapsed.size, 1)
value_spec=ck.json_spec([('user_id', dh.int_), ('url', dh.string), ('channel', dh.string), ('received_at', dh.DateTime)]), table_type=TableType.Append) pageviews_stg = pageviews \ .update_view([ 'url_path = url.split(`/`)', 'pageview_type = url_path[1]', 'target_id = Long.parseLong(url_path[2])' ]).drop_columns('url_path') purchases_by_item = purchases.agg_by([ agg.sum_(['revenue = purchase_price']), agg.count_('orders'), agg.sum_(['items_sold = quantity']) ], 'item_id') pageviews_by_item = pageviews_stg \ .where(['pageview_type = `products`']) \ .count_by('pageviews', ['item_id = target_id']) item_summary = items \ .view(['item_id = id', 'name', 'category']) \ .natural_join(purchases_by_item, on = ['item_id']) \ .natural_join(pageviews_by_item, on = ['item_id']) \ .drop_columns('item_id') \ .move_columns_down(['revenue', 'pageviews']) \ .update_view(['conversion_rate = orders / (double) pageviews'])