def test_ordered_aggregation(self):
    """Check an ordered, limited, nested aggregate against hand-made totals.

    The raw (ad_id, cpm_millis) rows for one day are folded into a
    per-ad [sum, row count] reference. The aggregated select must then
    yield counts in non-decreasing order, agree with the reference for
    every ad it returns, and contain min(number of ads, 3) rows.
    """
    imps = Table.from_tag(IMPS)
    day = '2014-01-27'

    # Reference values: ad_id -> [sum of cpm_millis, number of rows].
    raw_rows = select(imps.ad_id, imps.cpm_millis, where=imps.date == day)
    expected = {}
    for key, cpm in raw_rows:
        acc = expected.setdefault(key, [0, 0])
        acc[0] += cpm
        acc[1] += 1

    aggregated = select(imps.ad_id,
                        h_sum(imps.cpm_millis),
                        h_count(),
                        where=imps.date == day,
                        order_by=2,
                        limit=3,
                        nest=True)
    # The result object is iterated several times below.
    self.assertGreater(len(list(aggregated)), 0)

    previous = 0
    for key, total, n_rows in aggregated:
        # Counts must never decrease from one row to the next.
        self.assertLessEqual(previous, n_rows)
        previous = n_rows
        want_total, want_rows = expected[key]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)

    self.assertEqual(len(list(aggregated)), min(len(expected), 3))
def test_aggregate_join(self):
    """Check an aggregated two-table join against a nested-loop join.

    Impressions and pixels before the cut-off date are joined by hand
    on site_id, accumulating [sum of pixel amount, match count] per
    ad_id. The joined select must return one row per ad with the same
    sum and count.
    """
    imps = Table.from_tag(IMPS)
    pix = Table.from_tag(PIXELS)
    cutoff = '2014-01-13'

    imp_rows = list(select(imps.site_id, imps.ad_id,
                           where=imps.date < cutoff))
    pix_rows = list(select(pix.site_id, pix.amount,
                           where=pix.date < cutoff))

    # Reference join: ad_id -> [sum of amount, number of matching pairs].
    expected = {}
    for imp_site, ad in imp_rows:
        for pix_site, amt in pix_rows:
            if imp_site == pix_site:
                acc = expected.setdefault(ad, [0, 0])
                acc[0] += amt
                acc[1] += 1

    joined = select(imps.ad_id,
                    h_sum(pix.amount),
                    h_count(),
                    where=(imps.date < cutoff, pix.date < cutoff),
                    join=(imps.site_id, pix.site_id))
    rows = list(joined)

    self.assertEqual(len(rows), len(expected))
    for ad, total, matches in rows:
        want_total, want_matches = expected[ad]
        self.assertEqual(want_total, total)
        self.assertEqual(want_matches, matches)
def test_ordered_aggregation(self):
    """Check an ordered, limited aggregate against hand-made totals.

    The raw (ad_id, cpm_millis) rows for one day are folded into a
    per-ad [sum, row count] reference. The aggregated select must then
    yield counts in non-decreasing order, agree with the reference for
    every ad it returns, and contain min(number of ads, 3) rows.
    """
    imps = Table.from_tag(IMPS)

    res = select(imps.ad_id, imps.cpm_millis,
                 where=imps.date == '2014-01-27')
    # result_iterator yields pairs; only the first element is used here.
    resx = [c for c, _ in result_iterator(res)]

    # Reference values: ad_id -> [sum of cpm_millis, number of rows].
    sum_millis = {}
    for ad_id, millis in resx:
        totals = sum_millis.setdefault(ad_id, [0, 0])
        totals[0] += millis
        totals[1] += 1

    res = select(imps.ad_id,
                 h_sum(imps.cpm_millis),
                 h_count(),
                 where=imps.date == '2014-01-27',
                 order_by=2,
                 limit=3)
    results = [c for c, _ in result_iterator(res)]

    lowest = 0
    for ad_id, millis, count in results:
        # Counts must never decrease from one row to the next.
        self.assertLessEqual(lowest, count)
        lowest = count
        ad_tup = sum_millis[ad_id]
        self.assertEqual(millis, ad_tup[0])
        self.assertEqual(count, ad_tup[1])

    # assertEqual (not assertTrue(a == b)) so a failure reports both sizes.
    self.assertEqual(len(results), min(len(sum_millis), 3))
def test_aggregate_join(self):
    """Check an aggregated two-table join against a nested-loop join.

    Impressions and pixels before the cut-off date are joined by hand
    on site_id, accumulating [sum of pixel amount, match count] per
    ad_id. The joined select must return one row per ad with the same
    sum and count.
    """
    imps = Table.from_tag(IMPS)
    pix = Table.from_tag(PIXELS)

    # result_iterator yields pairs; only the first element is used here.
    imp_sites = [(s, a) for (s, a), _ in result_iterator(
        select(imps.site_id, imps.ad_id, where=imps.date < '2014-01-13'))]
    pix_sites = [(s, a) for (s, a), _ in result_iterator(
        select(pix.site_id, pix.amount, where=pix.date < '2014-01-13'))]

    # Reference join: ad_id -> [sum of amount, number of matching pairs].
    join = {}
    for imp_site, imp_ad_id in imp_sites:
        for pix_site, pix_amount in pix_sites:
            if imp_site == pix_site:
                totals = join.setdefault(imp_ad_id, [0, 0])
                totals[0] += pix_amount
                totals[1] += 1

    res = select(imps.ad_id,
                 h_sum(pix.amount),
                 h_count(),
                 where=(imps.date < '2014-01-13', pix.date < '2014-01-13'),
                 join=(imps.site_id, pix.site_id))
    results = [(ad_id, amount, count)
               for (ad_id, amount, count), _ in result_iterator(res)]

    # Was assertTrue(len(results), len(join)): the second argument is only
    # the failure message there, so the two sizes were never compared.
    self.assertEqual(len(results), len(join))
    for (ad_id, amount, count) in results:
        ramount, rcount = join[ad_id]
        self.assertEqual(ramount, amount)
        self.assertEqual(rcount, count)
def test_ordered_aggregation(self):
    """Check an ordered, limited, nested aggregate against hand-made totals.

    The raw (ad_id, cpm_millis) rows for one day are folded into a
    per-ad [sum, row count] reference. The aggregated select must then
    yield counts in non-decreasing order, agree with the reference for
    every ad it returns, and contain min(number of ads, 3) rows. The
    raw-row result is purged at the end.
    """
    imps = Table.from_tag(IMPS)
    day = '2014-01-27'

    # Reference values: ad_id -> [sum of cpm_millis, number of rows].
    raw_rows = select(imps.ad_id, imps.cpm_millis, where=imps.date == day)
    expected = {}
    for key, cpm in raw_rows:
        acc = expected.setdefault(key, [0, 0])
        acc[0] += cpm
        acc[1] += 1

    aggregated = select(imps.ad_id,
                        h_sum(imps.cpm_millis),
                        h_count(),
                        where=imps.date == day,
                        order_by=2,
                        limit=3,
                        nest=True)
    # The result object is iterated several times below.
    self.assertGreater(len(list(aggregated)), 0)

    previous = 0
    for key, total, n_rows in aggregated:
        # Counts must never decrease from one row to the next.
        self.assertLessEqual(previous, n_rows)
        previous = n_rows
        want_total, want_rows = expected[key]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)

    self.assertEqual(len(list(aggregated)), min(len(expected), 3))
    raw_rows.purge()
def test_nested_agg(self):
    """Check that selecting from a nested aggregate gives the right totals.

    Raw rows after the cut-off date are folded into [sum, row count]
    per (ad_id, date). A nested aggregate is then built and every
    column of it is selected back out; each returned row must match
    the reference.
    """
    imps = Table.from_tag(IMPS)
    cutoff = '2014-01-22'

    # Reference values keyed by str(ad_id) + date.
    raw_rows = select(imps.ad_id, imps.date, imps.cpm_millis,
                      where=imps.date > cutoff)
    expected = {}
    for ad, day, cpm in raw_rows:
        acc = expected.setdefault(str(ad) + day, [0, 0])
        acc[0] += cpm
        acc[1] += 1

    nested = select(imps.ad_id,
                    imps.date,
                    h_sum(imps.cpm_millis),
                    h_count(),
                    where=imps.date > cutoff,
                    nest=True)
    flattened = select(*star(nested), where=nested)

    self.assertGreater(len(list(flattened)), 0)
    for ad, day, total, n_rows in flattened:
        want_total, want_rows = expected[str(ad) + day]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)
def test_nested_self_join(self):
    """Join the impressions table against itself through nested selects.

    Rows before and from the split date are first joined by hand on
    ad_id, accumulating [early cpm sum, late cpm sum, pair count] per
    ad. The same split is then expressed as two nested selects joined
    on 'ad_id', with the output columns given explicit names, and the
    aggregated rows must match the reference.
    """
    imps = Table.from_tag(IMPS)
    split = '2014-01-20'

    early_res = select(imps.ad_id, imps.cpm_millis,
                       where=imps.date < split)
    early_rows = list(early_res)
    late_res = select(imps.ad_id, imps.cpm_millis,
                      where=imps.date >= split)
    late_rows = list(late_res)

    # Reference join: ad_id -> [early sum, late sum, matching pairs].
    expected = {}
    for early_id, early_cpm in early_rows:
        for late_id, late_cpm in late_rows:
            if early_id == late_id:
                acc = expected.setdefault(early_id, [0, 0, 0])
                acc[0] += early_cpm
                acc[1] += late_cpm
                acc[2] += 1

    early_tab = select(imps.ad_id, imps.cpm_millis,
                       where=imps.date < split, nest=True)
    late_tab = select(imps.ad_id, imps.cpm_millis,
                      where=imps.date >= split, nest=True)
    joined = select(early_tab.ad_id.named('adididid'),
                    h_sum(early_tab.cpm_millis).named('emillis'),
                    h_sum(late_tab.cpm_millis).named('lmillis'),
                    h_count(),
                    where=(early_tab, late_tab),
                    join='ad_id')
    joined_rows = list(joined)

    self.assertEqual(len(expected), len(joined_rows))
    for ad, early_total, late_total, pairs in joined_rows:
        want_early, want_late, want_pairs = expected[ad]
        self.assertEqual(early_total, want_early)
        self.assertEqual(late_total, want_late)
        self.assertEqual(pairs, want_pairs)

    early_res.purge()
    late_res.purge()
    joined.purge()
def test_simple_aggregation(self):
    """Check a per-ad sum and count against hand-made totals.

    The raw (ad_id, cpm_millis) rows for one day are folded into a
    per-ad [sum, row count] reference, and every row of the aggregated
    select must agree with it.
    """
    imps = Table.from_tag(IMPS)
    day = '2014-01-27'

    # Reference values: ad_id -> [sum of cpm_millis, number of rows].
    raw_rows = select(imps.ad_id, imps.cpm_millis, where=imps.date == day)
    expected = {}
    for key, cpm in raw_rows:
        acc = expected.setdefault(key, [0, 0])
        acc[0] += cpm
        acc[1] += 1

    aggregated = select(imps.ad_id,
                        h_sum(imps.cpm_millis),
                        h_count(),
                        where=imps.date == day)

    self.assertGreater(len(list(aggregated)), 0)
    for key, total, n_rows in aggregated:
        want_total, want_rows = expected[key]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)
def test_multiple_group_bys(self):
    """Check an aggregate that selects two plain columns, ad_id and date.

    Raw rows after the cut-off date are folded into [sum, row count]
    per (ad_id, date), and every row of the aggregated select must
    agree with that reference.
    """
    imps = Table.from_tag(IMPS)
    cutoff = '2014-01-22'

    # Reference values keyed by str(ad_id) + date.
    raw_rows = select(imps.ad_id, imps.date, imps.cpm_millis,
                      where=imps.date > cutoff)
    expected = {}
    for ad, day, cpm in raw_rows:
        acc = expected.setdefault(str(ad) + day, [0, 0])
        acc[0] += cpm
        acc[1] += 1

    aggregated = select(imps.ad_id,
                        imps.date,
                        h_sum(imps.cpm_millis),
                        h_count(),
                        where=imps.date > cutoff)

    self.assertGreater(len(list(aggregated)), 0)
    for ad, day, total, n_rows in aggregated:
        want_total, want_rows = expected[str(ad) + day]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)
def test_simple_aggregation(self):
    """Check a per-ad sum and count against hand-made totals.

    The raw (ad_id, cpm_millis) rows for one day are folded into a
    per-ad [sum, row count] reference, and every row of the aggregated
    select must agree with it.
    """
    imps = Table.from_tag(IMPS)
    day = '2014-01-27'

    raw_job = select(imps.ad_id, imps.cpm_millis, where=imps.date == day)
    # result_iterator yields pairs; only the first element is used here.
    raw_rows = [row for row, _ in result_iterator(raw_job)]

    # Reference values: ad_id -> [sum of cpm_millis, number of rows].
    expected = {}
    for key, cpm in raw_rows:
        acc = expected.setdefault(key, [0, 0])
        acc[0] += cpm
        acc[1] += 1

    agg_job = select(imps.ad_id,
                     h_sum(imps.cpm_millis),
                     h_count(),
                     where=imps.date == day)
    agg_rows = [row for row, _ in result_iterator(agg_job)]

    for key, total, n_rows in agg_rows:
        want_total, want_rows = expected[key]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)
def test_nested_agg(self):
    """Check that selecting from a nested aggregate gives the right totals.

    Raw rows after the cut-off date are folded into [sum, row count]
    per (ad_id, date) and that result is purged. A nested aggregate is
    then built and every column of it is selected back out; each
    returned row must match the reference before that result is purged
    too.
    """
    imps = Table.from_tag(IMPS)
    cutoff = '2014-01-22'

    # Reference values keyed by str(ad_id) + date.
    raw_rows = select(imps.ad_id, imps.date, imps.cpm_millis,
                      where=imps.date > cutoff)
    expected = {}
    for ad, day, cpm in raw_rows:
        acc = expected.setdefault(str(ad) + day, [0, 0])
        acc[0] += cpm
        acc[1] += 1
    raw_rows.purge()

    nested = select(imps.ad_id,
                    imps.date,
                    h_sum(imps.cpm_millis),
                    h_count(),
                    where=imps.date > cutoff,
                    nest=True)
    flattened = select(*star(nested), where=nested)

    self.assertGreater(len(list(flattened)), 0)
    for ad, day, total, n_rows in flattened:
        want_total, want_rows = expected[str(ad) + day]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)
    flattened.purge()
def test_simple_aggregation(self):
    """Check a per-ad sum and count against hand-made totals.

    The raw (ad_id, cpm_millis) rows for one day are folded into a
    per-ad [sum, row count] reference and that result is purged. Every
    row of the aggregated select must agree with the reference, after
    which the aggregated result is purged as well.
    """
    imps = Table.from_tag(IMPS)
    day = '2014-01-27'

    # Reference values: ad_id -> [sum of cpm_millis, number of rows].
    raw_rows = select(imps.ad_id, imps.cpm_millis, where=imps.date == day)
    expected = {}
    for key, cpm in raw_rows:
        acc = expected.setdefault(key, [0, 0])
        acc[0] += cpm
        acc[1] += 1
    raw_rows.purge()

    aggregated = select(imps.ad_id,
                        h_sum(imps.cpm_millis),
                        h_count(),
                        where=imps.date == day)

    self.assertGreater(len(list(aggregated)), 0)
    for key, total, n_rows in aggregated:
        want_total, want_rows = expected[key]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)
    aggregated.purge()
def test_count(self):
    """Check that a bare count over the impressions table is 200."""
    imps = Table.from_tag(IMPS)
    counted = select(h_count(), where=imps)
    # The count is the first column of the first returned row.
    rows = list(counted)
    total = rows[0][0]
    self.assertEqual(total, 200)
def test_count(self):
    """Check that a bare count over the impressions table is 200.

    The result is purged before the assertion runs.
    """
    imps = Table.from_tag(IMPS)
    counted = select(h_count(), where=imps)
    # The count is the first column of the first returned row.
    rows = list(counted)
    total = rows[0][0]
    counted.purge()
    self.assertEqual(total, 200)
def test_multiple_group_bys(self):
    """Check an aggregate that selects two plain columns, ad_id and date.

    Raw rows after the cut-off date are folded into [sum, row count]
    per (ad_id, date) and that result is purged. Every row of the
    aggregated select must agree with the reference, after which the
    aggregated result is purged as well.
    """
    imps = Table.from_tag(IMPS)
    cutoff = '2014-01-22'

    # Reference values keyed by str(ad_id) + date.
    raw_rows = select(imps.ad_id, imps.date, imps.cpm_millis,
                      where=imps.date > cutoff)
    expected = {}
    for ad, day, cpm in raw_rows:
        acc = expected.setdefault(str(ad) + day, [0, 0])
        acc[0] += cpm
        acc[1] += 1
    raw_rows.purge()

    aggregated = select(imps.ad_id,
                        imps.date,
                        h_sum(imps.cpm_millis),
                        h_count(),
                        where=imps.date > cutoff)

    self.assertGreater(len(list(aggregated)), 0)
    for ad, day, total, n_rows in aggregated:
        want_total, want_rows = expected[str(ad) + day]
        self.assertEqual(total, want_total)
        self.assertEqual(n_rows, want_rows)
    aggregated.purge()