def test_summary_sum(self):
    """Check ``summarize`` with a ``sum`` summarizer, with and without a key.

    Covers the un-keyed call, and the keyed call spelled both as
    ``key=["id"]`` and as ``key="id"``; the two keyed results must agree.
    """
    from ts.flint import summarizers

    volume = self.vol()

    # Un-keyed: a single-row result is expected.
    total_expected = make_pdf([(0, 7800.0)], ["time", "volume_sum"])
    total_actual = volume.summarize(summarizers.sum("volume")).toPandas()
    assert_same(total_actual, total_expected)

    # Keyed by id: one expected row per id.
    keyed_rows = [
        (0, 7, 4100.0),
        (0, 3, 3700.0),
    ]
    keyed_expected = make_pdf(keyed_rows, ["time", "id", "volume_sum"])
    keyed_by_list = volume.summarize(
        summarizers.sum("volume"), key=["id"]).toPandas()
    keyed_by_name = volume.summarize(
        summarizers.sum("volume"), key="id").toPandas()
    assert_same(keyed_by_list, keyed_by_name)

    # XXX: should just do
    #   tests_utils.assert_same(keyed_by_list, keyed_expected, "by id")
    # once https://gitlab.twosigma.com/analytics/huohua/issues/26 gets
    # resolved. Until then each id is compared on its own.
    for key_id, label in ((3, "by id 3"), (7, "by id 7")):
        actual_rows = keyed_by_list[keyed_by_list['id'] == key_id]
        expected_rows = keyed_expected[keyed_expected['id'] == key_id]
        assert_same(
            actual_rows.reset_index(drop=True),
            expected_rows.reset_index(drop=True),
            label,
        )
def test_summarizeIntervals(self):
    """Check ``summarizeIntervals`` with a ``sum`` summarizer over a clock.

    The clock has ticks at 1000, 1100, 1200 and 1300. The expected rows are
    stamped 1000, 1100 and 1200, both without a key and with ``key="id"``.
    """
    from ts.flint import summarizers

    volume = self.vol()

    ticks = [(1000, ), (1100, ), (1200, ), (1300, )]
    clock = self.flintContext.read.pandas(
        test_utils.make_pdf(ticks, ["time"]))

    # Without a key: one row per interval.
    actual_total = volume.summarizeIntervals(
        clock, summarizers.sum("volume")).toPandas()
    total_rows = [
        (1000, 1000.0),
        (1100, 2600.0),
        (1200, 4200.0),
    ]
    expected_total = test_utils.make_pdf(total_rows, ["time", "volume_sum"])
    test_utils.assert_same(actual_total, expected_total)

    # Keyed by id: one row per (interval, id).
    actual_by_id = volume.summarizeIntervals(
        clock, summarizers.sum("volume"), key="id").toPandas()
    by_id_rows = [
        (1000, 7, 500.0),
        (1000, 3, 500.0),
        (1100, 3, 1200.0),
        (1100, 7, 1400.0),
        (1200, 3, 2000.0),
        (1200, 7, 2200.0),
    ]
    expected_by_id = test_utils.make_pdf(
        by_id_rows, ["time", "id", "volume_sum"])
    test_utils.assert_same(actual_by_id, expected_by_id)
def test_summarizeCycles(self):
    """Check ``summarizeCycles`` with a ``sum`` summarizer.

    The un-keyed call runs against ``self.vol()``; the ``key="id"`` call
    runs against ``self.vol2()``.
    """
    from ts.flint import summarizers

    volume = self.vol()
    volume2 = self.vol2()

    # Without a key: one row per distinct timestamp.
    per_cycle_rows = [
        (1000, 300.0),
        (1050, 700.0),
        (1100, 1100.0),
        (1150, 1500.0),
        (1200, 1900.0),
        (1250, 2300.0),
    ]
    expected_per_cycle = make_pdf(per_cycle_rows, ["time", "volume_sum"])
    actual_per_cycle = volume.summarizeCycles(
        summarizers.sum("volume")).toPandas()
    assert_same(actual_per_cycle, expected_per_cycle)

    # Keyed by id: one row per (timestamp, id).
    per_cycle_id_rows = [
        (1000, 7, 200.0),
        (1000, 3, 400.0),
        (1050, 3, 600.0),
        (1050, 7, 800.0),
        (1100, 3, 1000.0),
        (1100, 7, 1200.0),
        (1150, 3, 1400.0),
        (1150, 7, 1600.0),
        (1200, 3, 1800.0),
        (1200, 7, 2000.0),
        (1250, 3, 2200.0),
        (1250, 7, 2400.0),
    ]
    expected_per_cycle_id = make_pdf(
        per_cycle_id_rows, ["time", "id", "volume_sum"])
    actual_per_cycle_id = volume2.summarizeCycles(
        summarizers.sum("volume"), key="id").toPandas()
    assert_same(actual_per_cycle_id, expected_per_cycle_id)
def test_summarizeIntervals(self):
    """Check ``summarizeIntervals`` with a ``sum`` summarizer over a clock.

    The clock has ticks at 1000, 1100, 1200 and 1300. In this variant the
    expected rows are stamped 1100, 1200 and 1300, both without a key and
    with ``key="id"``.
    """
    from ts.flint import summarizers

    volume = self.vol()

    ticks = [(1000,), (1100,), (1200,), (1300,)]
    clock = self.flintContext.read.pandas(make_pdf(ticks, ["time"]))

    # Without a key: one row per interval.
    actual_total = volume.summarizeIntervals(
        clock, summarizers.sum("volume")).toPandas()
    total_rows = [
        (1100, 1000.0),
        (1200, 2600.0),
        (1300, 4200.0),
    ]
    expected_total = make_pdf(total_rows, ["time", "volume_sum"])
    assert_same(actual_total, expected_total)

    # Keyed by id: one row per (interval, id).
    actual_by_id = volume.summarizeIntervals(
        clock, summarizers.sum("volume"), key="id").toPandas()
    by_id_rows = [
        (1100, 7, 500.0),
        (1100, 3, 500.0),
        (1200, 3, 1200.0),
        (1200, 7, 1400.0),
        (1300, 3, 2000.0),
        (1300, 7, 2200.0),
    ]
    expected_by_id = make_pdf(by_id_rows, ["time", "id", "volume_sum"])
    assert_same(actual_by_id, expected_by_id)
def test_summarizeIntervals(flintContext, tests_utils, summarizers, vol):
    """Check ``summarizeIntervals`` with a ``sum`` summarizer over a clock.

    The clock has ticks at 1000, 1100, 1200 and 1300. The expected rows are
    stamped 1000, 1100 and 1200, both without a key and with ``key="id"``.

    NOTE(review): the arguments are presumably pytest fixtures -- confirm
    against the project's conftest.
    """
    ticks = [(1000, ), (1100, ), (1200, ), (1300, )]
    clock = flintContext.read.pandas(make_pdf(ticks, ["time"]))

    # Without a key: one row per interval.
    actual_total = vol.summarizeIntervals(
        clock, summarizers.sum("volume")).toPandas()
    total_rows = [
        (1000, 1000.0),
        (1100, 2600.0),
        (1200, 4200.0),
    ]
    expected_total = make_pdf(total_rows, ["time", "volume_sum"])
    tests_utils.assert_same(actual_total, expected_total)

    # Keyed by id: one row per (interval, id).
    actual_by_id = vol.summarizeIntervals(
        clock, summarizers.sum("volume"), key="id").toPandas()
    by_id_rows = [
        (1000, 7, 500.0),
        (1000, 3, 500.0),
        (1100, 3, 1200.0),
        (1100, 7, 1400.0),
        (1200, 3, 2000.0),
        (1200, 7, 2200.0),
    ]
    expected_by_id = make_pdf(by_id_rows, ["time", "id", "volume_sum"])
    tests_utils.assert_same(actual_by_id, expected_by_id)
def test_summarizeCycles(self):
    """Verify ``summarizeCycles`` sums of ``volume``, un-keyed and keyed.

    ``self.vol()`` feeds the un-keyed scenario and ``self.vol2()`` feeds
    the ``key="id"`` scenario.
    """
    from ts.flint import summarizers

    frame = self.vol()
    frame2 = self.vol2()

    # Scenario 1: no key.
    want_unkeyed = make_pdf(
        [
            (1000, 300.0),
            (1050, 700.0),
            (1100, 1100.0),
            (1150, 1500.0),
            (1200, 1900.0),
            (1250, 2300.0),
        ],
        ["time", "volume_sum"],
    )
    got_unkeyed = frame.summarizeCycles(summarizers.sum("volume")).toPandas()
    assert_same(got_unkeyed, want_unkeyed)

    # Scenario 2: keyed by id.
    want_keyed = make_pdf(
        [
            (1000, 7, 200.0),
            (1000, 3, 400.0),
            (1050, 3, 600.0),
            (1050, 7, 800.0),
            (1100, 3, 1000.0),
            (1100, 7, 1200.0),
            (1150, 3, 1400.0),
            (1150, 7, 1600.0),
            (1200, 3, 1800.0),
            (1200, 7, 2000.0),
            (1250, 3, 2200.0),
            (1250, 7, 2400.0),
        ],
        ["time", "id", "volume_sum"],
    )
    got_keyed = frame2.summarizeCycles(
        summarizers.sum("volume"), key="id").toPandas()
    assert_same(got_keyed, want_keyed)
def test_summarizeCycles(summarizers, tests_utils, vol, vol2):
    """Check ``summarizeCycles`` with a ``sum`` summarizer.

    ``vol`` is used for the un-keyed call and ``vol2`` for the ``key="id"``
    call.

    NOTE(review): the arguments are presumably pytest fixtures -- confirm
    against the project's conftest.
    """
    # Without a key: one row per distinct timestamp.
    per_cycle_rows = [
        (1000, 300.0),
        (1050, 700.0),
        (1100, 1100.0),
        (1150, 1500.0),
        (1200, 1900.0),
        (1250, 2300.0),
    ]
    expected_per_cycle = make_pdf(per_cycle_rows, ["time", "volume_sum"])
    actual_per_cycle = vol.summarizeCycles(
        summarizers.sum("volume")).toPandas()
    tests_utils.assert_same(actual_per_cycle, expected_per_cycle)

    # Keyed by id: one row per (timestamp, id).
    per_cycle_id_rows = [
        (1000, 7, 200.0),
        (1000, 3, 400.0),
        (1050, 3, 600.0),
        (1050, 7, 800.0),
        (1100, 3, 1000.0),
        (1100, 7, 1200.0),
        (1150, 3, 1400.0),
        (1150, 7, 1600.0),
        (1200, 3, 1800.0),
        (1200, 7, 2000.0),
        (1250, 3, 2200.0),
        (1250, 7, 2400.0),
    ]
    expected_per_cycle_id = make_pdf(
        per_cycle_id_rows, ["time", "id", "volume_sum"])
    actual_per_cycle_id = vol2.summarizeCycles(
        summarizers.sum("volume"), key="id").toPandas()
    tests_utils.assert_same(actual_per_cycle_id, expected_per_cycle_id)
def test_summarizeWindows(self):
    """Check ``summarizeWindows`` with a ``sum`` summarizer.

    One ``past_absolute_time('99s')`` window object is shared by the
    un-keyed call and the ``key="id"`` call. Both results keep the input
    columns and add ``volume_sum``.
    """
    from ts.flint import summarizers, windows

    volume = self.vol()
    lookback = windows.past_absolute_time('99s')

    # Without a key.
    actual_all = volume.summarizeWindows(
        lookback, summarizers.sum("volume")).toPandas()
    all_rows = [
        (1000, 7, 100, 300.0),
        (1000, 3, 200, 300.0),
        (1050, 3, 300, 1000.0),
        (1050, 7, 400, 1000.0),
        (1100, 3, 500, 1800.0),
        (1100, 7, 600, 1800.0),
        (1150, 3, 700, 2600.0),
        (1150, 7, 800, 2600.0),
        (1200, 3, 900, 3400.0),
        (1200, 7, 1000, 3400.0),
        (1250, 3, 1100, 4200.0),
        (1250, 7, 1200, 4200.0),
    ]
    expected_all = make_pdf(all_rows, ["time", "id", "volume", "volume_sum"])
    assert_same(actual_all, expected_all)

    # Keyed by id.
    actual_by_id = volume.summarizeWindows(
        lookback, summarizers.sum("volume"), key="id").toPandas()
    by_id_rows = [
        (1000, 7, 100, 100.0),
        (1000, 3, 200, 200.0),
        (1050, 3, 300, 500.0),
        (1050, 7, 400, 500.0),
        (1100, 3, 500, 800.0),
        (1100, 7, 600, 1000.0),
        (1150, 3, 700, 1200.0),
        (1150, 7, 800, 1400.0),
        (1200, 3, 900, 1600.0),
        (1200, 7, 1000, 1800.0),
        (1250, 3, 1100, 2000.0),
        (1250, 7, 1200, 2200.0),
    ]
    expected_by_id = make_pdf(
        by_id_rows, ["time", "id", "volume", "volume_sum"])
    assert_same(actual_by_id, expected_by_id)
def test_summary_sum(self):
    """Verify ``summarize`` sums of ``volume``, un-keyed and keyed by id.

    The keyed call is made twice, with ``key=["id"]`` and with
    ``key="id"``, and the two results are required to match.
    """
    from ts.flint import summarizers

    frame = self.vol()

    # Scenario 1: no key.
    want_total = test_utils.make_pdf([(0, 7800.0)], ["time", "volume_sum"])
    got_total = frame.summarize(summarizers.sum("volume")).toPandas()
    test_utils.assert_same(got_total, want_total)

    # Scenario 2: keyed by id.
    want_keyed = test_utils.make_pdf(
        [
            (0, 7, 4100.0),
            (0, 3, 3700.0),
        ],
        ["time", "id", "volume_sum"],
    )
    got_keyed = frame.summarize(
        summarizers.sum("volume"), key=["id"]).toPandas()
    got_keyed_str = frame.summarize(
        summarizers.sum("volume"), key="id").toPandas()
    test_utils.assert_same(got_keyed, got_keyed_str)

    # XXX: should just do:
    #   test_utils.assert_same(got_keyed, want_keyed, "by id")
    # once https://gitlab.twosigma.com/analytics/huohua/issues/26
    # gets resolved. Until then each id is compared on its own.
    for wanted_id, message in ((3, "by id 3"), (7, "by id 7")):
        got_slice = got_keyed[got_keyed['id'] == wanted_id]
        want_slice = want_keyed[want_keyed['id'] == wanted_id]
        test_utils.assert_same(
            got_slice.reset_index(drop=True),
            want_slice.reset_index(drop=True),
            message,
        )
def test_addSummaryColumns(self):
    """Check ``addSummaryColumns`` with a ``sum`` summarizer.

    Both calls are expected to keep every input row and add a
    ``volume_sum`` column. The second call passes ``"id"`` as the second
    positional argument.
    """
    from ts.flint import summarizers

    volume = self.vol()

    # Without a key.
    running_rows = [
        (1000, 7, 100, 100.0),
        (1000, 3, 200, 300.0),
        (1050, 3, 300, 600.0),
        (1050, 7, 400, 1000.0),
        (1100, 3, 500, 1500.0),
        (1100, 7, 600, 2100.0),
        (1150, 3, 700, 2800.0),
        (1150, 7, 800, 3600.0),
        (1200, 3, 900, 4500.0),
        (1200, 7, 1000, 5500.0),
        (1250, 3, 1100, 6600.0),
        (1250, 7, 1200, 7800.0),
    ]
    expected_running = make_pdf(
        running_rows, ["time", "id", "volume", "volume_sum"])
    actual_running = volume.addSummaryColumns(
        summarizers.sum("volume")).toPandas()
    assert_same(actual_running, expected_running)

    # With "id" as the key.
    running_by_id_rows = [
        (1000, 7, 100, 100.0),
        (1000, 3, 200, 200.0),
        (1050, 3, 300, 500.0),
        (1050, 7, 400, 500.0),
        (1100, 3, 500, 1000.0),
        (1100, 7, 600, 1100.0),
        (1150, 3, 700, 1700.0),
        (1150, 7, 800, 1900.0),
        (1200, 3, 900, 2600.0),
        (1200, 7, 1000, 2900.0),
        (1250, 3, 1100, 3700.0),
        (1250, 7, 1200, 4100.0),
    ]
    expected_running_by_id = make_pdf(
        running_by_id_rows, ["time", "id", "volume", "volume_sum"])
    actual_running_by_id = volume.addSummaryColumns(
        summarizers.sum("volume"), "id").toPandas()
    assert_same(actual_running_by_id, expected_running_by_id, "with key")
def test_addSummaryColumns(summarizers, tests_utils, vol):
    """Verify ``addSummaryColumns`` sums of ``volume``, un-keyed and keyed.

    Each expected frame keeps the input columns and adds ``volume_sum``.
    The keyed call passes ``"id"`` as the second positional argument.

    NOTE(review): the arguments are presumably pytest fixtures -- confirm
    against the project's conftest.
    """
    # Scenario 1: no key.
    want_unkeyed = make_pdf(
        [
            (1000, 7, 100, 100.0),
            (1000, 3, 200, 300.0),
            (1050, 3, 300, 600.0),
            (1050, 7, 400, 1000.0),
            (1100, 3, 500, 1500.0),
            (1100, 7, 600, 2100.0),
            (1150, 3, 700, 2800.0),
            (1150, 7, 800, 3600.0),
            (1200, 3, 900, 4500.0),
            (1200, 7, 1000, 5500.0),
            (1250, 3, 1100, 6600.0),
            (1250, 7, 1200, 7800.0),
        ],
        ["time", "id", "volume", "volume_sum"],
    )
    got_unkeyed = vol.addSummaryColumns(summarizers.sum("volume")).toPandas()
    tests_utils.assert_same(got_unkeyed, want_unkeyed)

    # Scenario 2: "id" as the key.
    want_keyed = make_pdf(
        [
            (1000, 7, 100, 100.0),
            (1000, 3, 200, 200.0),
            (1050, 3, 300, 500.0),
            (1050, 7, 400, 500.0),
            (1100, 3, 500, 1000.0),
            (1100, 7, 600, 1100.0),
            (1150, 3, 700, 1700.0),
            (1150, 7, 800, 1900.0),
            (1200, 3, 900, 2600.0),
            (1200, 7, 1000, 2900.0),
            (1250, 3, 1100, 3700.0),
            (1250, 7, 1200, 4100.0),
        ],
        ["time", "id", "volume", "volume_sum"],
    )
    got_keyed = vol.addSummaryColumns(
        summarizers.sum("volume"), "id").toPandas()
    tests_utils.assert_same(got_keyed, want_keyed, "with key")
def test_summarizeWindows(flintContext, tests_utils, windows, summarizers, vol):
    """Check ``summarizeWindows`` with a ``sum`` summarizer in three setups.

    1. ``vol`` windowed without a key.
    2. ``vol`` windowed with ``key="id"``.
    3. A (time, id) frame windowed with ``key="id"`` and ``other=vol``; the
       expected frame holds that frame's rows plus ``volume_sum``.

    Every call builds its own ``past_absolute_time('99ns')`` window.

    NOTE(review): the arguments are presumably pytest fixtures -- confirm
    against the project's conftest.
    """
    # 1. Without a key.
    actual_all = vol.summarizeWindows(
        windows.past_absolute_time('99ns'),
        summarizers.sum("volume"),
    ).toPandas()
    all_rows = [
        (1000, 7, 100, 300.0),
        (1000, 3, 200, 300.0),
        (1050, 3, 300, 1000.0),
        (1050, 7, 400, 1000.0),
        (1100, 3, 500, 1800.0),
        (1100, 7, 600, 1800.0),
        (1150, 3, 700, 2600.0),
        (1150, 7, 800, 2600.0),
        (1200, 3, 900, 3400.0),
        (1200, 7, 1000, 3400.0),
        (1250, 3, 1100, 4200.0),
        (1250, 7, 1200, 4200.0),
    ]
    expected_all = make_pdf(all_rows, ["time", "id", "volume", "volume_sum"])
    tests_utils.assert_same(actual_all, expected_all)

    # 2. Keyed by id.
    actual_by_id = vol.summarizeWindows(
        windows.past_absolute_time('99ns'),
        summarizers.sum("volume"),
        key="id",
    ).toPandas()
    by_id_rows = [
        (1000, 7, 100, 100.0),
        (1000, 3, 200, 200.0),
        (1050, 3, 300, 500.0),
        (1050, 7, 400, 500.0),
        (1100, 3, 500, 800.0),
        (1100, 7, 600, 1000.0),
        (1150, 3, 700, 1200.0),
        (1150, 7, 800, 1400.0),
        (1200, 3, 900, 1600.0),
        (1200, 7, 1000, 1800.0),
        (1250, 3, 1100, 2000.0),
        (1250, 7, 1200, 2200.0),
    ]
    expected_by_id = make_pdf(
        by_id_rows, ["time", "id", "volume", "volume_sum"])
    tests_utils.assert_same(actual_by_id, expected_by_id)

    # 3. Window a separate (time, id) frame, with `vol` passed as `other`.
    anchor_rows = [
        (1000, 3),
        (1000, 7),
        (1050, 3),
        (1050, 7),
        (1100, 3),
        (1150, 3),
        (1150, 7),
        (1200, 3),
        (1200, 7),
        (1250, 7),
    ]
    interval_with_id = flintContext.read.pandas(
        make_pdf(anchor_rows, ["time", "id"]))
    actual_other = interval_with_id.summarizeWindows(
        windows.past_absolute_time('99ns'),
        summarizers.sum("volume"),
        key="id",
        other=vol,
    ).toPandas()
    other_rows = [
        (1000, 3, 200.0),
        (1000, 7, 100.0),
        (1050, 3, 500.0),
        (1050, 7, 500.0),
        (1100, 3, 800.0),
        (1150, 3, 1200.0),
        (1150, 7, 1400.0),
        (1200, 3, 1600.0),
        (1200, 7, 1800.0),
        (1250, 7, 2200.0),
    ]
    expected_other = make_pdf(other_rows, ["time", "id", "volume_sum"])
    tests_utils.assert_same(actual_other, expected_other)