示例#1
0
    def test_summary_sum(self):
        """Check ``summarize`` with a sum summarizer, globally and keyed by ``id``."""
        from ts.flint import summarizers

        vol = self.vol()

        # Un-keyed summary: a single row is expected.
        total_expected = make_pdf([(0, 7800.0)], ["time", "volume_sum"])
        total_actual = vol.summarize(summarizers.sum("volume")).toPandas()
        assert_same(total_actual, total_expected)

        keyed_expected = make_pdf(
            [(0, 7, 4100.0), (0, 3, 3700.0)],
            ["time", "id", "volume_sum"],
        )

        # The key given as a one-element list or as a bare string must
        # produce the same frame.
        keyed_by_list = vol.summarize(
            summarizers.sum("volume"), key=["id"]
        ).toPandas()
        keyed_by_str = vol.summarize(
            summarizers.sum("volume"), key="id"
        ).toPandas()
        assert_same(keyed_by_list, keyed_by_str)

        # XXX: the per-id comparison below is a workaround; it should become a
        # single whole-frame assert_same(..., "by id") once
        # https://gitlab.twosigma.com/analytics/huohua/issues/26 gets resolved.
        for key_id in (3, 7):
            assert_same(
                keyed_by_list[keyed_by_list['id'] == key_id].reset_index(drop=True),
                keyed_expected[keyed_expected['id'] == key_id].reset_index(drop=True),
                "by id {}".format(key_id),
            )
示例#2
0
    def test_summarizeIntervals(self):
        """Check ``summarizeIntervals`` volume sums per clock interval, un-keyed and by ``id``."""
        from ts.flint import summarizers

        vol = self.vol()

        # Clock dataframe whose timestamps delimit the intervals.
        clock_pdf = test_utils.make_pdf(
            [(1000,), (1100,), (1200,), (1300,)],
            ["time"],
        )
        clock = self.flintContext.read.pandas(clock_pdf)

        # --- without a key ---
        unkeyed_actual = vol.summarizeIntervals(
            clock,
            summarizers.sum("volume"),
        ).toPandas()
        unkeyed_expected = test_utils.make_pdf(
            [
                (1000, 1000.0),
                (1100, 2600.0),
                (1200, 4200.0),
            ],
            ["time", "volume_sum"],
        )
        test_utils.assert_same(unkeyed_actual, unkeyed_expected)

        # --- keyed by id ---
        keyed_actual = vol.summarizeIntervals(
            clock,
            summarizers.sum("volume"),
            key="id",
        ).toPandas()
        keyed_expected = test_utils.make_pdf(
            [
                (1000, 7, 500.0),
                (1000, 3, 500.0),
                (1100, 3, 1200.0),
                (1100, 7, 1400.0),
                (1200, 3, 2000.0),
                (1200, 7, 2200.0),
            ],
            ["time", "id", "volume_sum"],
        )
        test_utils.assert_same(keyed_actual, keyed_expected)
示例#3
0
    def test_summarizeCycles(self):
        """Check ``summarizeCycles`` volume sums: un-keyed on ``vol``, keyed by ``id`` on ``vol2``."""
        from ts.flint import summarizers

        vol = self.vol()
        vol2 = self.vol2()

        # --- without a key: one row per distinct timestamp ---
        cycle_expected = make_pdf(
            [
                (1000, 300.0),
                (1050, 700.0),
                (1100, 1100.0),
                (1150, 1500.0),
                (1200, 1900.0),
                (1250, 2300.0),
            ],
            ["time", "volume_sum"],
        )
        cycle_actual = vol.summarizeCycles(summarizers.sum("volume")).toPandas()
        assert_same(cycle_actual, cycle_expected)

        # --- keyed by id: one row per (timestamp, id) pair ---
        keyed_expected = make_pdf(
            [
                (1000, 7, 200.0),
                (1000, 3, 400.0),
                (1050, 3, 600.0),
                (1050, 7, 800.0),
                (1100, 3, 1000.0),
                (1100, 7, 1200.0),
                (1150, 3, 1400.0),
                (1150, 7, 1600.0),
                (1200, 3, 1800.0),
                (1200, 7, 2000.0),
                (1250, 3, 2200.0),
                (1250, 7, 2400.0),
            ],
            ["time", "id", "volume_sum"],
        )
        keyed_actual = vol2.summarizeCycles(
            summarizers.sum("volume"),
            key="id",
        ).toPandas()
        assert_same(keyed_actual, keyed_expected)
示例#4
0
    def test_summarizeIntervals(self):
        """Check ``summarizeIntervals`` volume sums per clock interval, un-keyed and by ``id``."""
        from ts.flint import summarizers

        vol = self.vol()

        # Clock dataframe whose timestamps delimit the intervals.
        clock_pdf = make_pdf(
            [(1000,), (1100,), (1200,), (1300,)],
            ["time"],
        )
        clock = self.flintContext.read.pandas(clock_pdf)

        # --- without a key ---
        unkeyed_actual = vol.summarizeIntervals(
            clock,
            summarizers.sum("volume"),
        ).toPandas()
        unkeyed_expected = make_pdf(
            [
                (1100, 1000.0),
                (1200, 2600.0),
                (1300, 4200.0),
            ],
            ["time", "volume_sum"],
        )
        assert_same(unkeyed_actual, unkeyed_expected)

        # --- keyed by id ---
        keyed_actual = vol.summarizeIntervals(
            clock,
            summarizers.sum("volume"),
            key="id",
        ).toPandas()
        keyed_expected = make_pdf(
            [
                (1100, 7, 500.0),
                (1100, 3, 500.0),
                (1200, 3, 1200.0),
                (1200, 7, 1400.0),
                (1300, 3, 2000.0),
                (1300, 7, 2200.0),
            ],
            ["time", "id", "volume_sum"],
        )
        assert_same(keyed_actual, keyed_expected)
示例#5
0
def test_summarizeIntervals(flintContext, tests_utils, summarizers, vol):
    """Check ``summarizeIntervals`` volume sums per clock interval, un-keyed and by ``id``."""
    # Clock dataframe whose timestamps delimit the intervals.
    clock_pdf = make_pdf(
        [(1000,), (1100,), (1200,), (1300,)],
        ["time"],
    )
    clock = flintContext.read.pandas(clock_pdf)

    # --- without a key ---
    unkeyed_actual = vol.summarizeIntervals(
        clock,
        summarizers.sum("volume"),
    ).toPandas()
    unkeyed_expected = make_pdf(
        [
            (1000, 1000.0),
            (1100, 2600.0),
            (1200, 4200.0),
        ],
        ["time", "volume_sum"],
    )
    tests_utils.assert_same(unkeyed_actual, unkeyed_expected)

    # --- keyed by id ---
    keyed_actual = vol.summarizeIntervals(
        clock,
        summarizers.sum("volume"),
        key="id",
    ).toPandas()
    keyed_expected = make_pdf(
        [
            (1000, 7, 500.0),
            (1000, 3, 500.0),
            (1100, 3, 1200.0),
            (1100, 7, 1400.0),
            (1200, 3, 2000.0),
            (1200, 7, 2200.0),
        ],
        ["time", "id", "volume_sum"],
    )
    tests_utils.assert_same(keyed_actual, keyed_expected)
示例#6
0
    def test_summarizeCycles(self):
        """Check ``summarizeCycles`` volume sums: un-keyed on ``vol``, keyed by ``id`` on ``vol2``."""
        from ts.flint import summarizers

        vol = self.vol()
        vol2 = self.vol2()

        # --- without a key: one row per distinct timestamp ---
        cycle_expected = make_pdf(
            [
                (1000, 300.0),
                (1050, 700.0),
                (1100, 1100.0),
                (1150, 1500.0),
                (1200, 1900.0),
                (1250, 2300.0),
            ],
            ["time", "volume_sum"],
        )
        cycle_actual = vol.summarizeCycles(summarizers.sum("volume")).toPandas()
        assert_same(cycle_actual, cycle_expected)

        # --- keyed by id: one row per (timestamp, id) pair ---
        keyed_expected = make_pdf(
            [
                (1000, 7, 200.0),
                (1000, 3, 400.0),
                (1050, 3, 600.0),
                (1050, 7, 800.0),
                (1100, 3, 1000.0),
                (1100, 7, 1200.0),
                (1150, 3, 1400.0),
                (1150, 7, 1600.0),
                (1200, 3, 1800.0),
                (1200, 7, 2000.0),
                (1250, 3, 2200.0),
                (1250, 7, 2400.0),
            ],
            ["time", "id", "volume_sum"],
        )
        keyed_actual = vol2.summarizeCycles(
            summarizers.sum("volume"),
            key="id",
        ).toPandas()
        assert_same(keyed_actual, keyed_expected)
示例#7
0
def test_summarizeCycles(summarizers, tests_utils, vol, vol2):
    """Check ``summarizeCycles`` volume sums: un-keyed on ``vol``, keyed by ``id`` on ``vol2``."""
    # --- without a key: one row per distinct timestamp ---
    cycle_expected = make_pdf(
        [
            (1000, 300.0),
            (1050, 700.0),
            (1100, 1100.0),
            (1150, 1500.0),
            (1200, 1900.0),
            (1250, 2300.0),
        ],
        ["time", "volume_sum"],
    )
    cycle_actual = vol.summarizeCycles(summarizers.sum("volume")).toPandas()
    tests_utils.assert_same(cycle_actual, cycle_expected)

    # --- keyed by id: one row per (timestamp, id) pair ---
    keyed_expected = make_pdf(
        [
            (1000, 7, 200.0),
            (1000, 3, 400.0),
            (1050, 3, 600.0),
            (1050, 7, 800.0),
            (1100, 3, 1000.0),
            (1100, 7, 1200.0),
            (1150, 3, 1400.0),
            (1150, 7, 1600.0),
            (1200, 3, 1800.0),
            (1200, 7, 2000.0),
            (1250, 3, 2200.0),
            (1250, 7, 2400.0),
        ],
        ["time", "id", "volume_sum"],
    )
    keyed_actual = vol2.summarizeCycles(
        summarizers.sum("volume"),
        key="id",
    ).toPandas()
    tests_utils.assert_same(keyed_actual, keyed_expected)
示例#8
0
    def test_summarizeWindows(self):
        """Check ``summarizeWindows`` volume sums over a ``past_absolute_time('99s')`` window."""
        from ts.flint import windows
        from ts.flint import summarizers

        vol = self.vol()
        window = windows.past_absolute_time('99s')
        columns = ["time", "id", "volume", "volume_sum"]

        # --- without a key: the window sum spans both ids ---
        unkeyed_actual = vol.summarizeWindows(
            window,
            summarizers.sum("volume"),
        ).toPandas()
        unkeyed_expected = make_pdf(
            [
                (1000, 7, 100, 300.0),
                (1000, 3, 200, 300.0),
                (1050, 3, 300, 1000.0),
                (1050, 7, 400, 1000.0),
                (1100, 3, 500, 1800.0),
                (1100, 7, 600, 1800.0),
                (1150, 3, 700, 2600.0),
                (1150, 7, 800, 2600.0),
                (1200, 3, 900, 3400.0),
                (1200, 7, 1000, 3400.0),
                (1250, 3, 1100, 4200.0),
                (1250, 7, 1200, 4200.0),
            ],
            columns,
        )
        assert_same(unkeyed_actual, unkeyed_expected)

        # --- keyed by id: the window sum is computed per id ---
        keyed_actual = vol.summarizeWindows(
            window,
            summarizers.sum("volume"),
            key="id",
        ).toPandas()
        keyed_expected = make_pdf(
            [
                (1000, 7, 100, 100.0),
                (1000, 3, 200, 200.0),
                (1050, 3, 300, 500.0),
                (1050, 7, 400, 500.0),
                (1100, 3, 500, 800.0),
                (1100, 7, 600, 1000.0),
                (1150, 3, 700, 1200.0),
                (1150, 7, 800, 1400.0),
                (1200, 3, 900, 1600.0),
                (1200, 7, 1000, 1800.0),
                (1250, 3, 1100, 2000.0),
                (1250, 7, 1200, 2200.0),
            ],
            columns,
        )
        assert_same(keyed_actual, keyed_expected)
示例#9
0
    def test_summarizeWindows(self):
        """Check ``summarizeWindows`` volume sums over a ``past_absolute_time('99s')`` window."""
        from ts.flint import windows
        from ts.flint import summarizers

        vol = self.vol()
        window = windows.past_absolute_time('99s')
        columns = ["time", "id", "volume", "volume_sum"]

        # --- without a key: the window sum spans both ids ---
        unkeyed_actual = vol.summarizeWindows(
            window,
            summarizers.sum("volume"),
        ).toPandas()
        unkeyed_expected = make_pdf(
            [
                (1000, 7, 100, 300.0),
                (1000, 3, 200, 300.0),
                (1050, 3, 300, 1000.0),
                (1050, 7, 400, 1000.0),
                (1100, 3, 500, 1800.0),
                (1100, 7, 600, 1800.0),
                (1150, 3, 700, 2600.0),
                (1150, 7, 800, 2600.0),
                (1200, 3, 900, 3400.0),
                (1200, 7, 1000, 3400.0),
                (1250, 3, 1100, 4200.0),
                (1250, 7, 1200, 4200.0),
            ],
            columns,
        )
        assert_same(unkeyed_actual, unkeyed_expected)

        # --- keyed by id: the window sum is computed per id ---
        keyed_actual = vol.summarizeWindows(
            window,
            summarizers.sum("volume"),
            key="id",
        ).toPandas()
        keyed_expected = make_pdf(
            [
                (1000, 7, 100, 100.0),
                (1000, 3, 200, 200.0),
                (1050, 3, 300, 500.0),
                (1050, 7, 400, 500.0),
                (1100, 3, 500, 800.0),
                (1100, 7, 600, 1000.0),
                (1150, 3, 700, 1200.0),
                (1150, 7, 800, 1400.0),
                (1200, 3, 900, 1600.0),
                (1200, 7, 1000, 1800.0),
                (1250, 3, 1100, 2000.0),
                (1250, 7, 1200, 2200.0),
            ],
            columns,
        )
        assert_same(keyed_actual, keyed_expected)
示例#10
0
    def test_summary_sum(self):
        """Check ``summarize`` with a sum summarizer, globally and keyed by ``id``."""
        from ts.flint import summarizers

        vol = self.vol()

        # Un-keyed summary: a single row is expected.
        total_expected = test_utils.make_pdf([(0, 7800.0)], ["time", "volume_sum"])
        total_actual = vol.summarize(summarizers.sum("volume")).toPandas()
        test_utils.assert_same(total_actual, total_expected)

        keyed_expected = test_utils.make_pdf(
            [(0, 7, 4100.0), (0, 3, 3700.0)],
            ["time", "id", "volume_sum"],
        )

        # The key given as a one-element list or as a bare string must
        # produce the same frame.
        keyed_by_list = vol.summarize(
            summarizers.sum("volume"), key=["id"]
        ).toPandas()
        keyed_by_str = vol.summarize(
            summarizers.sum("volume"), key="id"
        ).toPandas()
        test_utils.assert_same(keyed_by_list, keyed_by_str)

        # XXX: the per-id comparison below is a workaround; it should become
        # test_utils.assert_same(new_pdf, expected_pdf, "by id")
        # once https://gitlab.twosigma.com/analytics/huohua/issues/26
        # gets resolved.
        for key_id in (3, 7):
            test_utils.assert_same(
                keyed_by_list[keyed_by_list['id'] == key_id].reset_index(drop=True),
                keyed_expected[keyed_expected['id'] == key_id].reset_index(drop=True),
                "by id {}".format(key_id),
            )
示例#11
0
    def test_addSummaryColumns(self):
        """Check ``addSummaryColumns`` with a sum summarizer, without and with an ``id`` key.

        The expected ``volume_sum`` column holds running totals of ``volume``
        in row order: over all rows in the first case, per ``id`` in the second.
        """
        from ts.flint import summarizers

        vol = self.vol()
        columns = ["time", "id", "volume", "volume_sum"]

        # --- without a key ---
        running_expected = make_pdf(
            [
                (1000, 7, 100, 100.0),
                (1000, 3, 200, 300.0),
                (1050, 3, 300, 600.0),
                (1050, 7, 400, 1000.0),
                (1100, 3, 500, 1500.0),
                (1100, 7, 600, 2100.0),
                (1150, 3, 700, 2800.0),
                (1150, 7, 800, 3600.0),
                (1200, 3, 900, 4500.0),
                (1200, 7, 1000, 5500.0),
                (1250, 3, 1100, 6600.0),
                (1250, 7, 1200, 7800.0),
            ],
            columns,
        )
        running_actual = vol.addSummaryColumns(summarizers.sum("volume")).toPandas()
        assert_same(running_actual, running_expected)

        # --- keyed by id (key passed positionally) ---
        keyed_expected = make_pdf(
            [
                (1000, 7, 100, 100.0),
                (1000, 3, 200, 200.0),
                (1050, 3, 300, 500.0),
                (1050, 7, 400, 500.0),
                (1100, 3, 500, 1000.0),
                (1100, 7, 600, 1100.0),
                (1150, 3, 700, 1700.0),
                (1150, 7, 800, 1900.0),
                (1200, 3, 900, 2600.0),
                (1200, 7, 1000, 2900.0),
                (1250, 3, 1100, 3700.0),
                (1250, 7, 1200, 4100.0),
            ],
            columns,
        )
        keyed_actual = vol.addSummaryColumns(
            summarizers.sum("volume"),
            "id",
        ).toPandas()
        assert_same(keyed_actual, keyed_expected, "with key")
示例#12
0
    def test_addSummaryColumns(self):
        """Check ``addSummaryColumns`` with a sum summarizer, without and with an ``id`` key.

        The expected ``volume_sum`` column holds running totals of ``volume``
        in row order: over all rows in the first case, per ``id`` in the second.
        """
        from ts.flint import summarizers

        vol = self.vol()
        columns = ["time", "id", "volume", "volume_sum"]

        # --- without a key ---
        running_expected = make_pdf(
            [
                (1000, 7, 100, 100.0),
                (1000, 3, 200, 300.0),
                (1050, 3, 300, 600.0),
                (1050, 7, 400, 1000.0),
                (1100, 3, 500, 1500.0),
                (1100, 7, 600, 2100.0),
                (1150, 3, 700, 2800.0),
                (1150, 7, 800, 3600.0),
                (1200, 3, 900, 4500.0),
                (1200, 7, 1000, 5500.0),
                (1250, 3, 1100, 6600.0),
                (1250, 7, 1200, 7800.0),
            ],
            columns,
        )
        running_actual = vol.addSummaryColumns(summarizers.sum("volume")).toPandas()
        assert_same(running_actual, running_expected)

        # --- keyed by id (key passed positionally) ---
        keyed_expected = make_pdf(
            [
                (1000, 7, 100, 100.0),
                (1000, 3, 200, 200.0),
                (1050, 3, 300, 500.0),
                (1050, 7, 400, 500.0),
                (1100, 3, 500, 1000.0),
                (1100, 7, 600, 1100.0),
                (1150, 3, 700, 1700.0),
                (1150, 7, 800, 1900.0),
                (1200, 3, 900, 2600.0),
                (1200, 7, 1000, 2900.0),
                (1250, 3, 1100, 3700.0),
                (1250, 7, 1200, 4100.0),
            ],
            columns,
        )
        keyed_actual = vol.addSummaryColumns(
            summarizers.sum("volume"),
            "id",
        ).toPandas()
        assert_same(keyed_actual, keyed_expected, "with key")
示例#13
0
def test_addSummaryColumns(summarizers, tests_utils, vol):
    """Check ``addSummaryColumns`` with a sum summarizer, without and with an ``id`` key.

    The expected ``volume_sum`` column holds running totals of ``volume``
    in row order: over all rows in the first case, per ``id`` in the second.
    """
    columns = ["time", "id", "volume", "volume_sum"]

    # --- without a key ---
    running_expected = make_pdf(
        [
            (1000, 7, 100, 100.0),
            (1000, 3, 200, 300.0),
            (1050, 3, 300, 600.0),
            (1050, 7, 400, 1000.0),
            (1100, 3, 500, 1500.0),
            (1100, 7, 600, 2100.0),
            (1150, 3, 700, 2800.0),
            (1150, 7, 800, 3600.0),
            (1200, 3, 900, 4500.0),
            (1200, 7, 1000, 5500.0),
            (1250, 3, 1100, 6600.0),
            (1250, 7, 1200, 7800.0),
        ],
        columns,
    )
    running_actual = vol.addSummaryColumns(summarizers.sum("volume")).toPandas()
    tests_utils.assert_same(running_actual, running_expected)

    # --- keyed by id (key passed positionally) ---
    keyed_expected = make_pdf(
        [
            (1000, 7, 100, 100.0),
            (1000, 3, 200, 200.0),
            (1050, 3, 300, 500.0),
            (1050, 7, 400, 500.0),
            (1100, 3, 500, 1000.0),
            (1100, 7, 600, 1100.0),
            (1150, 3, 700, 1700.0),
            (1150, 7, 800, 1900.0),
            (1200, 3, 900, 2600.0),
            (1200, 7, 1000, 2900.0),
            (1250, 3, 1100, 3700.0),
            (1250, 7, 1200, 4100.0),
        ],
        columns,
    )
    keyed_actual = vol.addSummaryColumns(
        summarizers.sum("volume"),
        "id",
    ).toPandas()
    tests_utils.assert_same(keyed_actual, keyed_expected, "with key")
示例#14
0
def test_summarizeWindows(flintContext, tests_utils, windows, summarizers,
                          vol):
    """Check ``summarizeWindows`` volume sums over a ``past_absolute_time('99ns')`` window.

    Three cases are covered: no key, keyed by ``id``, and keyed by ``id``
    while summarizing ``vol`` passed as ``other`` from a separate
    (time, id) dataframe.
    """
    columns = ["time", "id", "volume", "volume_sum"]

    # --- without a key: the window sum spans both ids ---
    unkeyed_actual = vol.summarizeWindows(
        windows.past_absolute_time('99ns'),
        summarizers.sum("volume"),
    ).toPandas()
    unkeyed_expected = make_pdf(
        [
            (1000, 7, 100, 300.0),
            (1000, 3, 200, 300.0),
            (1050, 3, 300, 1000.0),
            (1050, 7, 400, 1000.0),
            (1100, 3, 500, 1800.0),
            (1100, 7, 600, 1800.0),
            (1150, 3, 700, 2600.0),
            (1150, 7, 800, 2600.0),
            (1200, 3, 900, 3400.0),
            (1200, 7, 1000, 3400.0),
            (1250, 3, 1100, 4200.0),
            (1250, 7, 1200, 4200.0),
        ],
        columns,
    )
    tests_utils.assert_same(unkeyed_actual, unkeyed_expected)

    # --- keyed by id: the window sum is computed per id ---
    keyed_actual = vol.summarizeWindows(
        windows.past_absolute_time('99ns'),
        summarizers.sum("volume"),
        key="id",
    ).toPandas()
    keyed_expected = make_pdf(
        [
            (1000, 7, 100, 100.0),
            (1000, 3, 200, 200.0),
            (1050, 3, 300, 500.0),
            (1050, 7, 400, 500.0),
            (1100, 3, 500, 800.0),
            (1100, 7, 600, 1000.0),
            (1150, 3, 700, 1200.0),
            (1150, 7, 800, 1400.0),
            (1200, 3, 900, 1600.0),
            (1200, 7, 1000, 1800.0),
            (1250, 3, 1100, 2000.0),
            (1250, 7, 1200, 2200.0),
        ],
        columns,
    )
    tests_utils.assert_same(keyed_actual, keyed_expected)

    # --- keyed by id, summarizing `vol` via `other` ---
    # The left-hand frame only carries (time, id) pairs and omits
    # (1100, 7) and (1250, 3).
    interval_pdf = make_pdf(
        [
            (1000, 3),
            (1000, 7),
            (1050, 3),
            (1050, 7),
            (1100, 3),
            (1150, 3),
            (1150, 7),
            (1200, 3),
            (1200, 7),
            (1250, 7),
        ],
        ["time", "id"],
    )
    interval_with_id = flintContext.read.pandas(interval_pdf)

    other_actual = interval_with_id.summarizeWindows(
        windows.past_absolute_time('99ns'),
        summarizers.sum("volume"),
        key="id",
        other=vol,
    ).toPandas()
    other_expected = make_pdf(
        [
            (1000, 3, 200.0),
            (1000, 7, 100.0),
            (1050, 3, 500.0),
            (1050, 7, 500.0),
            (1100, 3, 800.0),
            (1150, 3, 1200.0),
            (1150, 7, 1400.0),
            (1200, 3, 1600.0),
            (1200, 7, 1800.0),
            (1250, 7, 2200.0),
        ],
        ["time", "id", "volume_sum"],
    )
    tests_utils.assert_same(other_actual, other_expected)