Пример #1
0
    def test_pandas_groupbynan(self):
        """Exercise ``pandas_groupby_nan`` on a column holding a missing value.

        For each sample type a three-row frame is built whose last row has no
        ``"value"`` entry.  The test checks that:

        * grouping on ``"value"`` and summing leaves NaN as the last
          ``"value"`` of the result;
        * passing the keys as a tuple raises ``TypeError``;
        * passing the keys as a list either raises ``TypeError`` or
          ``NotImplementedError`` (tolerated) or yields two ``"value"`` rows.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        types = [(str, "e"), (int, -10), (float, -20.2),
                 (bytes, bytes("a", "ascii"))]
        skip = (numpy.bool_, numpy.complex64, numpy.complex128)
        types += [(np_type, np_type(5))
                  for np_type in numpy_types() if np_type not in skip]

        def build_frame(ty):
            # Two rows carry the sample value; the third row omits "value",
            # so the resulting column contains a missing entry.
            data = [{"this": "cst", "type": "tt1=" + str(ty[0]), "value": ty[1]},
                    {"this": "cst", "type": "tt2=" + str(ty[0]), "value": ty[1]},
                    {"this": "cst", "type": "row_for_nan"}]
            return pandas.DataFrame(data)

        for ty in types:
            df = build_frame(ty)
            gr = pandas_groupby_nan(df, "value")
            co = gr.sum()
            li = list(co["value"])
            # assertTrue instead of a bare assert: it still runs under -O.
            self.assertTrue(numpy.isnan(li[-1]),
                            msg="last group is not NaN for type %r" % (ty[0],))

        for ty in types:
            df = build_frame(ty)

            # Keys given as a tuple must be rejected with TypeError.
            with self.assertRaises(TypeError):
                pandas_groupby_nan(df, ("value", "this"))

            # Keys given as a list may be unsupported; that case is
            # deliberately tolerated and the remaining check is skipped.
            try:
                gr = pandas_groupby_nan(df, ["value", "this"])
            except (TypeError, NotImplementedError):
                continue

            co = gr.sum()
            li = list(co["value"])
            self.assertEqual(len(li), 2)
Пример #2
0
    def test_pandas_groupbynan_tuple(self):
        """Group on three keys with ``groupby`` and ``pandas_groupby_nan``.

        Only the first of the four rows defines all of ``a``, ``b`` and ``c``.
        The plain ``groupby`` sum is asserted to have shape ``(1, 1)``, while
        the ``pandas_groupby_nan`` sum is asserted to have shape ``(4, 4)``
        with ``"NAN"`` standing in for the missing ``a`` of the second row
        (rows ordered by ``n``).
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        rows = [
            {"a": "a", "b": "b", "c": "c", "n": 1},
            {"b": "b", "n": 2},
            {"a": "a", "n": 3},
            {"c": "c", "n": 4},
        ]
        keys = ("a", "b", "c")
        df = pandas.DataFrame(rows)

        plain_sum = df.groupby(list(keys)).sum()
        self.assertEqual(plain_sum.shape, (1, 1))

        for nanback in (True, False):
            try:
                grouped = pandas_groupby_nan(
                    df, list(keys), nanback=nanback, suffix="NAN")
            except NotImplementedError:
                # This nanback mode is not implemented: nothing to check.
                continue

            summed = grouped.sum().sort_values("n")
            self.assertEqual(summed.shape, (4, 4))
            records = summed.to_dict("records")
            fLOG(summed)

            # The row with n == 1 is the only one with every key defined.
            first = records[0]
            for key in keys:
                self.assertEqual(first[key], key)
            self.assertEqual(first["n"], 1)
            self.assertEqual(records[1]["a"], "NAN")
Пример #3
0
    def test_pandas_groupbynan_tuple(self):
        """Check ``pandas_groupby_nan`` with a list of three grouping keys.

        The frame has four rows and only the first one fills ``a``, ``b`` and
        ``c`` together.  A regular ``groupby`` sum must have shape ``(1, 1)``;
        the ``pandas_groupby_nan`` sum must have shape ``(4, 4)`` and, once
        sorted by ``n``, start with the complete row followed by a row whose
        ``a`` is the ``"NAN"`` suffix.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        df = pandas.DataFrame([
            {"a": "a", "b": "b", "c": "c", "n": 1},
            {"b": "b", "n": 2},
            {"a": "a", "n": 3},
            {"c": "c", "n": 4},
        ])
        regular = df.groupby(["a", "b", "c"]).sum()
        self.assertEqual(regular.shape, (1, 1))

        # Expected content of the first record, checked key by key.
        expected_first = {"a": "a", "b": "b", "c": "c", "n": 1}

        for nanback in (True, False):
            try:
                with_nan = pandas_groupby_nan(
                    df, ["a", "b", "c"], nanback=nanback, suffix="NAN")
            except NotImplementedError:
                # Unsupported mode: skip to the next value of nanback.
                continue

            total = with_nan.sum().sort_values("n")
            self.assertEqual(total.shape, (4, 4))
            as_records = total.to_dict("records")
            fLOG(total)

            for column, value in expected_first.items():
                self.assertEqual(as_records[0][column], value)
            self.assertEqual(as_records[1]["a"], "NAN")
Пример #4
0
    def test_pandas_groupbynan(self):
        """Check how ``pandas_groupby_nan`` treats a missing grouping value.

        Every sample type produces a frame of three rows, the last of which
        has no ``"value"`` key.  Three behaviours are verified:

        * grouping on the single key ``"value"`` and summing gives a result
          whose last ``"value"`` is NaN;
        * grouping with a tuple of keys raises ``TypeError``;
        * grouping with a list of keys is allowed to raise ``TypeError`` or
          ``NotImplementedError``; otherwise the summed result must contain
          two ``"value"`` entries.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")

        types = [(str, "e"), (int, -10), (float, -20.2),
                 (bytes, bytes("a", "ascii"))]
        skip = (numpy.bool_, numpy.complex64, numpy.complex128)
        types += [(dtype, dtype(5))
                  for dtype in numpy_types() if dtype not in skip]

        def frame_with_missing_value(ty):
            # The last row intentionally has no "value" key, which leaves a
            # missing entry in that column of the frame.
            label = str(ty[0])
            return pandas.DataFrame([{
                "this": "cst",
                "type": "tt1=" + label,
                "value": ty[1]
            }, {
                "this": "cst",
                "type": "tt2=" + label,
                "value": ty[1]
            }, {
                "this": "cst",
                "type": "row_for_nan"
            }])

        for ty in types:
            df = frame_with_missing_value(ty)
            co = pandas_groupby_nan(df, "value").sum()
            li = list(co["value"])
            # A unittest assertion is used so the check survives python -O.
            self.assertTrue(numpy.isnan(li[-1]),
                            msg="last group is not NaN for type %r" % (ty[0],))

        for ty in types:
            df = frame_with_missing_value(ty)

            # A tuple of keys is expected to be refused with TypeError.
            with self.assertRaises(TypeError):
                pandas_groupby_nan(df, ("value", "this"))

            # A list of keys may not be supported; this is tolerated on
            # purpose and simply skips the length check below.
            try:
                gr = pandas_groupby_nan(df, ["value", "this"])
            except (TypeError, NotImplementedError):
                continue

            li = list(gr.sum()["value"])
            self.assertEqual(len(li), 2)