def test_histogram(self):
        """
        Check ``histogram`` and ``histograms`` on a random two-column table.

        The first histogram (called with 10) must have 11 rows, a second
        column whose values are not all equal, stay below 0.5 and sum to 1.
        Two extra rows (-1 and 2) are then appended before the histogram is
        rebuilt and the per-column ``histograms`` table is filtered.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        xs = [random.random() for _ in range(0, 100)]
        rows = [[x, x + random.random() / 2] for x in xs]
        table = TableFormula(["x", "y"], rows)

        first = table.histogram(lambda v: (v["x"], 1), 10)
        assert len(first) == 11
        highest = max(entry[1] for entry in first.values)
        lowest = min(entry[1] for entry in first.values)
        assert lowest < highest
        assert highest < 0.5
        total = sum(entry[1] for entry in first.values)
        assert abs(total - 1) < 1e-5

        # Add one row below and one row above the [0, 1) range of ``x``.
        for outlier in (-1., 2.):
            table.values.append([outlier, outlier])
        second = table.histogram(lambda v: (v["x"], 1), 10)
        assert second[0, 0] > 0
        assert second[-1, 0] < 1

        per_column = table.values_to_float().histograms(["x", "y"], 10)
        without_key = per_column.filter(lambda v: v["histKey"] is None)
        assert len(without_key) == 0
        without_x = per_column.filter(lambda v: v["x"] is None)
        assert len(without_x) > 0
    def test_pandas_matrix(self):
        """
        Wrap a DataFrame read from ``data/BNP.PA.txt`` in a TableFormula and
        check row count and header, then read it back through ``dataframe``.

        NOTE(review): the result of ``df.set_index("Date")`` is discarded and
        the header asserted afterwards still lists ``'Date'`` -- confirm
        whether re-indexing was really intended in the second half.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        here = os.path.split(__file__)[0]
        csv_path = os.path.join(here, "data", "BNP.PA.txt")
        expected_header = [
            'index', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume',
            'Adj Close'
        ]

        frame = pandas.read_csv(csv_path, sep=",")
        for column in ("Date", "High"):
            assert column in frame.columns
        assert len(frame) == 2344
        mat = TableFormula(frame)
        assert len(mat) == 2344
        header = mat.header
        if not isinstance(header, list):
            raise Exception("expecting type: " + str(type(mat.header)))
        assert mat.header == expected_header

        frame = pandas.read_csv(csv_path, sep=",")
        frame.set_index("Date")
        mat = TableFormula(frame)
        assert len(mat) == 2344
        assert mat.header == expected_header
        round_trip = mat.dataframe
        assert len(round_trip) == 2344
Exemplo n.º 3
0
 def test_matrix_array2(self):
     """
     ``center_reduce`` on a random 10-row table must keep ``size`` and
     change the value of the first cell.
     """
     fLOG(__file__, self._testMethodName,
          OutputPrint=__name__ == "__main__")
     rows = [[random.random(), random.random()] for _ in range(0, 10)]
     source = TableFormula(["x", "y"], rows)
     reduced = source.center_reduce()
     assert reduced.size == source.size
     assert reduced[0, 0] != source[0, 0]
Exemplo n.º 4
0
 def test_union_columns(self):
     """
     ``union_columns`` over both columns of a 100-row table must report a
     size of ``(200, 1)``.
     """
     fLOG(__file__, self._testMethodName,
          OutputPrint=__name__ == "__main__")
     xs = [random.random() for _ in range(0, 100)]
     rows = [[x, x + random.random() / 2] for x in xs]
     table = TableFormula(["x", "y"], rows)
     stacked = table.union_columns(["x", "y"])
     assert stacked.size == (200, 1)
    def test_addc(self):
        """
        Call ``addc`` with a single name, a tuple of names with a list of
        functions, and a tuple of names with one tuple-returning function,
        checking the resulting header (and some cell values) after each call.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        xs = [random.random() for _ in range(0, 100)]
        rows = [[x, x + random.random() / 2] for x in xs]
        table = TableFormula(["x", "y"], rows)

        # One column, third argument 0.
        table.addc("a", lambda v: 0, 0)
        assert table.header == ["a", "x", "y"]

        # Two columns, one function per column.
        table.addc(("aa", "bb"), [lambda v: 4, lambda v: 5], 0)
        assert table.header == ["aa", "bb", "a", "x", "y"]

        # Two columns produced by a single function returning a pair.
        table.addc(("aaa", "bbb"), lambda v: (7, 8), 0)
        assert table.header == ["aaa", "bbb", "aa", "bb", "a", "x", "y"]
        assert table[0, 0] == 7
        assert table[0, 1] == 8

        # No third argument: the header check shows the columns at the end.
        table.addc(("aaaa", "bbba"), lambda v: (8, 9))
        expected = ["aaa", "bbb", "aa", "bb", "a", "x", "y", "aaaa", "bbba"]
        assert table.header == expected
        assert table[0, -2] == 8
        assert table[0, -1] == 9
 def test_matrix_array2(self):
     """
     ``center_reduce`` on a random 10-row table must keep ``size`` and
     change the value of the first cell.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     rows = [[random.random(), random.random()] for _ in range(0, 10)]
     source = TableFormula(["x", "y"], rows)
     reduced = source.center_reduce()
     self.assertEqual(reduced.size, source.size)
     self.assertNotEqual(reduced[0, 0], source[0, 0])
 def test_union_columns(self):
     """
     ``union_columns`` over both columns of a 100-row table must report a
     size of ``(200, 1)``.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     xs = [random.random() for _ in range(0, 100)]
     rows = [[x, x + random.random() / 2] for x in xs]
     table = TableFormula(["x", "y"], rows)
     merged = table.union_columns(["x", "y"])
     assert merged.size == (200, 1)
Exemplo n.º 8
0
 def test_matrix_operator(self):
     """
     Check ``+`` and ``*`` on tables (``t + t + t * -1`` must equal ``t``)
     and that ``replicate(2)`` doubles the length.
     """
     fLOG(__file__, self._testMethodName,
          OutputPrint=__name__ == "__main__")
     rows = [[random.random(), random.random()] for _ in range(0, 10)]
     table = TableFormula(["x", "y"], rows)
     doubled = table + table
     negated = table * -1
     restored = doubled + negated
     assert table == restored
     replicated = table.replicate(2)
     assert len(replicated) == len(table) * 2
 def test_matrix_operator(self):
     """
     Check ``+`` and ``*`` on tables (``t + t + t * -1`` must equal ``t``)
     and that ``replicate(2)`` doubles the length.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     rows = [[random.random(), random.random()] for _ in range(0, 10)]
     table = TableFormula(["x", "y"], rows)
     twice = table + table
     minus = table * -1
     back = twice + minus
     self.assertEqual(table, back)
     copies = table.replicate(2)
     self.assertEqual(len(copies), len(table) * 2)
Exemplo n.º 10
0
 def test_mu_sigma(self):
     """
     Draw 1000 values from ``random.gauss(-1, 3)`` and check that
     ``mu_sigma`` (with and without ``removeExtreme=0.01``) returns a mean
     close to -1 and a deviation close to 3, then check the size of
     ``mu_sigma_each_column``.

     The tolerance is 0.5, the value used by the other copy of this test in
     the file.  With 1000 draws the standard error of the mean is about
     3 / sqrt(1000) ~ 0.095, so the former 0.3 bound sat near three standard
     errors and could fail by chance.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     rows = [[random.gauss(-1, 3)] for _ in range(0, 1000)]
     tbl = TableFormula(["x"], rows)
     tolerance = 0.5

     mu, si = tbl.mu_sigma(lambda v: v["x"])
     assert abs(mu + 1) < tolerance
     assert abs(si - 3) < tolerance

     mu, si = tbl.mu_sigma(lambda v: v["x"], removeExtreme=0.01)
     assert abs(mu + 1) < tolerance
     assert abs(si - 3) < tolerance

     # Local name avoids shadowing the builtin ``all``.
     per_column = tbl.mu_sigma_each_column(removeExtreme=0.01)
     assert per_column.size == (2, 1)
    def test_TableFormulaCore_Excel(self):
        """
        Load ``data/BNP.PA.txt``, sort it by ``Date`` and save it with
        ``save_as_excel`` under ``temp_store`` as both ``.xls`` and ``.xlsx``,
        checking each file exists afterwards.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        here = os.path.split(__file__)[0]

        assert TableFormula.delta is not None

        source = os.path.join(here, "data", "BNP.PA.txt")
        table = TableFormula(source, sep=",")
        table.sort(lambda v: v["Date"])
        assert len(table) > 0

        out_dir = os.path.join(here, "temp_store")
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        for name in ("temp_excel_table.xls", "temp_excel_table.xlsx"):
            target = os.path.join(out_dir, name)
            # Start from a clean slate so the existence check is meaningful.
            if os.path.exists(target):
                os.remove(target)
            assert not os.path.exists(target)

            table.save_as_excel(target)
            assert os.path.exists(target)
Exemplo n.º 12
0
 def test_correlation(self):
     """
     Check the shape and a few cells of ``covariance()`` and
     ``correlation()`` on a random two-column table.

     NOTE(review): the assertion on ``cov[1, 1] == cov[0, 2]`` is repeated
     after ``correlation()`` is computed; it may have been meant to test
     the correlation table instead -- confirm.
     """
     fLOG(__file__, self._testMethodName,
          OutputPrint=__name__ == "__main__")
     xs = [random.random() for _ in range(0, 100)]
     rows = [[x, x + random.random() / 2] for x in xs]
     table = TableFormula(["x", "y"], rows)

     covar = table.covariance()
     assert len(covar.values) == 2
     assert len(covar.header) == 3
     assert covar[1, 1] == covar[0, 2]

     correl = table.correlation()
     assert len(correl.values) == 2
     assert len(correl.header) == 3
     assert covar[1, 1] == covar[0, 2]
     assert abs(correl[0, 1] - correl[1, 2]) < 1e-5
     assert abs(1 - correl[1, 2]) < 1e-5
 def test_mu_sigma(self):
     """
     Draw 1000 values from ``random.gauss(-1, 3)`` and check that
     ``mu_sigma`` (with and without ``removeExtreme=0.01``) stays within
     0.5 of -1 and 3, then check the size of ``mu_sigma_each_column``.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     # The first list only fixes the number of gaussian draws.
     seeds = [random.random() for _ in range(0, 1000)]
     rows = [[random.gauss(-1, 3)] for _ in seeds]
     table = TableFormula(["x"], rows)

     mean, dev = table.mu_sigma(lambda v: v["x"])
     self.assertTrue(abs(mean + 1) < 0.5)
     self.assertTrue(abs(dev - 3) < 0.5)

     mean, dev = table.mu_sigma(lambda v: v["x"], removeExtreme=0.01)
     self.assertTrue(abs(mean + 1) < 0.5)
     self.assertTrue(abs(dev - 3) < 0.5)

     per_column = table.mu_sigma_each_column(removeExtreme=0.01)
     self.assertEqual(per_column.size, (2, 1))
Exemplo n.º 14
0
    def test_split_files(self):
        """
        Split ``data/BNP.PA.txt`` into two files with
        ``TableFormula.random_split_file`` and check that both files exist
        and that their line counts add up to the source's plus one.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        here = os.path.split(__file__)[0]
        source = os.path.join(here, "data", "BNP.PA.txt")
        out_dir = os.path.join(here, "temp_split")
        assert os.path.exists(source)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        prefix = os.path.join(out_dir, "temp_split")
        first = prefix + ".0000.txt"
        second = prefix + ".0001.txt"
        # Remove leftovers from a previous run.
        for stale in (first, second):
            if os.path.exists(stale):
                os.remove(stale)

        outcome = TableFormula.random_split_file(
            source, prefix, 2, logFunction=fLOG)
        assert outcome
        for produced in (first, second):
            fLOG(produced)
            assert os.path.exists(produced)

        contents = []
        for path in (source, first, second):
            with open(path, "r") as handle:
                contents.append(handle.readlines())
        whole, part1, part2 = contents

        assert len(whole) == len(part1) + len(part2) - 1
    def test_split_files(self):
        """
        Split ``data/BNP.PA.txt`` into two files with
        ``TableFormula.random_split_file`` and check that both files exist
        and that their line counts add up to the source's plus one.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        base_dir = os.path.split(__file__)[0]
        source = os.path.join(base_dir, "data", "BNP.PA.txt")
        target_dir = os.path.join(base_dir, "temp_split")
        assert os.path.exists(source)
        if not os.path.exists(target_dir):
            os.mkdir(target_dir)
        stem = os.path.join(target_dir, "temp_split")
        part_a = stem + ".0000.txt"
        part_b = stem + ".0001.txt"
        # Remove leftovers from a previous run.
        for leftover in [part_a, part_b]:
            if os.path.exists(leftover):
                os.remove(leftover)

        done = TableFormula.random_split_file(source, stem, 2,
                                              logFunction=fLOG)
        assert done
        for part in [part_a, part_b]:
            fLOG(part)
            assert os.path.exists(part)

        with open(source, "r") as stream:
            all_lines = stream.readlines()
        with open(part_a, "r") as stream:
            lines_a = stream.readlines()
        with open(part_b, "r") as stream:
            lines_b = stream.readlines()

        assert len(all_lines) == len(lines_a) + len(lines_b) - 1
 def test_correlation(self):
     """
     Check the shape and a few cells of ``covariance()`` and
     ``correlation()`` on a random two-column table.

     NOTE(review): the assertion on ``cov[1, 1] == cov[0, 2]`` is repeated
     after ``correlation()`` is computed; it may have been meant to test
     the correlation table instead -- confirm.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     base = [random.random() for _ in range(0, 100)]
     rows = [[b, b + random.random() / 2] for b in base]
     table = TableFormula(["x", "y"], rows)

     cov_table = table.covariance()
     assert len(cov_table.values) == 2
     assert len(cov_table.header) == 3
     assert cov_table[1, 1] == cov_table[0, 2]

     cor_table = table.correlation()
     assert len(cor_table.values) == 2
     assert len(cor_table.header) == 3
     assert cov_table[1, 1] == cov_table[0, 2]
     assert abs(cor_table[0, 1] - cor_table[1, 2]) < 1e-5
     assert abs(1 - cor_table[1, 2]) < 1e-5
 def test_TableFormulaCore_with_dict(self):
     """
     Build a table from two dictionaries with partly different keys and
     check that every row of ``values`` has three entries.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     records = [{"one": 1, "two": 2}, {"two": 2.1, "three": 3}]
     table = TableFormula(records)
     for entry in table.values:
         width = len(entry)
         assert width == 3
Exemplo n.º 18
0
    def test_TableFormulaCore_Excel(self):
        """
        Load ``data/BNP.PA.txt``, sort it by ``Date`` and save it with
        ``save_as_excel`` under ``temp_store`` as both ``.xls`` and ``.xlsx``,
        checking each file exists afterwards.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        root = os.path.split(__file__)[0]

        assert TableFormula.delta is not None

        data_file = os.path.join(root, "data", "BNP.PA.txt")
        table = TableFormula(data_file, sep=",")
        table.sort(lambda v: v["Date"])
        assert len(table) > 0

        store = os.path.join(root, "temp_store")
        if not os.path.exists(store):
            os.mkdir(store)

        for filename in ["temp_excel_table.xls", "temp_excel_table.xlsx"]:
            destination = os.path.join(store, filename)
            # Drop any file left by an earlier run before saving again.
            if os.path.exists(destination):
                os.remove(destination)
            assert not os.path.exists(destination)

            table.save_as_excel(destination)
            assert os.path.exists(destination)
Exemplo n.º 19
0
 def test_matrix_array(self):
     """
     Round-trip a random table through ``np_matrix`` and ``np_array`` and
     check that tables rebuilt from either equal the original.
     """
     fLOG(__file__, self._testMethodName,
          OutputPrint=__name__ == "__main__")
     rows = [[random.random(), random.random()] for _ in range(0, 10)]
     table = TableFormula(["x", "y"], rows)

     as_matrix = table.np_matrix
     assert isinstance(as_matrix, numpy.matrix)
     from_matrix = TableFormula(table.header, as_matrix)
     assert isinstance(from_matrix[0, 0], float)
     assert "[[" not in str(from_matrix)
     if from_matrix != table:
         # Log every difference before failing.
         for difference in table.delta(from_matrix):
             fLOG(difference)
         assert False

     as_array = table.np_array
     assert isinstance(as_array, numpy.ndarray)
     from_array = TableFormula(table.header, as_array)
     assert from_array == table
 def test_pandas_matrix_index(self):
     """
     Read ``data/BNP.PA.txt`` with ``Date`` as index column, wrap it in a
     TableFormula and check the row count and header.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     here = os.path.split(__file__)[0]
     csv_path = os.path.join(here, "data", "BNP.PA.txt")
     frame = pandas.read_csv(csv_path, sep=",", index_col=["Date"])
     mat = TableFormula(frame)
     assert len(mat) == 2344
     expected_header = [
         'index', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close',
     ]
     assert mat.header == expected_header
 def test_json(self):
     """
     The ``json`` attribute of a random two-column table must be a
     non-empty ``str``.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     xs = [random.random() for _ in range(0, 100)]
     rows = [[x, x + random.random() / 2] for x in xs]
     table = TableFormula(["x", "y"], rows)
     dumped = table.json
     non_empty = len(dumped) > 0
     self.assertTrue(non_empty)
     is_text = isinstance(dumped, str)
     self.assertTrue(is_text)
 def test_matrix_array(self):
     """
     Round-trip a random table through ``np_matrix`` and ``np_array`` and
     check that tables rebuilt from either equal the original.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     rows = [[random.random(), random.random()] for _ in range(0, 10)]
     table = TableFormula(["x", "y"], rows)

     matrix = table.np_matrix
     self.assertTrue(isinstance(matrix, numpy.matrix))
     rebuilt = TableFormula(table.header, matrix)
     self.assertTrue(isinstance(rebuilt[0, 0], float))
     self.assertTrue("[[" not in str(rebuilt))
     if rebuilt != table:
         # Log every difference before failing.
         for line in table.delta(rebuilt):
             fLOG(line)
         self.assertTrue(False)

     array = table.np_array
     self.assertTrue(isinstance(array, numpy.ndarray))
     rebuilt_from_array = TableFormula(table.header, array)
     self.assertEqual(rebuilt_from_array, table)
Exemplo n.º 23
0
    def test_td9_json(self):
        """
        Load ``data/td9_by_hours.txt``, copy each row with an extra ``name``
        key taken from ``last_update`` and write the ``str`` of every row to
        ``out_json_paris_velib.json`` between ``[`` and ``]`` lines.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        here = os.path.split(__file__)[0]
        source = os.path.join(here, "data", "td9_by_hours.txt")
        table = TableFormula(source)
        table = table.values_to_float(True)

        records = []
        for row in table:
            record = copy.copy(row)
            record["name"] = record["last_update"]
            records.append(record)

        assert len(records) > 0
        target = os.path.join(here, "out_json_paris_velib.json")
        if os.path.exists(target):
            os.remove(target)
        body = "\n".join([str(_) for _ in records])
        with open(target, "w") as stream:
            stream.write("[\n")
            stream.write(body)
            stream.write("\n]\n")
        assert os.path.exists(target)
Exemplo n.º 24
0
 def test_multiply_implicit(self):
     """
     Apply ``multiply_column_by_row_instance`` (keyed on ``date``, spread
     over ``city``) to a three-row table and compare it with an expected
     table through ``delta``.

     Both tables are written as newline-joined literals.  The previous
     triple-quoted literals relied on stripping an eight-space source
     indent, but their continuation lines are indented by five spaces, so
     the ``replace`` matched nothing and every row after the first kept
     leading blanks.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     text = "\n".join([
         "city\tdate",
         "A\tjan",
         "A\tfeb",
         "B\tfeb",
     ])
     table = TableFormula(text)
     assert len(table) == 3
     mul = table.multiply_column_by_row_instance(
         lambda v: v["date"],
         lambda v: v["city"])
     exp = "\n".join([
         "KEY\tA|city\tA|date\tB|city\tB|date",
         "feb\tA\tfeb\tB\tfeb",
         "jan\tA\tjan\tNone\tNone",
     ])
     exp = TableFormula(exp)
     # Sort both sides on the key so the comparison ignores row order.
     exp.sort(lambda v: v["KEY"])
     mul.sort(lambda v: v["KEY"])
     delta = mul.delta(exp)
     if len(delta) > 0:
         for _ in delta:
             fLOG(_)
         assert False
 def test_iter(self):
     """
     Iterating over a table must yield dictionaries holding both column
     names, and at least one row must be produced.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     rows = [[random.random(), random.random()] for _ in range(0, 10)]
     table = TableFormula(["x", "y"], rows)
     seen = 0
     for record in table:
         self.assertTrue(isinstance(record, dict))
         for column in ("x", "y"):
             self.assertTrue(column in record)
         seen = seen + 1
     assert seen > 0
    def test_td9_json(self):
        """
        Load ``data/td9_by_hours.txt``, copy each row with an extra ``name``
        key taken from ``last_update`` and write the ``str`` of every row to
        ``out_json_paris_velib.json`` between ``[`` and ``]`` lines.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        folder = os.path.split(__file__)[0]
        data_path = os.path.join(folder, "data", "td9_by_hours.txt")
        table = TableFormula(data_path).values_to_float(True)

        rows = []
        for original in table:
            duplicate = copy.copy(original)
            duplicate["name"] = duplicate["last_update"]
            rows.append(duplicate)

        assert len(rows) > 0
        out_path = os.path.join(folder, "out_json_paris_velib.json")
        if os.path.exists(out_path):
            os.remove(out_path)
        with open(out_path, "w") as out:
            out.write("[\n")
            out.write("\n".join([str(_) for _ in rows]))
            out.write("\n]\n")
        assert os.path.exists(out_path)
Exemplo n.º 27
0
    def test_histogram(self):
        """
        Check ``histogram`` and ``histograms`` on a random two-column table.

        The first histogram (called with 10) must have 11 rows, a second
        column whose values are not all equal, stay below 0.5 and sum to 1.
        Two extra rows (-1 and 2) are then appended before the histogram is
        rebuilt and the per-column ``histograms`` table is filtered.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        base = [random.random() for _ in range(0, 100)]
        rows = [[b, b + random.random() / 2] for b in base]
        table = TableFormula(["x", "y"], rows)

        histo = table.histogram(lambda v: (v["x"], 1), 10)
        assert len(histo) == 11
        top = max(item[1] for item in histo.values)
        bottom = min(item[1] for item in histo.values)
        assert bottom < top
        assert top < 0.5
        mass = sum(item[1] for item in histo.values)
        assert abs(mass - 1) < 1e-5

        # Add one row below and one row above the [0, 1) range of ``x``.
        for extreme in (-1., 2.):
            table.values.append([extreme, extreme])
        histo_wide = table.histogram(lambda v: (v["x"], 1), 10)
        assert histo_wide[0, 0] > 0
        assert histo_wide[-1, 0] < 1

        by_column = table.values_to_float().histograms(["x", "y"], 10)
        no_key = by_column.filter(lambda v: v["histKey"] is None)
        assert len(no_key) == 0
        no_x = by_column.filter(lambda v: v["x"] is None)
        assert len(no_x) > 0
Exemplo n.º 28
0
    def test_addc(self):
        """
        Call ``addc`` with a single name, a tuple of names with a list of
        functions, and a tuple of names with one tuple-returning function,
        checking the resulting header (and some cell values) after each call.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        base = [random.random() for _ in range(0, 100)]
        rows = [[b, b + random.random() / 2] for b in base]
        table = TableFormula(["x", "y"], rows)

        # One column, third argument 0.
        table.addc("a", lambda v: 0, 0)
        assert table.header == ["a", "x", "y"]

        # Two columns, one function per column.
        table.addc(("aa", "bb"), [lambda v: 4, lambda v: 5], 0)
        assert table.header == ["aa", "bb", "a", "x", "y"]

        # Two columns produced by a single function returning a pair.
        table.addc(("aaa", "bbb"), lambda v: (7, 8), 0)
        assert table.header == ["aaa", "bbb", "aa", "bb", "a", "x", "y"]
        assert table[0, 0] == 7
        assert table[0, 1] == 8

        # No third argument: the header check shows the columns at the end.
        table.addc(("aaaa", "bbba"), lambda v: (8, 9))
        final_header = [
            "aaa", "bbb", "aa", "bb", "a", "x", "y", "aaaa", "bbba"
        ]
        assert table.header == final_header
        assert table[0, -2] == 8
        assert table[0, -1] == 9
    def test_TableFormulaStat(self):
        """
        ``Gini`` on column ``sum_y`` must be exactly 0 when every value is 1
        and strictly between 0 and 1 once 5 and 10 are added.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        # ``#`` stands for a line break and a blank for a tab in the specs.
        uniform = "sum_y#1#1#1#1#1#1#1#1#1#1#1".replace(" ", "\t")
        uniform = uniform.replace("#", "\n")
        table = TableFormula(uniform)
        gini = table.Gini(lambda v: v["sum_y"])
        assert gini == 0.

        skewed = "sum_y#1#1#1#1#1#1#1#1#1#1#1#5#10".replace(" ", "\t")
        skewed = skewed.replace("#", "\n")
        table = TableFormula(skewed)
        gini = table.Gini(lambda v: v["sum_y"])
        assert 0 < gini < 1
 def test_multiply_implicit(self):
     """
     Apply ``multiply_column_by_row_instance`` (keyed on ``date``, spread
     over ``city``) to a three-row table and compare it with an expected
     table through ``delta``.

     Both tables are written as newline-joined literals.  The previous
     triple-quoted literals relied on stripping an eight-space source
     indent, but their continuation lines are indented by five spaces, so
     the ``replace`` matched nothing and every row after the first kept
     leading blanks.
     """
     fLOG(__file__,
          self._testMethodName,
          OutputPrint=__name__ == "__main__")
     text = "\n".join([
         "city\tdate",
         "A\tjan",
         "A\tfeb",
         "B\tfeb",
     ])
     table = TableFormula(text)
     assert len(table) == 3
     mul = table.multiply_column_by_row_instance(lambda v: v["date"],
                                                 lambda v: v["city"])
     exp = "\n".join([
         "KEY\tA|city\tA|date\tB|city\tB|date",
         "feb\tA\tfeb\tB\tfeb",
         "jan\tA\tjan\tNone\tNone",
     ])
     exp = TableFormula(exp)
     # Sort both sides on the key so the comparison ignores row order.
     exp.sort(lambda v: v["KEY"])
     mul.sort(lambda v: v["KEY"])
     delta = mul.delta(exp)
     if len(delta) > 0:
         for _ in delta:
             fLOG(_)
         assert False
Exemplo n.º 31
0
    def test_TableFormulaCore(self):
        """
        Smoke test that calls a broad set of TableFormula methods in
        sequence, mostly asserting only that each result is non-empty.

        The statements are order dependent: columns are added to ``table``
        and the table is re-sorted before later calls read it.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        fold = os.path.split(__file__)[0]

        assert TableFormula.delta is not None

        # Load a comma-separated file and sort it on its ``Date`` column.
        file = os.path.join(fold, "data", "BNP.PA.txt")
        table = TableFormula(file, sep=",")
        table.sort(lambda v: v["Date"])
        assert len(table) > 0

        # Inline table: blanks become tabs and ``#`` becomes a line break.
        table = TableFormula("name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5"
                             .replace(" ", "\t").replace("#", "\n"))
        assert "d_a\td_b\td_c" in str(table)

        dist = table.get_distinct_values("name")
        assert len(dist) > 0

        table.add_column("has_A", lambda v: 1. if "A" in v["name"] else 0.)
        assert len(table) > 0

        x = 1. / 3
        table.add_column_smooth("has_A_smooth", lambda v: v[
                                "has_A"], [-1, 0, 1], [x, x, x])
        assert len(table) > 0

        # NOTE(review): ``fil`` is assigned here but the assertion checks
        # ``table``, so the filter result itself is never verified -- confirm.
        fil = table.filter(lambda v: v["d_b"] == 2)
        assert len(table) > 0

        rnd = table.random(5)
        assert len(rnd) > 0

        rnd = table.random(1, True)
        assert len(rnd) > 0

        fil = table.filter_quantile(lambda v: v["d_b"], 0, 0.4)
        assert len(fil) > 0

        total = table.aggregate(lambda v: v["d_c"])
        assert total > 0

        table.sort(lambda v: v["d_b"] + v["d_c"])
        assert len(table) > 0

        union = table.union(table)
        assert len(union) > len(table)

        # groupby with key function, value functions and output column names.
        group = table.groupby(lambda v: v["name"],
                              [lambda v: v["d_a"],
                               lambda v: v["d_b"]],
                              ["name", "sum_d_a", "sum_d_b"])
        assert len(group) > 0

        # Same grouping, passing ``max`` for each value column.
        groupmax = table.groupby(lambda v: v["name"],
                                 [lambda v: v["d_a"],
                                  lambda v: v["d_b"]],
                                 ["name", "max_d_a", "max_d_b"],
                                 [max, max])
        assert len(groupmax) > 0

        # Grouping with a two-argument aggregation and an extra function of
        # ``d_b``; the result is then joined back onto ``table`` by ``name``.
        group = table.groupby(lambda v: v["name"],
                              [lambda v: v["d_a"]],
                              ["name", "weight", "sum_d_a"],
                              [lambda vec, w: sum(vec) / w],
                              lambda v: v["d_b"])
        innerjoin = table.innerjoin(group, lambda v: v["name"],
                                    lambda v: v["name"], "group")
        assert len(innerjoin) > 0

        ext = table.extract_columns(["name", "d_a"])
        assert len(ext) > 0

        ext = table.remove_columns(["d_a"])
        assert len(ext) > 0

        d = table.todict(lambda v: v["name"], lambda v: v["d_b"], True)
        assert len(d) > 0

        d = table.select(lambda v: (v["name"], v["d_b"]))
        assert len(list(d)) > 0

        # Index on (name, d_a), then look a row up by that key.
        table.create_index(lambda v: (v["name"], v["d_a"]))
        row = table.get(('A', 1.1))
        assert row
        value = table.get(('A', 1.1), 2)
        assert value
        # Fresh copy of the inline table for the remaining checks.
        table = TableFormula("name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5"
                             .replace(" ", "\t").replace("#", "\n"))
        table.add_column("key_add", lambda v: "unique")
        mul = table.multiply_column_by_row_instance(
            lambda v: v["key_add"],
            lambda v: v["name"])
        assert len(mul) > 0

        table = TableFormula("key_name sum_a len_b avg_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5"
                             .replace(" ", "\t").replace("#", "\n"))
        gr = table.groupby_implicit(lambda v: v["key_name"])
        assert len(gr) > 0
Exemplo n.º 32
0
 def test_TableFormula_correlation_bicolumn(self):
     """
     ``TableFormula.correlation_bicolumn`` on two identical ``[0., 1.]``
     rows must return 0.
     """
     row = [0., 1.]
     # The same list object is used for both rows, as in the original data.
     data = [row, row]
     result = TableFormula.correlation_bicolumn(data)
     self.assertEqual(result, 0.)
Exemplo n.º 33
0
 def test_TableFormula_bootstrap(self):
     """
     ``TableFormula.bootstrap`` with a grand-total statistic on two
     identical ``[0., 1.]`` rows must return five values equal to 2.0.
     """
     row = [0., 1.]
     # The same list object is used for both rows, as in the original data.
     data = [row, row]
     outcome = TableFormula.bootstrap(
         data, lambda xx: sum(sum(x) for x in xx))
     expected = (2.0, 2.0, 2.0, 2.0, 2.0)
     self.assertEqual(outcome, expected)
Exemplo n.º 34
0
    def test_TableFormula_init(self):
        """
        Build tables from a nested list and from a numpy array, then check
        the exact text produced by ``str``, ``__html__``, ``__rst__`` and
        ``strtype`` as the first table is modified step by step.

        The statements are order dependent: each expected string reflects
        the changes made to ``tbl`` by the preceding lines.
        """
        vals = [0., 1.]
        vals = [vals, vals]
        tbl = TableFormula(vals)
        tbl.header = ["x", "y"]
        # NOTE: the local name ``np`` holds an array here, not the module.
        np = numpy.array(vals)
        tbl2 = TableFormula(np)
        # The list-built table prints one data row, the array-built table
        # prints two rows under the names ``c0`` and ``c1``.
        self.assertEqual(str(tbl), 'x\ty\n0.0\t1.0')
        self.assertEqual(str(tbl2), 'c0\tc1\n0.0\t1.0\n0.0\t1.0')
        tbl[0, 1] = 2.
        tbl3 = tbl.multiplication_term_term(tbl)
        self.assertEqual(str(tbl3), 'x\ty\n0.0\t4.0')
        html = tbl.__html__()
        self.assertEqual(
            html, '<table>\n<tr><th>x</th><th>y</th></tr>\n<tr><td>0.0</td><td>2.0</td></tr>\n</table>\n')
        rst = tbl.__rst__()
        self.assertEqual(
            rst, '+-----+-----+\n| x   | y   |\n+=====+=====+\n| 0.0 | 2.0 |\n+-----+-----+\n')
        st = tbl.strtype()
        self.assertEqual(st, "x\ty\n<class 'float'>\t<class 'float'>")
        tbl.change_header(['xx', 'yy'])
        st = tbl.strtype()
        self.assertEqual(st, "xx\tyy\n<class 'float'>\t<class 'float'>")
        tbl.add_column_vector('zz', [5.])
        b = str(tbl)
        self.assertEqual(b, 'xx\tyy\tzz\n0.0\t2.0\t5.0')
        # Concatenating a one-row table with a two-row table: the expected
        # text shows ``None`` in the first table's columns on the extra row.
        tbl4 = tbl.concatenate(tbl2)
        b = str(tbl4)
        self.assertEqual(
            b, 'xx\tyy\tzz\tc0\tc1\n0.0\t2.0\t5.0\t0.0\t1.0\nNone\tNone\tNone\t0.0\t1.0')

        # Second scenario: table built directly from a numpy array.
        vals = [0., 1.]
        vals = numpy.array([vals, vals])
        tbl = TableFormula(vals)
        tbl.header = ["x", "y"]
        cor = tbl.covariance_col('x', 'y')
        self.assertEqual(cor, 0)
 def test_empty_table(self):
     """
     A table built from a single ``["x", "y"]`` row must report a size of
     ``(0, 2)``.
     """
     fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
     only_header = [["x", "y"]]
     table = TableFormula(only_header)
     expected_size = (0, 2)
     self.assertEqual(table.size, expected_size)
    def test_TableFormulaCore(self):
        """
        Smoke test that calls a broad set of TableFormula methods in
        sequence, mostly asserting only that each result is non-empty.

        The statements are order dependent: columns are added to ``table``
        and the table is re-sorted before later calls read it.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        fold = os.path.split(__file__)[0]

        assert TableFormula.delta is not None

        # Load a comma-separated file and sort it on its ``Date`` column.
        file = os.path.join(fold, "data", "BNP.PA.txt")
        table = TableFormula(file, sep=",")
        table.sort(lambda v: v["Date"])
        assert len(table) > 0

        # Inline table: blanks become tabs and ``#`` becomes a line break.
        table = TableFormula(
            "name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(
                " ", "\t").replace("#", "\n"))
        assert "d_a\td_b\td_c" in str(table)

        dist = table.get_distinct_values("name")
        assert len(dist) > 0

        table.add_column("has_A", lambda v: 1. if "A" in v["name"] else 0.)
        assert len(table) > 0

        x = 1. / 3
        table.add_column_smooth("has_A_smooth", lambda v: v["has_A"],
                                [-1, 0, 1], [x, x, x])
        assert len(table) > 0

        # NOTE(review): ``fil`` is assigned here but the assertion checks
        # ``table``, so the filter result itself is never verified -- confirm.
        fil = table.filter(lambda v: v["d_b"] == 2)
        assert len(table) > 0

        rnd = table.random(5)
        assert len(rnd) > 0

        rnd = table.random(1, True)
        assert len(rnd) > 0

        fil = table.filter_quantile(lambda v: v["d_b"], 0, 0.4)
        assert len(fil) > 0

        total = table.aggregate(lambda v: v["d_c"])
        assert total > 0

        table.sort(lambda v: v["d_b"] + v["d_c"])
        assert len(table) > 0

        union = table.union(table)
        assert len(union) > len(table)

        # groupby with key function, value functions and output column names.
        group = table.groupby(lambda v: v["name"],
                              [lambda v: v["d_a"], lambda v: v["d_b"]],
                              ["name", "sum_d_a", "sum_d_b"])
        assert len(group) > 0

        # Same grouping, passing ``max`` for each value column.
        groupmax = table.groupby(lambda v: v["name"],
                                 [lambda v: v["d_a"], lambda v: v["d_b"]],
                                 ["name", "max_d_a", "max_d_b"], [max, max])
        assert len(groupmax) > 0

        # Grouping with a two-argument aggregation and an extra function of
        # ``d_b``; the result is then joined back onto ``table`` by ``name``.
        group = table.groupby(lambda v: v["name"], [lambda v: v["d_a"]],
                              ["name", "weight", "sum_d_a"],
                              [lambda vec, w: sum(vec) / w],
                              lambda v: v["d_b"])
        innerjoin = table.innerjoin(group, lambda v: v["name"],
                                    lambda v: v["name"], "group")
        assert len(innerjoin) > 0

        ext = table.extract_columns(["name", "d_a"])
        assert len(ext) > 0

        ext = table.remove_columns(["d_a"])
        assert len(ext) > 0

        d = table.todict(lambda v: v["name"], lambda v: v["d_b"], True)
        assert len(d) > 0

        d = table.select(lambda v: (v["name"], v["d_b"]))
        assert len(list(d)) > 0

        # Index on (name, d_a), then look a row up by that key.
        table.create_index(lambda v: (v["name"], v["d_a"]))
        row = table.get(('A', 1.1))
        assert row
        value = table.get(('A', 1.1), 2)
        assert value
        # Fresh copy of the inline table for the remaining checks.
        table = TableFormula(
            "name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(
                " ", "\t").replace("#", "\n"))
        table.add_column("key_add", lambda v: "unique")
        mul = table.multiply_column_by_row_instance(lambda v: v["key_add"],
                                                    lambda v: v["name"])
        assert len(mul) > 0

        table = TableFormula(
            "key_name sum_a len_b avg_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(
                " ", "\t").replace("#", "\n"))
        gr = table.groupby_implicit(lambda v: v["key_name"])
        assert len(gr) > 0