Exemplo n.º 1
0
    def test_table_arrow_loads_dictionary_stream_int32(self, util):
        """Load a dictionary arrow built with (int32, string) types.

        Both columns are expected to surface as ``str`` columns, with
        ``None`` wherever the dictionary index was ``None``.
        """
        # Each entry pairs the dictionary indices with the dictionary values.
        encoded_columns = [
            ([0, 1, 1, None], ["abc", "def"]),
            ([0, 1, None, 2], ["xx", "yy", "zz"]),
        ]
        column_types = [[pa.int32(), pa.string()]] * 2
        arrow_data = util.make_dictionary_arrow(
            ["a", "b"], encoded_columns, types=column_types)

        expected_schema = {"a": str, "b": str}
        expected_columns = {
            "a": ["abc", "def", "def", None],
            "b": ["xx", "yy", None, "zz"],
        }

        tbl = Table(arrow_data)

        assert tbl.size() == 4
        assert tbl.schema() == expected_schema
        assert tbl.view().to_dict() == expected_columns
Exemplo n.º 2
0
    def test_table_pandas_periodindex(self, util):
        """Load a 30-row period DataFrame and check its schema and index.

        The schema is expected to report a ``date`` index plus four float
        columns, and the first five index values to be the first day of
        January through May 2000.
        """
        frame = util.make_period_dataframe(30)

        expected_schema = {"index": date}
        expected_schema.update(dict.fromkeys(("a", "b", "c", "d"), float))
        first_five = [datetime(2000, month, 1) for month in range(1, 6)]

        tbl = Table(frame)

        assert tbl.size() == 30
        assert tbl.schema() == expected_schema

        index_values = tbl.view().to_dict()["index"]
        assert index_values[:5] == first_five
Exemplo n.º 3
0
 def test_table_date_series(self, util):
     """Load a daily-frequency series and check size, schema and index.

     The index is expected to be the first ten days of January 2000.
     """
     series = util.make_series(freq="D")
     expected_schema = {"index": date, "0": float}
     expected_index = [datetime(2000, 1, day) for day in range(1, 11)]

     tbl = Table(series)

     assert tbl.size() == 10
     assert tbl.schema() == expected_schema
     index_values = tbl.view().to_dict()["index"]
     assert index_values == expected_index
Exemplo n.º 4
0
 def test_table_time_series(self, util):
     """Load an hourly-frequency series and check size, schema and index.

     The index is expected to be the first ten hours of 2000-01-01.
     """
     series = util.make_series(freq="H")
     expected_schema = {"index": datetime, "0": float}
     expected_index = [
         datetime(2000, 1, 1, hour, 0, 0) for hour in range(10)
     ]

     tbl = Table(series)

     assert tbl.size() == 10
     assert tbl.schema() == expected_schema
     index_values = tbl.view().to_dict()["index"]
     assert index_values == expected_index
Exemplo n.º 5
0
    def test_table_dataframe_minute_index(self, util):
        """Load a 5-row minute-frequency DataFrame and check its index.

        The schema is expected to report a ``datetime`` index plus four
        float columns; the index should cover minutes 0-4 of 2000-01-01.
        """
        frame = util.make_dataframe(size=5, freq="min")

        expected_schema = {"index": datetime}
        expected_schema.update(dict.fromkeys(("a", "b", "c", "d"), float))
        expected_index = [
            datetime(2000, 1, 1, 0, minute) for minute in range(5)
        ]

        tbl = Table(frame)

        assert tbl.size() == 5
        assert tbl.schema() == expected_schema

        index_values = tbl.view().to_dict()["index"]
        assert index_values == expected_index
Exemplo n.º 6
0
    def test_arbitary_port_updates(self):
        """Create ten ports, then update through one randomly chosen port.

        The port ids returned by ``make_port`` are expected to be 1..10,
        and the update is expected to append the same rows a second time.
        """
        table = Table(data)
        created_ports = [table.make_port() for _ in range(10)]

        assert created_ports == list(range(1, 11))

        # randint is inclusive on both ends, so any id in 0..10 may be used.
        chosen_port = random.randint(0, 10)
        table.update(data, port_id=chosen_port)

        assert table.size() == 8

        expected_columns = {
            "a": [1, 2, 3, 4] * 2,
            "b": ["a", "b", "c", "d"] * 2,
            "c": [True, False, True, False] * 2,
        }
        assert table.view().to_dict() == expected_columns
    def test_table_np_implicit_index(self):
        """Partially update an unindexed numpy-backed table via ``__INDEX__``.

        Rows 1-4 of column "a" are overwritten; row 0 and the whole of
        column "b" are expected to stay unchanged.
        """
        initial_columns = {
            "a": np.array(["a", "b", "c", "d", "e"]),
            "b": np.array([1, 2, 3, 4, 5]),
        }
        partial_update = {
            "__INDEX__": np.array([1, 2, 3, 4]),
            "a": np.array(["bb", "cc", "dd", "ee"]),
        }
        expected_columns = {
            "a": ["a", "bb", "cc", "dd", "ee"],
            "b": [1, 2, 3, 4, 5],
        }

        tbl = Table(initial_columns)
        assert tbl.size() == 5
        assert tbl.schema() == {"a": str, "b": int}

        tbl.update(partial_update)

        assert tbl.view().to_dict() == expected_columns
Exemplo n.º 8
0
    def test_update_arrow_partial_updates_dictionary_stream_duplicates(self, util):
        """Update an indexed table from a dictionary arrow with duplicates.

        When the dictionary holds duplicate values, primary keys may be
        duplicated if that column is used as the index. The original note
        said to skip this test until a fix is found.

        NOTE(review): no skip marker is visible on this method - confirm
        whether one is applied elsewhere.
        """
        # The dictionary for column "a" deliberately repeats the value "a".
        encoded_columns = [
            ([0, 1, 1, None, 2], ["a", "b", "a"]),
            ([0, 1, None, 2, 1], ["x", "y", "z"]),
        ]
        arrow_data = util.make_dictionary_arrow(["a", "b"], encoded_columns)

        schema = {"a": str, "b": str}
        tbl = Table(schema, index="a")
        tbl.update(arrow_data)

        expected_columns = {
            "a": [None, "a", "b"],
            "b": ["z", "x", "y"],
        }
        assert tbl.size() == 3
        assert tbl.view().to_dict() == expected_columns
Exemplo n.º 9
0
 def test_update_arrow_updates_append_timestamp_all_formats_stream(
         self, util):
     """Append an arrow whose columns use s/ms/us/ns timestamp units.

     The same arrow is used to create and then update the table, so every
     column is expected to hold its values twice.
     """
     # One column per hour (9-12), each covering 1-10 February 2019.
     data = [
         [datetime(2019, 2, day, hour) for day in range(1, 11)]
         for hour in (9, 10, 11, 12)
     ]
     timestamp_types = [
         pa.timestamp(unit) for unit in ("s", "ms", "us", "ns")
     ]
     arrow_data = util.make_arrow(names, data, types=timestamp_types)

     tbl = Table(arrow_data)
     tbl.update(arrow_data)

     expected_columns = {
         column_name: column * 2
         for column_name, column in zip(("a", "b", "c", "d"), data)
     }
     assert tbl.size() == 20
     assert tbl.view().to_dict() == expected_columns
Exemplo n.º 10
0
 def test_update_arrow_arbitary_order(self, util):
     """Update from an arrow whose columns are in a different order.

     The update arrow lists its columns as c, b, a, d; the appended rows
     are still expected to land in the matching named columns.
     """
     numbers, letters = [1, 2, 3, 4], ["a", "b", "c", "d"]
     extra_numbers, extra_letters = [5, 6], ["e", "f"]

     # Copies keep every column an independent list, as in a literal.
     arrow = util.make_arrow(
         ["a", "b", "c", "d"],
         [list(numbers), list(letters), list(numbers), list(letters)])
     update_arrow = util.make_arrow(
         ["c", "b", "a", "d"],
         [list(extra_numbers), list(extra_letters),
          list(extra_numbers), list(extra_letters)])

     tbl = Table(arrow)
     assert tbl.schema() == {"a": int, "b": str, "c": int, "d": str}

     tbl.update(update_arrow)

     all_numbers = numbers + extra_numbers
     all_letters = letters + extra_letters
     assert tbl.size() == 6
     assert tbl.view().to_dict() == {
         "a": all_numbers,
         "b": all_letters,
         "c": all_numbers,
         "d": all_letters,
     }
Exemplo n.º 11
0
    def test_update_arrow_thread_safe_datetime_index(self, util):
        """Apply 100 random single-cell updates to a datetime-indexed table.

        Every update targets one of the three existing "uid" values, so the
        table is expected to still hold exactly three rows afterwards.
        """
        uid_values = (
            datetime(2020, 1, 15, 12, 17),
            datetime(2020, 1, 15, 12, 18),
            datetime(2020, 1, 15, 12, 19),
        )
        names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "uid"]

        # Ten string columns followed by the datetime index column.
        columns = [["a", "b", "c"] for _ in range(10)]
        columns.append(list(uid_values))

        arrow = util.make_arrow(names, columns)
        tbl = Table(arrow, index="uid")

        for _ in range(100):
            target_uid = uid_values[random.randint(0, 2)]
            new_value = (
                str(uuid.uuid4()) + str(random.randint(100, 1000000000)))
            # Indices 0-9 select one of the non-index columns.
            target_column = names[random.randint(0, 9)]
            update_arrow = util.make_arrow(
                [target_column, "uid"], [[new_value], [target_uid]])
            tbl.update(update_arrow)

        assert tbl.size() == 3
Exemplo n.º 12
0
 def test_update_bool_str_all_formats_from_schema(self):
     """Update a bool-schema table with several spellings of true/false.

     Each row puts a truthy spelling in "a" and a falsy spelling in "b";
     all of them are expected to be read as ``True`` / ``False``.
     """
     spellings = [
         ("True", "False"),
         ("t", "f"),
         ("true", "false"),
         (1, 0),
         ("on", "off"),
     ]
     bool_data = [
         {"a": truthy, "b": falsy} for truthy, falsy in spellings
     ]

     tbl = Table({"a": bool, "b": bool})
     tbl.update(bool_data)

     assert tbl.size() == 5
     assert tbl.view().to_dict() == {
         "a": [True] * 5,
         "b": [False] * 5,
     }
Exemplo n.º 13
0
    def test_colpivots(self):
        """Load a DataFrame with three-level MultiIndex columns.

        The frame has 3 rows and 16 column tuples; the resulting table is
        expected to hold 48 rows.
        """
        first_level = np.array([
            label for label in ("bar", "baz", "foo", "qux") for _ in range(4)
        ])
        second_level = np.array(["one", "one", "two", "two"] * 4)
        third_level = np.array(["X", "Y"] * 8)

        column_tuples = list(zip(first_level, second_level, third_level))
        column_index = pd.MultiIndex.from_tuples(
            column_tuples, names=["first", "second", "third"])

        values = np.random.randn(3, 16)
        df_both = pd.DataFrame(
            values, index=["A", "B", "C"], columns=column_index)

        table = Table(df_both)
        assert table.size() == 48
    def test_table_np_promote(self):
        """Update an int/float/int schema table from numpy arrays.

        Column "c" contains 2147483648, and the schema is expected to stay
        unchanged after the update. The all-NaN column "b" is expected to
        read back as ``None`` values.
        """
        schema = {"a": int, "b": float, "c": int}
        columns = {
            "a": np.arange(5),
            "b": np.full(5, np.nan),
            "c": np.array([1, 2, 3, 2147483648, 5]),
        }

        tbl = Table(dict(schema))
        tbl.update(columns)

        assert tbl.size() == 5
        assert tbl.schema() == schema

        # The expected "c" values are written as floats; in Python they
        # compare equal to the same integers.
        expected_columns = {
            "a": [0, 1, 2, 3, 4],
            "b": [None] * 5,
            "c": [1.0, 2.0, 3.0, 2147483648.0, 5.0],
        }
        assert tbl.view().to_dict() == expected_columns
Exemplo n.º 15
0
 def test_table_datetime(self):
     """Check that datetime values in a record are inferred as datetime."""
     row = {"a": datetime.now(), "b": datetime.now()}
     tbl = Table([row])
     expected_schema = {"a": datetime, "b": datetime}
     assert tbl.size() == 1
     assert tbl.schema() == expected_schema
Exemplo n.º 16
0
 def test_table_series(self):
     """Build a table from a named three-element pandas Series."""
     import pandas as pd
     series = pd.Series([1, 2, 3], name="a")
     tbl = Table(series)
     row_count = tbl.size()
     assert row_count == 3
 def test_table_infer_bool_str(self):
     """Check that "True"/"False" strings are inferred as bool columns."""
     rows = [
         {"a": "True", "b": "False"},
         {"a": "True", "b": "True"},
     ]
     tbl = Table(rows)
     expected_schema = {"a": bool, "b": bool}
     assert tbl.size() == 2
     assert tbl.schema() == expected_schema
 def test_table_infer_bool(self):
     """Check that Python bool values are inferred as bool columns."""
     rows = [
         {"a": True, "b": False},
         {"a": True, "b": True},
     ]
     tbl = Table(rows)
     expected_schema = {"a": bool, "b": bool}
     assert tbl.size() == 2
     assert tbl.schema() == expected_schema
 def test_table_float(self):
     """Load two numpy float columns and read the same values back."""
     columns = {
         "a": np.array([1.1, 2.2]),
         "b": np.array([3.3, 4.4]),
     }
     tbl = Table(columns)
     assert tbl.size() == 2
     expected_columns = {"a": [1.1, 2.2], "b": [3.3, 4.4]}
     assert tbl.view().to_dict() == expected_columns
 def test_empty_table(self):
     """Check that a table built from an empty list has no rows."""
     no_rows = []
     tbl = Table(no_rows)
     row_count = tbl.size()
     assert row_count == 0
Exemplo n.º 21
0
 def test_table_indexed_series(self):
     """Build a table from a pandas Series that has a string index.

     The schema is expected to expose the index as a ``str`` column named
     "index" next to the ``int`` value column "a".
     """
     import pandas as pd
     series = pd.Series([1, 2, 3], index=["a", "b", "c"], name="a")
     tbl = Table(series)
     expected_schema = {"index": str, "a": int}
     assert tbl.schema() == expected_schema
     assert tbl.size() == 3
Exemplo n.º 22
0
 def test_table_columnar(self):
     """Load column-oriented data and check columns, size and schema."""
     columns = {"a": [1, 2, 3], "b": [4, 5, 6]}
     tbl = Table(columns)
     expected_names = ["a", "b"]
     expected_schema = {"a": int, "b": int}
     assert tbl.columns() == expected_names
     assert tbl.size() == 3
     assert tbl.schema() == expected_schema
Exemplo n.º 23
0
 def test_update_bool_from_schema(self):
     """Update a bool-schema table with bool records and read them back."""
     schema = {"a": bool, "b": bool}
     bool_data = [
         {"a": True, "b": False},
         {"a": True, "b": True},
     ]
     tbl = Table(schema)
     tbl.update(bool_data)
     assert tbl.size() == 2
     records = tbl.view().to_records()
     assert records == bool_data
Exemplo n.º 24
0
 def test_table_int(self):
     """Check that integer values in records are inferred as int columns."""
     rows = [
         {"a": 1, "b": 2},
         {"a": 3, "b": 4},
     ]
     tbl = Table(rows)
     expected_schema = {"a": int, "b": int}
     assert tbl.size() == 2
     assert tbl.schema() == expected_schema
Exemplo n.º 25
0
 def test_update_bool_int_from_schema(self):
     """Update a bool-schema table with 1/0 and expect True/False back."""
     schema = {"a": bool, "b": bool}
     bool_data = [{"a": 1, "b": 0}, {"a": 1, "b": 0}]
     tbl = Table(schema)
     tbl.update(bool_data)
     assert tbl.size() == 2
     expected_columns = {"a": [True, True], "b": [False, False]}
     assert tbl.view().to_dict() == expected_columns
Exemplo n.º 26
0
 def test_table_index(self):
     """Load two rows sharing an index value; only the last should remain."""
     rows = [
         {"a": 1, "b": 2},
         {"a": 1, "b": 4},
     ]
     tbl = Table(rows, index="a")
     assert tbl.size() == 1
     records = tbl.view().to_records()
     assert records == [{"a": 1, "b": 4}]
 def test_table_bool(self):
     """Load two numpy bool columns and read the same values back."""
     columns = {
         "a": np.array([True, False]),
         "b": np.array([False, True]),
     }
     tbl = Table(columns)
     assert tbl.size() == 2
     expected_columns = {"a": [True, False], "b": [False, True]}
     assert tbl.view().to_dict() == expected_columns
Exemplo n.º 28
0
 def test_table_limit(self):
     """Load two rows with ``limit=1``; only the last row should remain."""
     rows = [
         {"a": 1, "b": 2},
         {"a": 3, "b": 4},
     ]
     tbl = Table(rows, limit=1)
     assert tbl.size() == 1
     records = tbl.view().to_records()
     assert records == [{"a": 3, "b": 4}]
 def test_table_int(self):
     """Load two numpy integer columns and read the same values back."""
     columns = {
         "a": np.array([1, 2, 3]),
         "b": np.array([4, 5, 6]),
     }
     tbl = Table(columns)
     assert tbl.size() == 3
     expected_columns = {"a": [1, 2, 3], "b": [4, 5, 6]}
     assert tbl.view().to_dict() == expected_columns
Exemplo n.º 30
0
 def test_table_nones(self):
     """Check that columns mixing ints and ``None`` are inferred as int."""
     rows = [
         {"a": 1, "b": None},
         {"a": None, "b": 2},
     ]
     tbl = Table(rows)
     expected_schema = {"a": int, "b": int}
     assert tbl.size() == 2
     assert tbl.schema() == expected_schema