def test_table_arrow_loads_dictionary_stream_int32(self, util):
    """Load an arrow from ``util.make_dictionary_arrow`` built with
    ``pa.int32()``/``pa.string()`` types and check the string columns."""
    # Each column is an (indices, dictionary) pair; the expected output
    # below shows indices resolved against the dictionary, None kept as None.
    column_a = ([0, 1, 1, None], ["abc", "def"])
    column_b = ([0, 1, None, 2], ["xx", "yy", "zz"])
    type_pair = [pa.int32(), pa.string()]

    arrow_data = util.make_dictionary_arrow(
        ["a", "b"],
        [column_a, column_b],
        types=[type_pair, type_pair],
    )
    table = Table(arrow_data)

    assert table.size() == 4
    assert table.schema() == {"a": str, "b": str}

    expected = {
        "a": ["abc", "def", "def", None],
        "b": ["xx", "yy", None, "zz"],
    }
    assert table.view().to_dict() == expected
def test_table_pandas_periodindex(self, util):
    """Build a Table from ``util.make_period_dataframe(30)`` and check its
    size, schema and the first five index values."""
    frame = util.make_period_dataframe(30)
    table = Table(frame)

    expected_schema = {"index": date}
    for column in ("a", "b", "c", "d"):
        expected_schema[column] = float

    # First day of each of the first five months of 2000.
    first_five = [datetime(2000, month, 1) for month in range(1, 6)]

    assert table.size() == 30
    assert table.schema() == expected_schema
    assert table.view().to_dict()["index"][:5] == first_five
def test_table_date_series(self, util):
    """Build a Table from ``util.make_series(freq="D")`` and check that the
    index is typed as ``date`` with ten consecutive days."""
    series = util.make_series(freq="D")
    table = Table(series)

    # January 1st through January 10th, 2000.
    expected_index = [datetime(2000, 1, day) for day in range(1, 11)]

    assert table.size() == 10
    assert table.schema() == {"index": date, "0": float}
    assert table.view().to_dict()["index"] == expected_index
def test_table_time_series(self, util):
    """Build a Table from ``util.make_series(freq="H")`` and check that the
    index is typed as ``datetime`` with ten consecutive hours."""
    series = util.make_series(freq="H")
    table = Table(series)

    # Hours 00:00 through 09:00 on January 1st, 2000.
    expected_index = [datetime(2000, 1, 1, hour, 0, 0) for hour in range(10)]

    assert table.size() == 10
    assert table.schema() == {"index": datetime, "0": float}
    assert table.view().to_dict()["index"] == expected_index
def test_table_dataframe_minute_index(self, util):
    """Build a Table from ``util.make_dataframe(size=5, freq="min")`` and
    check size, schema and the minute-spaced index values."""
    frame = util.make_dataframe(size=5, freq="min")
    table = Table(frame)

    expected_schema = {"index": datetime}
    for column in ("a", "b", "c", "d"):
        expected_schema[column] = float

    # Minutes 00:00 through 00:04 on January 1st, 2000.
    expected_index = [datetime(2000, 1, 1, 0, minute) for minute in range(5)]

    assert table.size() == 5
    assert table.schema() == expected_schema
    assert table.view().to_dict()["index"] == expected_index
def test_arbitary_port_updates(self):
    """Create ten ports, update through a randomly chosen port id, and
    check that the update was appended to the table."""
    # `data` is not defined in this function; it comes from an outer scope.
    tbl = Table(data)

    created_ports = [tbl.make_port() for _ in range(10)]
    assert created_ports == list(range(1, 11))

    # randint is inclusive on both ends, so this picks any id from 0 to 10.
    chosen_port = random.randint(0, 10)
    tbl.update(data, port_id=chosen_port)

    assert tbl.size() == 8

    expected = {
        "a": [1, 2, 3, 4, 1, 2, 3, 4],
        "b": ["a", "b", "c", "d", "a", "b", "c", "d"],
        "c": [True, False, True, False, True, False, True, False],
    }
    assert tbl.view().to_dict() == expected
def test_table_np_implicit_index(self):
    """Partially update a numpy-backed Table through the ``__INDEX__``
    column and check that only the addressed rows of "a" change."""
    initial = {
        "a": np.array(["a", "b", "c", "d", "e"]),
        "b": np.array([1, 2, 3, 4, 5]),
    }
    table = Table(initial)

    assert table.size() == 5
    assert table.schema() == {"a": str, "b": int}

    # The update carries row positions 1-4 and new values for "a" only.
    partial = {
        "__INDEX__": np.array([1, 2, 3, 4]),
        "a": np.array(["bb", "cc", "dd", "ee"]),
    }
    table.update(partial)

    expected = {
        "a": ["a", "bb", "cc", "dd", "ee"],
        "b": [1, 2, 3, 4, 5],
    }
    assert table.view().to_dict() == expected
def test_update_arrow_partial_updates_dictionary_stream_duplicates(self, util):
    """Update an indexed Table from a dictionary arrow whose dictionary
    contains duplicate values.

    If there are duplicate values in the dictionary, primary keys may be
    duplicated if the column is used as an index. This test is meant to be
    skipped for now while the best way to fix that is worked out.
    """
    # NOTE(review): no skip marker is visible on this function from here -
    # confirm that one is actually applied.

    # Column "a" has "a" twice in its dictionary (positions 0 and 2).
    column_a = ([0, 1, 1, None, 2], ["a", "b", "a"])
    column_b = ([0, 1, None, 2, 1], ["x", "y", "z"])
    arrow_data = util.make_dictionary_arrow(["a", "b"], [column_a, column_b])

    table = Table({"a": str, "b": str}, index="a")
    table.update(arrow_data)

    assert table.size() == 3

    expected = {
        "a": [None, "a", "b"],
        "b": ["z", "x", "y"],
    }
    assert table.view().to_dict() == expected
def test_update_arrow_updates_append_timestamp_all_formats_stream(
        self, util):
    """Load an arrow with s/ms/us/ns timestamp columns, append the same
    arrow again, and check that every column holds its values twice."""
    # One column per hour (9, 10, 11, 12), each covering February 1-10, 2019.
    data = [
        [datetime(2019, 2, day, hour) for day in range(1, 11)]
        for hour in (9, 10, 11, 12)
    ]
    timestamp_types = [pa.timestamp(unit) for unit in ("s", "ms", "us", "ns")]

    # `names` is not defined in this function; it comes from an outer scope.
    arrow_data = util.make_arrow(names, data, types=timestamp_types)

    table = Table(arrow_data)
    table.update(arrow_data)

    assert table.size() == 20

    expected = {
        "a": data[0] * 2,
        "b": data[1] * 2,
        "c": data[2] * 2,
        "d": data[3] * 2,
    }
    assert table.view().to_dict() == expected
def test_update_arrow_arbitary_order(self, util):
    """Append an arrow whose columns are listed in a different order than
    the table's and check that the rows are appended as expected."""
    numbers = [1, 2, 3, 4]
    letters = ["a", "b", "c", "d"]
    initial_arrow = util.make_arrow(
        ["a", "b", "c", "d"],
        [numbers, letters, list(numbers), list(letters)],
    )

    # Same columns, but named in the order c, b, a, d.
    reordered_arrow = util.make_arrow(
        ["c", "b", "a", "d"],
        [[5, 6], ["e", "f"], [5, 6], ["e", "f"]],
    )

    table = Table(initial_arrow)
    assert table.schema() == {"a": int, "b": str, "c": int, "d": str}

    table.update(reordered_arrow)
    assert table.size() == 6

    all_numbers = [1, 2, 3, 4, 5, 6]
    all_letters = ["a", "b", "c", "d", "e", "f"]
    assert table.view().to_dict() == {
        "a": all_numbers,
        "b": all_letters,
        "c": all_numbers,
        "d": all_letters,
    }
def test_update_arrow_thread_safe_datetime_index(self, util):
    """Apply 100 random single-row arrow updates to a Table indexed on a
    datetime column and check that the row count stays at three."""
    uid_values = (
        datetime(2020, 1, 15, 12, 17),
        datetime(2020, 1, 15, 12, 18),
        datetime(2020, 1, 15, 12, 19),
    )
    names = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "uid"]

    # Ten string columns followed by the datetime "uid" column.
    data = [["a", "b", "c"] for _ in range(10)]
    data.append(list(uid_values))

    table = Table(util.make_arrow(names, data), index="uid")

    for _ in range(100):
        # Every update targets one of the three existing uid values.
        uid = uid_values[random.randint(0, 2)]
        payload = str(uuid.uuid4()) + str(random.randint(100, 1000000000))
        target_column = names[random.randint(0, 9)]
        update_arrow = util.make_arrow([target_column, "uid"], [[payload], [uid]])
        table.update(update_arrow)

    assert table.size() == 3
def test_update_bool_str_all_formats_from_schema(self):
    """Update a bool-schema Table with several truthy/falsy spellings and
    check that each row is read as True for "a" and False for "b"."""
    # (value for "a", value for "b") pairs, one per row.
    spellings = [
        ("True", "False"),
        ("t", "f"),
        ("true", "false"),
        (1, 0),
        ("on", "off"),
    ]
    rows = [{"a": truthy, "b": falsy} for truthy, falsy in spellings]

    table = Table({"a": bool, "b": bool})
    table.update(rows)

    assert table.size() == 5
    assert table.view().to_dict() == {
        "a": [True] * 5,
        "b": [False] * 5,
    }
def test_colpivots(self):
    """Build a Table from a DataFrame with three-level MultiIndex columns
    and check the resulting row count."""
    # Three parallel label arrays of 16 entries, one per column level.
    level_first = np.array(
        ['bar'] * 4 + ['baz'] * 4 + ['foo'] * 4 + ['qux'] * 4)
    level_second = np.array(['one', 'one', 'two', 'two'] * 4)
    level_third = np.array(['X', 'Y'] * 8)

    column_tuples = list(zip(level_first, level_second, level_third))
    column_index = pd.MultiIndex.from_tuples(
        column_tuples, names=['first', 'second', 'third'])

    frame = pd.DataFrame(
        np.random.randn(3, 16),
        index=['A', 'B', 'C'],
        columns=column_index,
    )

    tbl = Table(frame)
    # 3 rows x 16 columns in the source frame.
    assert tbl.size() == 48
def test_table_np_promote(self):
    """Update an int/float/int schema Table from numpy arrays, including a
    value of 2147483648, and check size, schema and contents."""
    schema = {"a": int, "b": float, "c": int}
    columns = {
        "a": np.arange(5),
        "b": np.full(5, np.nan),
        "c": np.array([1, 2, 3, 2147483648, 5]),
    }

    table = Table(dict(schema))
    table.update(columns)

    assert table.size() == 5
    assert table.schema() == schema

    # The NaN column is expected back as None. The expected "c" values are
    # written as floats; Python treats 1 == 1.0 as equal, so the comparison
    # also holds against integer output.
    expected = {
        "a": [0, 1, 2, 3, 4],
        "b": [None] * 5,
        "c": [1.0, 2.0, 3.0, 2147483648.0, 5.0],
    }
    assert table.view().to_dict() == expected
def test_table_datetime(self):
    """Build a Table from one row of ``datetime`` values and check that
    both columns are inferred as ``datetime``."""
    row = {"a": datetime.now(), "b": datetime.now()}
    table = Table([row])

    assert table.size() == 1
    assert table.schema() == {"a": datetime, "b": datetime}
def test_table_series(self):
    """Build a Table from a named pandas Series and check its size."""
    import pandas as pd

    series = pd.Series([1, 2, 3], name="a")
    table = Table(series)

    assert table.size() == 3
def test_table_infer_bool_str(self):
    """Check that "True"/"False" strings are inferred as ``bool`` columns."""
    rows = [
        {"a": "True", "b": "False"},
        {"a": "True", "b": "True"},
    ]
    table = Table(rows)

    assert table.size() == 2
    assert table.schema() == {"a": bool, "b": bool}
def test_table_infer_bool(self):
    """Check that Python ``bool`` values are inferred as ``bool`` columns."""
    rows = [
        {"a": True, "b": False},
        {"a": True, "b": True},
    ]
    table = Table(rows)

    assert table.size() == 2
    assert table.schema() == {"a": bool, "b": bool}
def test_table_float(self):
    """Build a Table from numpy float arrays and check the values."""
    columns = {
        "a": np.array([1.1, 2.2]),
        "b": np.array([3.3, 4.4]),
    }
    table = Table(columns)

    assert table.size() == 2

    expected = {"a": [1.1, 2.2], "b": [3.3, 4.4]}
    assert table.view().to_dict() == expected
def test_empty_table(self):
    """Check that a Table built from an empty list has size zero."""
    table = Table([])

    assert table.size() == 0
def test_table_indexed_series(self):
    """Build a Table from a pandas Series with a string index and check
    that the index becomes a ``str`` column named "index"."""
    import pandas as pd

    series = pd.Series([1, 2, 3], index=["a", "b", "c"], name="a")
    table = Table(series)

    expected_schema = {"index": str, "a": int}
    assert table.schema() == expected_schema
    assert table.size() == 3
def test_table_columnar(self):
    """Build a Table from a dict of lists and check columns, size and
    schema."""
    columns = {"a": [1, 2, 3], "b": [4, 5, 6]}
    table = Table(columns)

    assert table.columns() == ["a", "b"]
    assert table.size() == 3
    assert table.schema() == {"a": int, "b": int}
def test_update_bool_from_schema(self):
    """Update a bool-schema Table with bool rows and check that the same
    records come back."""
    rows = [
        {"a": True, "b": False},
        {"a": True, "b": True},
    ]
    table = Table({"a": bool, "b": bool})
    table.update(rows)

    assert table.size() == 2
    assert table.view().to_records() == rows
def test_table_int(self):
    """Build a Table from row dicts of ints and check that both columns
    are inferred as ``int``."""
    # NOTE(review): a second function named test_table_int appears later in
    # this file; if both live in the same scope, the later one replaces this
    # one and this test never runs - confirm.
    rows = [
        {"a": 1, "b": 2},
        {"a": 3, "b": 4},
    ]
    table = Table(rows)

    assert table.size() == 2
    assert table.schema() == {"a": int, "b": int}
def test_update_bool_int_from_schema(self):
    """Update a bool-schema Table with 1/0 integers and check they are
    read back as True/False."""
    rows = [{"a": 1, "b": 0} for _ in range(2)]
    table = Table({"a": bool, "b": bool})
    table.update(rows)

    assert table.size() == 2

    expected = {"a": [True, True], "b": [False, False]}
    assert table.view().to_dict() == expected
def test_table_index(self):
    """Build a Table indexed on "a" from two rows sharing the same key and
    check that a single row with the later "b" value remains."""
    rows = [
        {"a": 1, "b": 2},
        {"a": 1, "b": 4},
    ]
    table = Table(rows, index="a")

    assert table.size() == 1
    assert table.view().to_records() == [{"a": 1, "b": 4}]
def test_table_bool(self):
    """Build a Table from numpy bool arrays and check the values."""
    columns = {
        "a": np.array([True, False]),
        "b": np.array([False, True]),
    }
    table = Table(columns)

    assert table.size() == 2

    expected = {"a": [True, False], "b": [False, True]}
    assert table.view().to_dict() == expected
def test_table_limit(self):
    """Build a Table with ``limit=1`` from two rows and check that only
    the second row is kept."""
    rows = [
        {"a": 1, "b": 2},
        {"a": 3, "b": 4},
    ]
    table = Table(rows, limit=1)

    assert table.size() == 1
    assert table.view().to_records() == [{"a": 3, "b": 4}]
def test_table_int(self):
    """Build a Table from numpy int arrays and check the values."""
    # NOTE(review): an earlier function in this file is also named
    # test_table_int; if both live in the same scope, this definition
    # replaces the earlier one, which then never runs - confirm.
    columns = {
        "a": np.array([1, 2, 3]),
        "b": np.array([4, 5, 6]),
    }
    table = Table(columns)

    assert table.size() == 3

    expected = {"a": [1, 2, 3], "b": [4, 5, 6]}
    assert table.view().to_dict() == expected
def test_table_nones(self):
    """Check that columns mixing ints and None are still inferred as
    ``int``."""
    rows = [
        {"a": 1, "b": None},
        {"a": None, "b": 2},
    ]
    table = Table(rows)

    assert table.size() == 2
    assert table.schema() == {"a": int, "b": int}