Пример #1
0
    def test_table_arrow_loads_dictionary_stream_int8(self, util):
        """Load a dictionary arrow whose columns are typed int8 / string.

        Both columns are expected to be reported as ``str``, and the ``None``
        entries of the input are expected to come back as ``None`` values.
        """
        int8_string_pair = [pa.int8(), pa.string()]
        # one (indices, values) tuple per column, as passed to the helper
        encoded_columns = [
            ([0, 1, 1, None], ["abc", "def"]),
            ([0, 1, None, 2], ["xx", "yy", "zz"]),
        ]
        arrow_payload = util.make_dictionary_arrow(
            ["a", "b"],
            encoded_columns,
            types=[int8_string_pair, int8_string_pair],
        )
        table = Table(arrow_payload)

        expected_schema = dict(a=str, b=str)
        expected_columns = {
            "a": ["abc", "def", "def", None],
            "b": ["xx", "yy", None, "zz"],
        }

        assert table.size() == 4
        assert table.schema() == expected_schema
        assert table.view().to_dict() == expected_columns
Пример #2
0
    def test_table_mixed_schema(self):
        """A Table built from a schema of Python types reports the same types back."""
        declared = dict(a=int, b=float, c=str, d=bool, e=date, f=datetime)
        table = Table(declared)

        # compared against a separately built dict, not the object handed to Table
        expected = {
            name: kind
            for name, kind in zip("abcdef", (int, float, str, bool, date, datetime))
        }
        assert table.schema() == expected
Пример #3
0
    def test_table_output_string_schema(self):
        """``schema(True)`` is expected to return type names as strings."""
        declared = dict(a=int, b=float, c=str, d=bool, e=date, f=datetime)
        table = Table(declared)

        expected_names = dict(
            a="integer",
            b="float",
            c="string",
            d="boolean",
            e="date",
            f="datetime",
        )
        assert table.schema(True) == expected_names
Пример #4
0
 def test_table_time_series(self, util):
     """Load the series from ``util.make_series(freq="H")`` and check its index.

     The test expects ten rows whose ``index`` values are consecutive hours
     starting at 2000-01-01 00:00.
     """
     table = Table(util.make_series(freq="H"))

     expected_schema = {"index": datetime, "0": float}
     expected_index = [datetime(2000, 1, 1, hour, 0, 0) for hour in range(10)]

     assert table.size() == 10
     assert table.schema() == expected_schema
     assert table.view().to_dict()["index"] == expected_index
Пример #5
0
    def test_table_output_readable_schema(self):
        """A schema given as type-name strings is expected to read back as Python types."""
        type_names = dict(
            a="int32",
            b="float64",
            c="str",
            d="bool",
            e="date",
            f="datetime",
        )
        table = Table(type_names)

        expected = dict(a=int, b=float, c=str, d=bool, e=date, f=datetime)
        assert table.schema() == expected
Пример #6
0
    def test_table_pandas_periodindex(self, util):
        """Load the frame from ``util.make_period_dataframe(30)`` and check its index.

        The index column is expected to be typed ``date`` and its first five
        values to be the first day of January through May 2000.
        """
        table = Table(util.make_period_dataframe(30))

        expected_schema = {"index": date}
        expected_schema.update(dict.fromkeys("abcd", float))
        first_of_month = [datetime(2000, month, 1) for month in range(1, 6)]

        assert table.size() == 30
        assert table.schema() == expected_schema
        assert table.view().to_dict()["index"][:5] == first_of_month
Пример #7
0
 def test_table_date_series(self, util):
     """Load the series from ``util.make_series(freq="D")`` and check its index.

     The test expects ten rows whose ``index`` values are consecutive days
     starting at 2000-01-01, with the column typed ``date``.
     """
     table = Table(util.make_series(freq="D"))

     expected_schema = {"index": date, "0": float}
     expected_index = [datetime(2000, 1, day) for day in range(1, 11)]

     assert table.size() == 10
     assert table.schema() == expected_schema
     assert table.view().to_dict()["index"] == expected_index
Пример #8
0
    def test_table_np_datetime_Y(self):
        """Load a ``datetime64[Y]`` numpy column and expect a ``date`` schema.

        The output values are expected to be midnight on January 1st of each
        input year.
        """
        stamps = [
            datetime(2017, 5, 12, 11, 0),
            datetime(2018, 6, 12, 11, 0),
            datetime(2019, 7, 12, 11, 0),
        ]
        year_column = np.array(stamps, dtype="datetime64[Y]")
        table = Table({"a": year_column})

        assert table.schema() == {"a": date}

        year_starts = [datetime(year, 1, 1, 0, 0) for year in (2017, 2018, 2019)]
        assert table.view().to_dict() == {"a": year_starts}
Пример #9
0
    def test_table_dataframe_minute_index(self, util):
        """Load the frame from ``util.make_dataframe(size=5, freq="min")``.

        The index is expected to be typed ``datetime`` and to step one minute
        at a time from 2000-01-01 00:00.
        """
        frame = util.make_dataframe(size=5, freq="min")
        table = Table(frame)

        expected_schema = {"index": datetime}
        expected_schema.update(dict.fromkeys("abcd", float))

        assert table.size() == 5
        assert table.schema() == expected_schema

        minute_steps = [datetime(2000, 1, 1, 0, minute) for minute in range(5)]
        assert table.view().to_dict()["index"] == minute_steps
    def test_table_np_implicit_index(self):
        """Apply a partial update keyed by ``__INDEX__`` to a numpy-built Table.

        Only column ``a`` is present in the update; the test expects rows 1-4
        of ``a`` to change while ``b`` keeps its original values.
        """
        letters = np.array(["a", "b", "c", "d", "e"])
        numbers = np.array([1, 2, 3, 4, 5])
        table = Table({"a": letters, "b": numbers})

        assert table.size() == 5
        assert table.schema() == dict(a=str, b=int)

        patch = {
            "__INDEX__": np.array([1, 2, 3, 4]),
            "a": np.array(["bb", "cc", "dd", "ee"]),
        }
        table.update(patch)

        expected_columns = {
            "a": ["a", "bb", "cc", "dd", "ee"],
            "b": [1, 2, 3, 4, 5],
        }
        assert table.view().to_dict() == expected_columns
Пример #11
0
    def test_update_arrow_updates_less_columns_stream_file(self):
        """Update a Table declaring ``a`` and ``x`` from the two arrow fixtures.

        After both updates the test expects eight rows, with ``a`` holding
        1-4 twice and ``x`` holding only ``None``.
        """
        table = Table(dict(a=int, x=float))

        # binary mode is required: the fixture is read as raw bytes
        with open(SOURCE_STREAM_ARROW, mode='rb') as stream_handle:
            table.update(stream_handle.read())
        assert table.size() == 4
        assert table.schema() == dict(a=int, x=float)

        with open(SOURCE_FILE_ARROW, mode='rb') as file_handle:
            table.update(file_handle.read())
        assert table.size() == 8
        assert table.view().to_dict() == {
            "a": [1, 2, 3, 4] * 2,
            "x": [None] * 8,
        }
Пример #12
0
    def test_object_referencecount_update_clear(self):
        """Check the reference count of a stored object around ``update`` and ``clear``.

        A single ``CustomObjectStore`` is loaded into a Table, the same data is
        re-applied a random number of times through ``update``, and the table
        is then cleared. ``sys.getrefcount`` is asserted before the updates
        and after ``clear()``.

        NOTE: the assertions depend on exact reference counts, so adding any
        local that holds ``t`` would change the expected numbers.
        """
        t = CustomObjectStore(1)
        data = {"a": [t]}
        tbl = Table(data)
        assert tbl.schema() == {"a": object}
        assert tbl.size() == 1
        assert tbl.view().to_dict() == {"a": [t]}

        # Count references:
        # one for `t`, one for `data`, one for the argument to
        # sys.getrefcount, and one for the table
        assert sys.getrefcount(t) == 4

        # apply the same data a random number of times (5 to 10 inclusive)
        count = randint(5, 10)
        for _ in range(count):
            tbl.update([data])

        tbl.clear()
        assert tbl.size() == 0
        assert tbl.view().to_dict() == {}
        # one for `t`, one for `data`, one for the argument to sys.getrefcount;
        # the table's reference is expected to be gone after clear()
        assert sys.getrefcount(t) == 3
Пример #13
0
 def test_update_arrow_arbitary_order(self, util):
     """Update a Table with an arrow whose columns are listed in a different order.

     The initial arrow names its columns a, b, c, d; the update names them
     c, b, a, d. The test expects the two new rows to be appended to the
     matching columns.
     """
     initial_arrow = util.make_arrow(
         ["a", "b", "c", "d"],
         [
             [1, 2, 3, 4],
             ["a", "b", "c", "d"],
             [1, 2, 3, 4],
             ["a", "b", "c", "d"],
         ],
     )
     reordered_arrow = util.make_arrow(
         ["c", "b", "a", "d"],
         [[5, 6], ["e", "f"], [5, 6], ["e", "f"]],
     )

     table = Table(initial_arrow)
     assert table.schema() == dict(a=int, b=str, c=int, d=str)

     table.update(reordered_arrow)
     assert table.size() == 6

     numbers = [1, 2, 3, 4, 5, 6]
     letters = ["a", "b", "c", "d", "e", "f"]
     assert table.view().to_dict() == {
         "a": numbers,
         "b": letters,
         "c": numbers,
         "d": letters,
     }
Пример #14
0
    def test_update_arrow_updates_stream_file(self):
        """Update a three-column Table from the stream and file arrow fixtures.

        Each fixture is expected to contribute four rows, so the final view
        holds every column's values twice.
        """
        table = Table(dict(a=int, b=float, c=str))

        # binary mode is required: the fixture is read as raw bytes
        with open(SOURCE_STREAM_ARROW, mode='rb') as stream_handle:
            table.update(stream_handle.read())
        assert table.size() == 4
        assert table.schema() == dict(a=int, b=float, c=str)

        with open(SOURCE_FILE_ARROW, mode='rb') as file_handle:
            table.update(file_handle.read())
        assert table.size() == 8

        expected_columns = {
            "a": [1, 2, 3, 4] * 2,
            "b": [1.5, 2.5, 3.5, 4.5] * 2,
            "c": ["a", "b", "c", "d"] * 2,
        }
        assert table.view().to_dict() == expected_columns
 def test_table_bool_infer_str_all_formats_from_schema(self):
     """Rows mixing several truthy/falsy spellings are expected to infer as ``bool``."""
     # (value for "a", value for "b") per row
     spellings = [
         ("True", "False"),
         ("t", "f"),
         ("true", "false"),
         (1, 0),
         ("on", "off"),
     ]
     rows = [{"a": truthy, "b": falsy} for truthy, falsy in spellings]
     table = Table(rows)

     assert table.schema() == dict(a=bool, b=bool)
     assert table.size() == 5
     assert table.view().to_dict() == {
         "a": [True] * 5,
         "b": [False] * 5,
     }
    def test_table_np_promote(self):
        """Update an int/float/int Table with numpy arrays, one containing 2**31.

        The schema is expected to stay as declared. The expected values for
        ``c`` are written as floats, which compare equal to ints of the same
        value.
        """
        arrays = {
            "a": np.arange(5),
            "b": np.full(5, np.nan),
            # 2147483648 == 2**31, one past the largest signed 32-bit integer
            "c": np.array([1, 2, 3, 2147483648, 5]),
        }
        table = Table(dict(a=int, b=float, c=int))
        table.update(arrays)

        assert table.size() == 5
        assert table.schema() == dict(a=int, b=float, c=int)

        expected_columns = {
            "a": list(range(5)),
            "b": [None] * 5,
            "c": [1.0, 2.0, 3.0, 2147483648.0, 5.0],
        }
        assert table.view().to_dict() == expected_columns
Пример #17
0
 def test_table_infer_datetime_edge(self):
     """A datetime string after six ``None`` values is expected to infer as ``datetime``."""
     column = [None] * 6 + ["08/31/2019 00:00:01"]
     table = Table({"a": column})
     assert table.schema() == dict(a=datetime)
Пример #18
0
 def test_table_infer_ymd_date(self):
     """A ``YYYY/MM/DD`` string after six ``None`` values is expected to infer as ``date``."""
     column = [None] * 6 + ["2019/01/03"]
     table = Table({"a": column})
     assert table.schema() == dict(a=date)
Пример #19
0
 def test_table_infer_invalid_date(self):
     """A date-like string with day 55 is expected to fall back to ``str``."""
     column = [None] * 6 + ["08/55/2019"]
     table = Table({"a": column})
     assert table.schema() == dict(a=str)
Пример #20
0
 def test_table_infer_date_from_date(self):
     """A ``date`` object after six ``None`` values is expected to infer as ``date``."""
     # an actual `date` instance, not a string
     column = [None] * 6 + [date(2019, 7, 11)]
     table = Table({"a": column})
     assert table.schema() == dict(a=date)
Пример #21
0
 def test_table_infer_ambiguous_date(self):
     """``"01/03/2019"`` after six ``None`` values is expected to infer as ``date``."""
     column = [None] * 6 + ["01/03/2019"]
     table = Table({"a": column})
     assert table.schema() == dict(a=date)
Пример #22
0
 def test_table_infer_bool(self):
     """Four ``None`` values followed by three ``True`` are expected to infer as ``bool``."""
     column = [None] * 4 + [True] * 3
     table = Table({"a": column})
     assert table.schema() == dict(a=bool)
Пример #23
0
 def test_table_infer_str(self):
     """A plain string after six ``None`` values is expected to infer as ``str``."""
     column = [None] * 6 + ["abc"]
     table = Table({"a": column})
     assert table.schema() == dict(a=str)
Пример #24
0
 def test_table_infer_float(self):
     """Four ``None`` values followed by two floats are expected to infer as ``float``."""
     column = [None] * 4 + [1.0, 2.0]
     table = Table({"a": column})
     assert table.schema() == dict(a=float)
Пример #25
0
 def test_table_datetime_infer_no_false_positive(self):
     """Text that merely contains date separator characters is expected to stay ``str``."""
     not_a_date = " . - / but clearly not a date"
     table = Table({"a": [not_a_date]})
     assert table.schema() == dict(a=str)
Пример #26
0
 def test_table_strict_datetime_separator_infer(self):
     """``"2019-10-01 7:30"`` is expected to infer as ``datetime``."""
     stamp = "2019-10-01 7:30"
     table = Table({"a": [stamp]})
     assert table.schema() == dict(a=datetime)
Пример #27
0
 def test_table_strict_date_infer(self):
     """The space-separated string ``"2019 09 10"`` is expected to infer as ``date``."""
     spaced_date = "2019 09 10"
     table = Table({"a": [spaced_date]})
     assert table.schema() == dict(a=date)
Пример #28
0
 def test_table_strict_datetime_infer(self):
     """Digit-only strings ``'10'`` down to ``'1'`` are expected to infer as ``str``."""
     countdown = [str(number) for number in range(10, 0, -1)]
     table = Table({"a": countdown})
     assert table.schema() == dict(a=str)
 def test_table_recarray(self):
     """Build a Table from a numpy record array with one float and one int field."""
     field_types = [('x', '<f8'), ('y', '<i8')]
     records = np.array([(1.0, 2), (3.0, 4)], dtype=field_types)
     rec_table = Table(records.view(np.recarray))

     assert rec_table.schema() == dict(x=float, y=int)
     assert rec_table.view().to_dict() == dict(x=[1.0, 3.0], y=[2, 4])
Пример #30
0
 def test_table_infer_mixed_datetime(self):
     """A datetime string after five ``None`` values is expected to infer as ``datetime``."""
     column = [None] * 5 + ["08/11/2019 13:14:15"]
     table = Table({"a": column})
     assert table.schema() == dict(a=datetime)