Example #1
def test_read(self, protocol, get_random_path):
    with tm.ensure_clean(get_random_path) as path:
        df = tm.makeDataFrame()
        df.to_pickle(path, protocol=protocol)
        df2 = pd.read_pickle(path)
        tm.assert_frame_equal(df, df2)
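
The examples on this page rely on pandas' private testing helpers (`tm` is `pandas._testing`). As a self-contained stand-in for Example #1, the sketch below assumes `tm.makeDataFrame()` returns a small random float frame with columns A-D and replaces `tm.ensure_clean()` with a tempfile-based temporary path; the function name and the 30x4 shape are illustrative assumptions, not the pandas implementation.

import os
import pickle
import tempfile

import numpy as np
import pandas as pd


def roundtrip_pickle_sketch(protocol=pickle.DEFAULT_PROTOCOL):
    # Stand-in for tm.makeDataFrame(): a small random float frame, columns A-D.
    df = pd.DataFrame(np.random.randn(30, 4), columns=list("ABCD"))
    # Stand-in for tm.ensure_clean(): a temporary path removed afterwards.
    fd, path = tempfile.mkstemp(suffix=".pkl")
    os.close(fd)
    try:
        df.to_pickle(path, protocol=protocol)
        result = pd.read_pickle(path)
        pd.testing.assert_frame_equal(df, result)
    finally:
        os.remove(path)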
Example #2
def test_path_localpath(self):
    df = tm.makeDataFrame().reset_index()
    result = tm.round_trip_localpath(df.to_feather, pd.read_feather)
    tm.assert_frame_equal(df, result)
Example #3
def test_pickle_path_localpath():
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)
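
Examples #2 and #3 go through `tm.round_trip_localpath`, which writes the object via a `py.path.local` object and reads it back for comparison. Below is a minimal sketch of such a round-trip helper, using `pathlib.Path` instead of `py.path.local`; it is an assumption about the helper's behaviour, not its actual source.

import os
import tempfile
from pathlib import Path


def round_trip_path_sketch(writer, reader, suffix=""):
    # Write through a path object, read back, and return the result so the
    # caller can compare it against the original (mirrors the round_trip_* helpers).
    fd, name = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    path = Path(name)
    try:
        writer(path)         # e.g. df.to_pickle or df.to_feather
        return reader(path)  # e.g. pd.read_pickle or pd.read_feather
    finally:
        if path.exists():
            path.unlink()

# usage: result = round_trip_path_sketch(df.to_pickle, pd.read_pickle, suffix=".pkl")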
Example #4
def test_write_explicit_bad(self, compression, get_random_path):
    with pytest.raises(ValueError, match="Unrecognized compression type"):
        with tm.ensure_clean(get_random_path) as path:
            df = tm.makeDataFrame()
            df.to_pickle(path, compression=compression)
Example #5
def test_unknown_engine(self):
    with tm.ensure_clean() as path:
        df = tm.makeDataFrame()
        df.to_csv(path)
        with pytest.raises(ValueError, match="Unknown engine"):
            pd.read_csv(path, engine="pyt")
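
Example #5 only checks the error message for an unrecognized parser engine. The engines `read_csv` does accept are "c", "python" and, in newer pandas versions, "pyarrow"; a short usage sketch with a hypothetical file name:

# "data.csv" is a hypothetical file; the engine values are the ones pandas
# recognizes ("pyarrow" requires pandas >= 1.4 and the pyarrow package).
import pandas as pd

df_c = pd.read_csv("data.csv", engine="c")
df_py = pd.read_csv("data.csv", engine="python")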
Example #6
def test_passthrough_keywords(self):
    df = tm.makeDataFrame().reset_index()
    self.check_round_trip(df, write_kwargs=dict(version=1))
Example #7
def test_multiple_open_close(setup_path):
    # gh-4409: open & close multiple times

    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.to_hdf(path, "df", mode="w", format="table")

        # single
        store = HDFStore(path)
        assert "CLOSED" not in store.info()
        assert store.is_open

        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

    with ensure_clean_path(setup_path) as path:

        if pytables._table_file_open_policy_is_strict:
            # multiples
            store1 = HDFStore(path)
            msg = (
                r"The file [\S]* is already opened\.  Please close it before "
                r"reopening in write mode\."
            )
            with pytest.raises(ValueError, match=msg):
                HDFStore(path)

            store1.close()
        else:

            # multiples
            store1 = HDFStore(path)
            store2 = HDFStore(path)

            assert "CLOSED" not in store1.info()
            assert "CLOSED" not in store2.info()
            assert store1.is_open
            assert store2.is_open

            store1.close()
            assert "CLOSED" in store1.info()
            assert not store1.is_open
            assert "CLOSED" not in store2.info()
            assert store2.is_open

            store2.close()
            assert "CLOSED" in store1.info()
            assert "CLOSED" in store2.info()
            assert not store1.is_open
            assert not store2.is_open

            # nested close
            store = HDFStore(path, mode="w")
            store.append("df", df)

            store2 = HDFStore(path)
            store2.append("df2", df)
            store2.close()
            assert "CLOSED" in store2.info()
            assert not store2.is_open

            store.close()
            assert "CLOSED" in store.info()
            assert not store.is_open

            # double closing
            store = HDFStore(path, mode="w")
            store.append("df", df)

            store2 = HDFStore(path)
            store.close()
            assert "CLOSED" in store.info()
            assert not store.is_open

            store2.close()
            assert "CLOSED" in store2.info()
            assert not store2.is_open

    # ops on a closed store
    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.to_hdf(path, "df", mode="w", format="table")

        store = HDFStore(path)
        store.close()

        msg = r"[\S]* file is not open!"
        with pytest.raises(ClosedFileError, match=msg):
            store.keys()

        with pytest.raises(ClosedFileError, match=msg):
            "df" in store

        with pytest.raises(ClosedFileError, match=msg):
            len(store)

        with pytest.raises(ClosedFileError, match=msg):
            store["df"]

        with pytest.raises(ClosedFileError, match=msg):
            store.select("df")

        with pytest.raises(ClosedFileError, match=msg):
            store.get("df")

        with pytest.raises(ClosedFileError, match=msg):
            store.append("df2", df)

        with pytest.raises(ClosedFileError, match=msg):
            store.put("df3", df)

        with pytest.raises(ClosedFileError, match=msg):
            store.get_storer("df2")

        with pytest.raises(ClosedFileError, match=msg):
            store.remove("df2")

        with pytest.raises(ClosedFileError, match=msg):
            store.select("df")

        msg = "'HDFStore' object has no attribute 'df'"
        with pytest.raises(AttributeError, match=msg):
            store.df
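
Example #7 exercises manual open/close bookkeeping. In application code the usual way to avoid this is to use `HDFStore` as a context manager, which closes the file on exit; a short sketch (assumes the optional PyTables dependency is installed; the file name and key are hypothetical):

# HDFStore implements the context-manager protocol, so the store is closed
# automatically when the block exits.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(30, 4), columns=list("ABCD"))
with pd.HDFStore("store.h5", mode="w") as store:
    store.put("df", df, format="table")
    assert store.is_open
assert not store.is_open  # closed automatically by the context manager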
Example #8
def test_path_local_path(all_parsers):
    parser = all_parsers
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_csv,
                                     lambda p: parser.read_csv(p, index_col=0))
    tm.assert_frame_equal(df, result)
Example #9
def test_select_dtypes(setup_path):

    with ensure_clean_store(setup_path) as store:
        # with a Timestamp data column (GH #2637)
        df = DataFrame(
            {
                "ts": bdate_range("2012-01-01", periods=300),
                "A": np.random.randn(300),
            }
        )
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=["ts", "A"])

        result = store.select("df", "ts>=Timestamp('2012-02-01')")
        expected = df[df.ts >= Timestamp("2012-02-01")]
        tm.assert_frame_equal(expected, result)

        # bool columns (GH #2849)
        df = DataFrame(np.random.randn(5, 2), columns=["A", "B"])
        df["object"] = "foo"
        df.loc[4:5, "object"] = "bar"
        df["boolv"] = df["A"] > 0
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=True)

        expected = df[df.boolv == True].reindex(columns=["A", "boolv"])  # noqa
        for v in [True, "true", 1]:
            result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])
            tm.assert_frame_equal(expected, result)

        expected = df[df.boolv == False].reindex(columns=["A", "boolv"])  # noqa
        for v in [False, "false", 0]:
            result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])
            tm.assert_frame_equal(expected, result)

        # integer index
        df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)})
        _maybe_remove(store, "df_int")
        store.append("df_int", df)
        result = store.select("df_int", "index<10 and columns=['A']")
        expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
        tm.assert_frame_equal(expected, result)

        # float index
        df = DataFrame(
            {
                "A": np.random.rand(20),
                "B": np.random.rand(20),
                "index": np.arange(20, dtype="f8"),
            }
        )
        _maybe_remove(store, "df_float")
        store.append("df_float", df)
        result = store.select("df_float", "index<10.0 and columns=['A']")
        expected = df.reindex(index=list(df.index)[0:10], columns=["A"])
        tm.assert_frame_equal(expected, result)

    with ensure_clean_store(setup_path) as store:

        # floats w/o NaN
        df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64")
        df["cols"] = (df["cols"] + 10).apply(str)

        store.append("df1", df, data_columns=True)
        result = store.select("df1", where="values>2.0")
        expected = df[df["values"] > 2.0]
        tm.assert_frame_equal(expected, result)

        # floats with NaN
        df.iloc[0] = np.nan
        expected = df[df["values"] > 2.0]

        store.append("df2", df, data_columns=True, index=False)
        result = store.select("df2", where="values>2.0")
        tm.assert_frame_equal(expected, result)

        # https://github.com/PyTables/PyTables/issues/282
        # bug in selection when 0th row has a np.nan and an index
        # store.append('df3',df,data_columns=True)
        # result = store.select(
        #    'df3', where='values>2.0')
        # tm.assert_frame_equal(expected, result)

        # not in first position float with NaN ok too
        df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64")
        df["cols"] = (df["cols"] + 10).apply(str)

        df.iloc[1] = np.nan
        expected = df[df["values"] > 2.0]

        store.append("df4", df, data_columns=True)
        result = store.select("df4", where="values>2.0")
        tm.assert_frame_equal(expected, result)

    # test selection with comparison against numpy scalar
    # GH 11283
    with ensure_clean_store(setup_path) as store:
        df = tm.makeDataFrame()

        expected = df[df["A"] > 0]

        store.append("df", df, data_columns=True)
        np_zero = np.float64(0)  # noqa
        result = store.select("df", where=["A>np_zero"])
        tm.assert_frame_equal(expected, result)
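
Example #9 issues its queries through `HDFStore.select`. The same `where`/`columns` filtering is available through `pd.read_hdf` for one-off reads of table-format data, provided the queried columns were stored as data columns; a minimal sketch with hypothetical names:

# "data.h5", the key "df" and the column "A" are hypothetical; the key must
# have been written in table format with "A" as a data column for the where
# clause to be usable.
import pandas as pd

subset = pd.read_hdf("data.h5", "df", where="A > 0", columns=["A", "B"])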
Example #10
def test_append_raise(setup_path):

    with ensure_clean_store(setup_path) as store:

        # test append with invalid input to get good error messages

        # list in column
        df = tm.makeDataFrame()
        df["invalid"] = [["a"]] * len(df)
        assert df.dtypes["invalid"] == np.object_
        msg = re.escape(
            """Cannot serialize the column [invalid]
because its data contents are not [string] but [mixed] object dtype"""
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # multiple invalid columns
        df["invalid2"] = [["a"]] * len(df)
        df["invalid3"] = [["a"]] * len(df)
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # datetime with embedded nans as object
        df = tm.makeDataFrame()
        s = Series(datetime.datetime(2001, 1, 2), index=df.index)
        s = s.astype(object)
        s[0:5] = np.nan
        df["invalid"] = s
        assert df.dtypes["invalid"] == np.object_
        msg = "too many timezones in this block, create separate data columns"
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # directly ndarray
        msg = "value must be None, Series, or DataFrame"
        with pytest.raises(TypeError, match=msg):
            store.append("df", np.arange(10))

        # series directly
        msg = re.escape(
            "cannot properly create the storer for: "
            "[group->df,value-><class 'pandas.core.series.Series'>]"
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df", Series(np.arange(10)))

        # appending an incompatible table
        df = tm.makeDataFrame()
        store.append("df", df)

        df["foo"] = "foo"
        msg = re.escape(
            "invalid combination of [non_index_axes] on appending data "
            "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table "
            "[(1, ['A', 'B', 'C', 'D'])]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # incompatible type (GH 41897)
        _maybe_remove(store, "df")
        df["foo"] = Timestamp("20130101")
        store.append("df", df)
        df["foo"] = "bar"
        msg = re.escape(
            "invalid combination of [values_axes] on appending data "
            "[name->values_block_1,cname->values_block_1,"
            "dtype->bytes24,kind->string,shape->(1, 30)] "
            "vs current table "
            "[name->values_block_1,cname->values_block_1,"
            "dtype->datetime64,kind->datetime64,shape->None]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)
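
The first failure in Example #10 is caused by an object column whose values are lists rather than strings. One illustrative workaround (not part of the test, names hypothetical) is to convert such a column to plain strings before appending:

# Hypothetical fix for the "mixed object dtype" error: list-valued object
# columns are rejected by HDFStore.append, but a plain string column is accepted.
import pandas as pd

frame = pd.DataFrame({"A": range(3)})
frame["invalid"] = [["a"]] * len(frame)
frame["invalid"] = frame["invalid"].map(repr)  # lists -> their string repr
with pd.HDFStore("fixed.h5", mode="w") as store:
    store.append("frame", frame)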
Example #11
def test_pickle_path_localpath(setup_path):
    df = tm.makeDataFrame()
    result = tm.round_trip_pathlib(lambda p: df.to_hdf(p, "df"),
                                   lambda p: read_hdf(p, "df"))
    tm.assert_frame_equal(df, result)