# Exemplo n.º 1
def test_categorical_conversion(setup_path):
    """read_hdf on categorical data columns returns no rows when the
    `where` criterion matches nothing (GH13322)."""
    df = DataFrame(
        {
            "obsids": ["ESP_012345_6789", "ESP_987654_3210"],
            "imgids": ["APF00006np", "APF0001imm"],
            "data": [4.3, 9.8],
        }
    )

    def _check_empty_select(frame):
        # No obsid equals "B", so we expect an empty frame with matching dtypes.
        expected = frame.iloc[[], :]
        with ensure_clean_path(setup_path) as path:
            frame.to_hdf(path, "df", format="table", data_columns=True)
            result = read_hdf(path, "df", where="obsids=B")
            tm.assert_frame_equal(result, expected)

    # Plain object-dtype string columns.
    _check_empty_select(df)

    # Same check once the string columns are categorical.
    df.obsids = df.obsids.astype("category")
    df.imgids = df.imgids.astype("category")
    _check_empty_select(df)
# Exemplo n.º 2
def test_to_hdf_with_object_column_names(setup_path):
    """Writing with data_columns=True requires object-dtype column labels (GH9057)."""
    # Non-object column indexes cannot back a DataIndexableCol and must raise.
    failing_makers = [
        tm.makeIntIndex,
        tm.makeFloatIndex,
        tm.makeDateIndex,
        tm.makeTimedeltaIndex,
        tm.makePeriodIndex,
    ]
    # Object-like column indexes are accepted and round-trip.
    passing_makers = [
        tm.makeStringIndex,
        tm.makeCategoricalIndex,
        tm.makeUnicodeIndex,
    ]

    for make_index in failing_makers:
        frame = DataFrame(np.random.randn(10, 2), columns=make_index(2))
        with ensure_clean_path(setup_path) as path:
            with catch_warnings(record=True):
                msg = "cannot have non-object label DataIndexableCol"
                with pytest.raises(ValueError, match=msg):
                    frame.to_hdf(path, "df", format="table", data_columns=True)

    for make_index in passing_makers:
        frame = DataFrame(np.random.randn(10, 2), columns=make_index(2))
        with ensure_clean_path(setup_path) as path:
            with catch_warnings(record=True):
                frame.to_hdf(path, "df", format="table", data_columns=True)
                # Selecting on the first index entry must return at least one row.
                result = read_hdf(path, "df", where=f"index = [{frame.index[0]}]")
                assert len(result)
# Exemplo n.º 3
def test_store_dropna(setup_path):
    """Table writes keep all-NaN rows by default; dropna=True removes them (GH9382)."""
    df_with_missing = DataFrame(
        {"col1": [0.0, np.nan, 2.0], "col2": [1.0, np.nan, np.nan]},
        index=list("abc"),
    )
    df_without_missing = DataFrame(
        {"col1": [0.0, 2.0], "col2": [1.0, np.nan]}, index=list("ac")
    )

    def _roundtrip(frame, **kwargs):
        # Write `frame` as a table with the given options and read it back.
        with ensure_clean_path(setup_path) as path:
            frame.to_hdf(path, "df", format="table", **kwargs)
            return read_hdf(path, "df")

    # Default must NOT drop all-NaN rows.
    tm.assert_frame_equal(df_with_missing, _roundtrip(df_with_missing))

    # Explicit dropna=False behaves exactly like the default.
    tm.assert_frame_equal(df_with_missing, _roundtrip(df_with_missing, dropna=False))

    # dropna=True removes rows that are entirely NaN.
    tm.assert_frame_equal(df_without_missing, _roundtrip(df_with_missing, dropna=True))
# Exemplo n.º 4
def test_invalid_terms(setup_path):
    """Malformed ``where``/``Term`` expressions raise the documented errors,
    and referencing non-data columns in ``where`` is rejected."""

    with ensure_clean_store(setup_path) as store:

        with catch_warnings(record=True):

            df = tm.makeTimeDataFrame()
            df["string"] = "foo"
            df.loc[df.index[0:4], "string"] = "bar"

            store.put("df", df, format="table")

            # some invalid terms
            # Term requires a `where` argument at construction time.
            msg = re.escape(
                "__init__() missing 1 required positional argument: 'where'")
            with pytest.raises(TypeError, match=msg):
                Term()

            # more invalid
            # A bare value with no comparison is not a valid condition.
            msg = re.escape("cannot process expression [df.index[3]], "
                            "[2000-01-06 00:00:00] is not a valid condition")
            with pytest.raises(ValueError, match=msg):
                store.select("df", "df.index[3]")

            # A dangling comparison operator is a plain syntax error.
            msg = "invalid syntax"
            with pytest.raises(SyntaxError, match=msg):
                store.select("df", "index>")

    # from the docs
    with ensure_clean_path(setup_path) as path:
        dfq = DataFrame(
            np.random.randn(10, 4),
            columns=list("ABCD"),
            index=date_range("20130101", periods=10),
        )
        # data_columns=True makes every column queryable, so both
        # expressions below are valid.
        dfq.to_hdf(path, "dfq", format="table", data_columns=True)

        # check ok
        read_hdf(path,
                 "dfq",
                 where="index>Timestamp('20130104') & columns=['A', 'B']")
        read_hdf(path, "dfq", where="A>0 or C>0")

    # catch the invalid reference
    with ensure_clean_path(setup_path) as path:
        dfq = DataFrame(
            np.random.randn(10, 4),
            columns=list("ABCD"),
            index=date_range("20130101", periods=10),
        )
        # Written WITHOUT data_columns: only `index` and `columns` are
        # queryable, so referencing columns A or C must fail below.
        dfq.to_hdf(path, "dfq", format="table")

        msg = (r"The passed where expression: A>0 or C>0\n\s*"
               r"contains an invalid variable reference\n\s*"
               r"all of the variable references must be a reference to\n\s*"
               r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*"
               r"The currently defined references are: index,columns\n")
        with pytest.raises(ValueError, match=msg):
            read_hdf(path, "dfq", where="A>0 or C>0")
# Exemplo n.º 5
def test_complex_series_error(setup_path):
    """Complex values cannot be indexed in table format; index=False works.

    The default index=True write must raise TypeError; writing with
    index=False succeeds and round-trips the data.
    """
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))

    # Match the exact error text so an unrelated TypeError cannot pass
    # (consistent with the other test_complex_series_error in this file).
    msg = ("Columns containing complex values can be stored "
           "but cannot be indexed when using table format. "
           "Either use fixed format, set index=False, "
           "or do not include the columns containing complex "
           "values to data_columns when initializing the table.")

    with ensure_clean_path(setup_path) as path:
        with pytest.raises(TypeError, match=msg):
            s.to_hdf(path, "obj", format="t")

    with ensure_clean_path(setup_path) as path:
        s.to_hdf(path, "obj", format="t", index=False)
        reread = read_hdf(path, "obj")
        tm.assert_series_equal(s, reread)
# Exemplo n.º 6
def test_complibs_default_settings(setup_path):
    """Default compression settings behave as documented (GH15943).

    - complevel alone implies complib='zlib'
    - complib alone (no complevel) leaves compression disabled
    - setting neither results in no compression
    - HDFStore.append can override file-level defaults per table
    """
    # GH15943
    df = tm.makeDataFrame()

    # Set complevel and check if complib is automatically set to
    # default value
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df", complevel=9)
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        # Inspect the PyTables leaf metadata to confirm the settings applied.
        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 9
                assert node.filters.complib == "zlib"

    # Set complib and check to see if compression is disabled
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df", complib="zlib")
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None

    # Check if not setting complib or complevel results in no compression
    with ensure_clean_path(setup_path) as tmpfile:
        df.to_hdf(tmpfile, "df")
        result = read_hdf(tmpfile, "df")
        tm.assert_frame_equal(result, df)

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None

    # Check if file-defaults can be overridden on a per table basis
    with ensure_clean_path(setup_path) as tmpfile:
        store = HDFStore(tmpfile)
        # "dfc" gets explicit compression; "df" keeps the (uncompressed) default.
        store.append("dfc", df, complevel=9, complib="blosc")
        store.append("df", df)
        store.close()

        with tables.open_file(tmpfile, mode="r") as h5file:
            for node in h5file.walk_nodes(where="/df", classname="Leaf"):
                assert node.filters.complevel == 0
                assert node.filters.complib is None
            for node in h5file.walk_nodes(where="/dfc", classname="Leaf"):
                assert node.filters.complevel == 9
                assert node.filters.complib == "blosc"
def test_mode(setup_path, mode):
    """HDFStore, to_hdf and read_hdf honor the file ``mode`` argument.

    Read-only modes ('r', 'r+') must fail on a missing file; mode 'w'
    is rejected by read_hdf.
    """
    df = tm.makeTimeDataFrame()
    msg = r"[\S]* does not exist"
    with ensure_clean_path(setup_path) as path:

        # constructor
        if mode in ["r", "r+"]:
            # The file does not exist yet, so read-only open must fail.
            with pytest.raises(OSError, match=msg):
                HDFStore(path, mode=mode)

        else:
            store = HDFStore(path, mode=mode)
            assert store._handle.mode == mode
            store.close()

    with ensure_clean_path(setup_path) as path:

        # context
        if mode in ["r", "r+"]:
            with pytest.raises(OSError, match=msg):
                with HDFStore(path, mode=mode) as store:
                    pass
        else:
            with HDFStore(path, mode=mode) as store:
                assert store._handle.mode == mode

    with ensure_clean_path(setup_path) as path:

        # conv write
        if mode in ["r", "r+"]:
            with pytest.raises(OSError, match=msg):
                df.to_hdf(path, "df", mode=mode)
            # Create the file so the read checks below have something to open.
            df.to_hdf(path, "df", mode="w")
        else:
            df.to_hdf(path, "df", mode=mode)

        # conv read
        if mode in ["w"]:
            # Write-only mode makes no sense for reading and must raise.
            msg = (
                "mode w is not allowed while performing a read. "
                r"Allowed modes are r, r\+ and a."
            )
            with pytest.raises(ValueError, match=msg):
                read_hdf(path, "df", mode=mode)
        else:
            result = read_hdf(path, "df", mode=mode)
            tm.assert_frame_equal(result, df)
# Exemplo n.º 8
    def test_supported_for_subclass_dataframe(self):
        """A DataFrame subclass round-trips through HDF as a plain DataFrame."""
        data = {"a": [1, 2], "b": [3, 4]}
        sdf = tm.SubclassedDataFrame(data, dtype=np.intp)
        expected = DataFrame(data, dtype=np.intp)

        # Via DataFrame.to_hdf.
        with ensure_clean_path("temp.h5") as path:
            sdf.to_hdf(path, "df")
            tm.assert_frame_equal(read_hdf(path, "df"), expected)

        # Via HDFStore.put.
        with ensure_clean_path("temp.h5") as path:
            with HDFStore(path) as store:
                store.put("df", sdf)
            tm.assert_frame_equal(read_hdf(path, "df"), expected)
def test_default_mode(setup_path):
    """read_hdf without an explicit mode reads a freshly written file."""
    frame = tm.makeTimeDataFrame()
    with ensure_clean_path(setup_path) as path:
        frame.to_hdf(path, "df", mode="w")
        tm.assert_frame_equal(read_hdf(path, "df"), frame)
# Exemplo n.º 10
    def test_supported_for_subclass_series(self):
        """A Series subclass round-trips through HDF as a plain Series."""
        data = [1, 2, 3]
        sser = tm.SubclassedSeries(data, dtype=np.intp)
        expected = Series(data, dtype=np.intp)

        # Via Series.to_hdf.
        with ensure_clean_path("temp.h5") as path:
            sser.to_hdf(path, "ser")
            tm.assert_series_equal(read_hdf(path, "ser"), expected)

        # Via HDFStore.put.
        with ensure_clean_path("temp.h5") as path:
            with HDFStore(path) as store:
                store.put("ser", sser)
            tm.assert_series_equal(read_hdf(path, "ser"), expected)
# Exemplo n.º 11
def test_append_hierarchical(setup_path):
    """MultiIndex frames append/select correctly, incl. column subsets (GH3748)."""
    index = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["foo", "bar"],
    )
    df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])

    with ensure_clean_store(setup_path) as store:
        store.append("mi", df)
        tm.assert_frame_equal(store.select("mi"), df)

        # GH 3748: selecting a column subset via `columns=`
        tm.assert_frame_equal(
            store.select("mi", columns=["A", "B"]),
            df.reindex(columns=["A", "B"]),
        )

    # Same column-subset selection through the to_hdf/read_hdf convenience API.
    with ensure_clean_path("test.hdf") as path:
        df.to_hdf(path, "df", format="table")
        tm.assert_frame_equal(
            read_hdf(path, "df", columns=["A", "B"]),
            df.reindex(columns=["A", "B"]),
        )
# Exemplo n.º 12
def test_to_hdf_multiindex_extension_dtype(idx, setup_path):
    """Saving a MultiIndex built from extension-dtype arrays is unsupported (GH7775)."""
    frame = DataFrame(0, index=MultiIndex.from_arrays([idx, idx]), columns=["a"])
    with ensure_clean_path(setup_path) as path:
        with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):
            frame.to_hdf(path, "df")
def test_retain_index_attributes2(setup_path):
    """Index attributes (freq, name) across appends via ``to_hdf``.

    Appending a frame whose index has a different freq must not blow up,
    and appending one with a conflicting index name resets the stored
    name to None.
    """
    with ensure_clean_path(setup_path) as path:

        with catch_warnings(record=True):

            df = DataFrame({
                "A":
                Series(range(3),
                       index=date_range("2000-1-1", periods=3, freq="H"))
            })
            df.to_hdf(path, "data", mode="w", append=True)
            # Second frame has a D-freq index, conflicting with the stored H freq.
            df2 = DataFrame({
                "A":
                Series(range(3),
                       index=date_range("2002-1-1", periods=3, freq="D"))
            })

            df2.to_hdf(path, "data", append=True)

            # Rewrite from scratch (mode="w") with a named index.
            idx = date_range("2000-1-1", periods=3, freq="H")
            idx.name = "foo"
            df = DataFrame({"A": Series(range(3), index=idx)})
            df.to_hdf(path, "data", mode="w", append=True)

        # The index name survives the round-trip.
        assert read_hdf(path, "data").index.name == "foo"

        with catch_warnings(record=True):

            # Appending with a DIFFERENT index name ("bar" vs "foo") ...
            idx2 = date_range("2001-1-1", periods=3, freq="H")
            idx2.name = "bar"
            df2 = DataFrame({"A": Series(range(3), index=idx2)})
            df2.to_hdf(path, "data", append=True)

        # ... drops the stored name entirely.
        assert read_hdf(path, "data").index.name is None
# Exemplo n.º 14
def test_hdfstore_iteritems_deprecated(setup_path):
    """HDFStore.iteritems is deprecated and must emit a FutureWarning."""
    with ensure_clean_path(setup_path) as path:
        frame = DataFrame({"a": [1]})
        with HDFStore(path, mode="w") as hdf:
            hdf.put("table", frame)
            # Consuming one item is enough to trigger the warning.
            with tm.assert_produces_warning(FutureWarning):
                next(hdf.iteritems())
# Exemplo n.º 15
def test_complex_mixed_table(setup_path):
    """Mixed frames with complex64/complex128 columns select and round-trip."""
    c64 = np.array([1.0 + 1.0j] * 4, dtype=np.complex64)
    c128 = np.array([1.0 + 1.0j] * 4, dtype=np.complex128)
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": c64,
            "D": c128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )

    # Selection on a data column works despite the complex columns.
    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["A", "B"])
        result = store.select("df", where="A>2")
        tm.assert_frame_equal(df.loc[df.A > 2], result)

    # Full round-trip via the to_hdf/read_hdf convenience API.
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        tm.assert_frame_equal(df, read_hdf(path, "df"))
# Exemplo n.º 16
    def check_default_mode():
        """Verify read_hdf with no explicit mode reads a freshly written file.

        NOTE(review): relies on ``df`` and ``setup_path`` from the enclosing
        scope — presumably a frame and fixture set up by the caller; confirm
        against the surrounding test.
        """
        # read_hdf uses default mode
        with ensure_clean_path(setup_path) as path:
            df.to_hdf(path, "df", mode="w")
            result = read_hdf(path, "df")
            tm.assert_frame_equal(result, df)
# Exemplo n.º 17
def test_complibs(setup_path):
    """Every available complib x complevel combination round-trips (GH14478).

    Also inspects PyTables leaf metadata to confirm the requested
    compression settings were actually applied.
    """
    df = tm.makeDataFrame()

    # Building list of all complibs and complevels tuples
    all_complibs = tables.filters.all_complibs
    # Remove lzo if its not available on this platform
    if not tables.which_lib_version("lzo"):
        all_complibs.remove("lzo")
    # Remove bzip2 if its not available on this platform
    if not tables.which_lib_version("bzip2"):
        all_complibs.remove("bzip2")

    all_levels = range(0, 10)
    all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]

    for (lib, lvl) in all_tests:
        with ensure_clean_path(setup_path) as tmpfile:
            gname = "foo"

            # Write and read file to see if data is consistent
            df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
            result = read_hdf(tmpfile, gname)
            tm.assert_frame_equal(result, df)

            # Open the file as a context manager so the handle is closed even
            # when an assertion below fails (the previous explicit close()
            # leaked the handle on assertion failure).
            with tables.open_file(tmpfile, mode="r") as h5table:
                for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):
                    assert node.filters.complevel == lvl
                    if lvl == 0:
                        # Level 0 means no compression: no complib is recorded.
                        assert node.filters.complib is None
                    else:
                        assert node.filters.complib == lib
# Exemplo n.º 18
def test_complex_series_error(setup_path):
    """Complex Series cannot be indexed in table format; index=False works."""
    values = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    ser = Series(values, index=list("abcd"))

    msg = ("Columns containing complex values can be stored "
           "but cannot be indexed when using table format. "
           "Either use fixed format, set index=False, "
           "or do not include the columns containing complex "
           "values to data_columns when initializing the table.")

    # The default index=True must raise with the exact message above.
    with ensure_clean_path(setup_path) as path:
        with pytest.raises(TypeError, match=msg):
            ser.to_hdf(path, "obj", format="t")

    # Disabling the index allows storage and a faithful round-trip.
    with ensure_clean_path(setup_path) as path:
        ser.to_hdf(path, "obj", format="t", index=False)
        tm.assert_series_equal(ser, read_hdf(path, "obj"))
# Exemplo n.º 19
def test_read_nokey_empty(setup_path):
    """read_hdf without a key on an empty file raises a descriptive ValueError."""
    with ensure_clean_path(setup_path) as path:
        # Create a valid HDF5 file that contains no datasets.
        HDFStore(path).close()
        msg = re.escape(
            "Dataset(s) incompatible with Pandas data types, not table, or no "
            "datasets found in HDF5 file.")
        with pytest.raises(ValueError, match=msg):
            read_hdf(path)
# Exemplo n.º 20
def test_read_hdf_series_mode_r(format, setup_path):
    """A Series saved in either format can be read back with mode='r' (GH16583)."""
    ser = tm.makeFloatSeries()
    with ensure_clean_path(setup_path) as path:
        ser.to_hdf(path, key="data", format=format)
        # Explicit read-only mode must not break the read.
        result = pd.read_hdf(path, key="data", mode="r")
    tm.assert_series_equal(result, ser)
# Exemplo n.º 21
def test_format_type(setup_path):
    """get_storer reports the format each key was written with."""
    frame = DataFrame({"A": [1, 2]})
    with ensure_clean_path(setup_path) as path:
        with HDFStore(path) as store:
            store.put("a", frame, format="fixed")
            store.put("b", frame, format="table")
            # Each storer remembers its own write format.
            for key, fmt in (("a", "fixed"), ("b", "table")):
                assert store.get_storer(key).format_type == fmt
# Exemplo n.º 22
def test_select_empty_where(where):
    """An empty `where` clause selects the whole frame (GH26610)."""
    frame = DataFrame([1, 2, 3])
    with ensure_clean_path("empty_where.h5") as path:
        with HDFStore(path) as store:
            store.put("df", frame, "t")
            # read_hdf accepts an open store; empty where == no filtering.
            tm.assert_frame_equal(read_hdf(store, "df", where=where), frame)
# Exemplo n.º 23
def test_to_hdf_errors(format, setup_path):
    """errors='surrogatepass' round-trips lone-surrogate strings (GH20835)."""
    data = ["\ud800foo"]
    ser = Series(data, index=Index(data))
    with ensure_clean_path(setup_path) as path:
        # The same error handler must be used on both write and read.
        ser.to_hdf(path, "table", format=format, errors="surrogatepass")
        tm.assert_series_equal(
            read_hdf(path, "table", errors="surrogatepass"), ser
        )
# Exemplo n.º 24
def test_round_trip_equals(setup_path):
    """Round-tripped frames compare equal in both directions (GH9330)."""
    frame = DataFrame({"B": [1, 2], "A": ["x", "y"]})

    with ensure_clean_path(setup_path) as path:
        frame.to_hdf(path, "df", format="table")
        other = read_hdf(path, "df")
        tm.assert_frame_equal(frame, other)
        # DataFrame.equals must be symmetric for the round-tripped data.
        assert frame.equals(other)
        assert other.equals(frame)
# Exemplo n.º 25
def test_read_missing_key_close_store(setup_path):
    """A KeyError during read must still close the file handle (GH25766)."""
    with ensure_clean_path(setup_path) as path:
        frame = DataFrame({"a": range(2), "b": range(2)})
        frame.to_hdf(path, "k1")

        with pytest.raises(KeyError, match="'No object named k2 in the file'"):
            pd.read_hdf(path, "k2")

        # If the failed read had left the file open, this write would fail.
        frame.to_hdf(path, "k2")
# Exemplo n.º 26
def test_complex_fixed(setup_path):
    """complex64 and complex128 frames round-trip through fixed format."""
    for complex_dtype in (np.complex64, np.complex128):
        frame = DataFrame(
            np.random.rand(4, 5).astype(complex_dtype),
            index=list("abcd"),
            columns=list("ABCDE"),
        )
        with ensure_clean_path(setup_path) as path:
            frame.to_hdf(path, "df")
            tm.assert_frame_equal(frame, read_hdf(path, "df"))
# Exemplo n.º 27
def test_convert_value(setup_path, where: str, df: DataFrame, expected: DataFrame):
    """`where` clauses filter correctly on categorical data columns (GH39420)."""
    df.col = df.col.astype("category")
    expected.col = expected.col.astype("category")
    # The expected frame must carry the full category set of the input.
    expected.col = expected.col.cat.set_categories(sorted(df.col.unique()))

    with ensure_clean_path(setup_path) as path:
        # min_itemsize pins the stored string width for the "col" column.
        df.to_hdf(path, "df", format="table", min_itemsize={"col": 1})
        tm.assert_frame_equal(read_hdf(path, where=where), expected)
# Exemplo n.º 28
def test_complex_across_dimensions_fixed(setup_path):
    """Complex data round-trips in fixed format for both Series and DataFrame."""
    with catch_warnings(record=True):
        values = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        ser = Series(values, index=list("abcd"))
        frame = DataFrame({"A": ser, "B": ser})

        checks = [(ser, tm.assert_series_equal), (frame, tm.assert_frame_equal)]
        for obj, comp in checks:
            with ensure_clean_path(setup_path) as path:
                obj.to_hdf(path, "obj", format="fixed")
                comp(obj, read_hdf(path, "obj"))
# Exemplo n.º 29
def test_read_with_where_tz_aware_index(setup_path):
    """`where` filtering works on a tz-aware DatetimeIndex level (GH11926)."""
    n = 10
    dates = pd.date_range("20151201", periods=n, freq="D", tz="UTC")
    idx = pd.MultiIndex.from_arrays([dates, range(n)], names=["DATE", "NO"])
    expected = pd.DataFrame({"MYCOL": 0}, index=idx)

    key = "mykey"
    with ensure_clean_path(setup_path) as path:
        with pd.HDFStore(path) as store:
            store.append(key, expected, format="table", append=True)
        # Every row is after 2015-11-30, so the full frame comes back.
        result = pd.read_hdf(path, key, where="DATE > 20151130")
        tm.assert_frame_equal(result, expected)
# Exemplo n.º 30
def test_read_from_pathlib_path(setup_path):
    """to_hdf/read_hdf accept pathlib.Path objects (GH11773)."""
    expected = DataFrame(
        np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")
    )
    with ensure_clean_path(setup_path) as filename:
        # Pass a Path, not a str, for both write and read.
        path_obj = Path(filename)
        expected.to_hdf(path_obj, "df", mode="a")
        actual = read_hdf(path_obj, "df")

    tm.assert_frame_equal(expected, actual)