def test_constructor_range(self):
    """Check how RangeIndex and Index are built from builtin ``range`` objects."""

    def check_from_range(rng, start, stop, step):
        # from_range(rng) must be exactly RangeIndex(start, stop, step)
        built = RangeIndex.from_range(rng)
        wanted = RangeIndex(start, stop, step)
        tm.assert_index_equal(built, wanted, exact=True)

    # Passing a range straight to the constructor is rejected.
    msg = "Value needs to be a scalar value, was type <class 'range'>"
    with pytest.raises(TypeError, match=msg):
        RangeIndex(range(1, 5, 2))

    check_from_range(range(1, 5, 2), 1, 5, 2)
    check_from_range(range(5, 6), 5, 6, 1)
    # an invalid range
    check_from_range(range(5, 1), 0, 0, 1)
    check_from_range(range(5), 0, 5, 1)

    # The generic Index constructor is expected to give back a RangeIndex.
    tm.assert_index_equal(
        Index(range(1, 5, 2)), RangeIndex(1, 5, 2), exact=True
    )

    # A float dtype together with a range is expected to fail.
    with pytest.raises(TypeError):
        Index(range(1, 5, 2), dtype='float64')

    # from_range takes no ``copy`` keyword.
    msg = r'^from_range\(\) got an unexpected keyword argument'
    with pytest.raises(TypeError, match=msg):
        pd.RangeIndex.from_range(range(10), copy=True)
def test_constructor_range(self):
    """Check how RangeIndex and Index are built from builtin ``range`` objects."""
    # Passing a range straight to the constructor is rejected.
    with pytest.raises(TypeError):
        RangeIndex(range(1, 5, 2))

    tm.assert_index_equal(
        RangeIndex.from_range(range(1, 5, 2)), RangeIndex(1, 5, 2), exact=True
    )
    tm.assert_index_equal(
        RangeIndex.from_range(range(5, 6)), RangeIndex(5, 6, 1), exact=True
    )

    # an invalid range
    tm.assert_index_equal(
        RangeIndex.from_range(range(5, 1)), RangeIndex(0, 0, 1), exact=True
    )

    tm.assert_index_equal(
        RangeIndex.from_range(range(5)), RangeIndex(0, 5, 1), exact=True
    )

    # The generic Index constructor is expected to give back a RangeIndex.
    tm.assert_index_equal(
        Index(range(1, 5, 2)), RangeIndex(1, 5, 2), exact=True
    )

    # A float dtype together with a range is expected to fail.
    with pytest.raises(TypeError):
        Index(range(1, 5, 2), dtype='float64')
def test_symmetric_difference(self):
    """Exercise ``symmetric_difference`` between RangeIndex objects."""
    # GH#12034 Cases where we operate against another RangeIndex and may
    # get back another RangeIndex
    left = RangeIndex.from_range(range(1, 10), name="foo")
    empty = RangeIndex.from_range(range(0), name="foo")

    # Against itself: expect the empty index named "foo".
    tm.assert_index_equal(left.symmetric_difference(left), empty)

    # Against an empty index with another name: expect ``left`` unnamed.
    tm.assert_index_equal(
        left.symmetric_difference(empty.rename("bar")), left.rename(None)
    )

    # Overlapping slices: only the non-shared ends are expected.
    ends = left[:-2].symmetric_difference(left[2:])
    tm.assert_index_equal(ends, Int64Index([1, 2, 8, 9], name="foo"))

    # Against the adjacent block 10..14: expect one range covering 1..14.
    right = RangeIndex.from_range(range(10, 15))
    tm.assert_index_equal(
        left.symmetric_difference(right), RangeIndex.from_range(range(1, 15))
    )

    # Against 11..14 (10 missing): expect an Int64Index with the gap.
    gapped = left.symmetric_difference(right[1:])
    tm.assert_index_equal(
        gapped, Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14])
    )
def test_difference(self):
    """Exercise ``difference`` between RangeIndex objects."""
    # GH#12034 Cases where we operate against another RangeIndex and may
    # get back another RangeIndex
    obj = RangeIndex.from_range(range(1, 10), name="foo")
    empty = RangeIndex.from_range(range(0), name="foo")

    # Minus itself: expect the empty index named "foo".
    tm.assert_index_equal(obj.difference(obj), empty, exact=True)

    # Minus an empty index with another name: expect ``obj`` unnamed.
    tm.assert_index_equal(
        obj.difference(empty.rename("bar")), obj.rename(None), exact=True
    )

    # Dropping a leading or a trailing slice leaves the other part.
    tm.assert_index_equal(obj.difference(obj[:3]), obj[3:], exact=True)
    tm.assert_index_equal(obj.difference(obj[-3:]), obj[:-3], exact=True)

    # Same with a reversed caller, for either orientation of the argument.
    tm.assert_index_equal(
        obj[::-1].difference(obj[-3:]), obj[:-3][::-1], exact=True
    )
    tm.assert_index_equal(
        obj[::-1].difference(obj[-3:][::-1]), obj[:-3][::-1], exact=True
    )

    # Dropping a middle slice: an Int64Index is expected.
    middle_removed = obj.difference(obj[2:6])
    tm.assert_index_equal(
        middle_removed, Int64Index([1, 2, 7, 8, 9], name="foo")
    )
def test_constructor_range(self):
    """Check how RangeIndex and Index are built from builtin ``range`` objects."""
    # Passing a range straight to the constructor is rejected.
    with self.assertRaises(TypeError):
        RangeIndex(range(1, 5, 2))

    from_stepped = RangeIndex.from_range(range(1, 5, 2))
    self.assertTrue(from_stepped.equals(RangeIndex(1, 5, 2)))

    from_single = RangeIndex.from_range(range(5, 6))
    self.assertTrue(from_single.equals(RangeIndex(5, 6, 1)))

    # an invalid range
    from_invalid = RangeIndex.from_range(range(5, 1))
    self.assertTrue(from_invalid.equals(RangeIndex(0, 0, 1)))

    from_stop_only = RangeIndex.from_range(range(5))
    self.assertTrue(from_stop_only.equals(RangeIndex(0, 5, 1)))

    # The generic Index constructor is expected to match the RangeIndex.
    via_index = Index(range(1, 5, 2))
    self.assertTrue(via_index.equals(RangeIndex(1, 5, 2)))

    # A float dtype together with a range is expected to fail.
    with self.assertRaises(TypeError):
        Index(range(1, 5, 2), dtype='float64')
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
    """
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : string
        File path
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        'io.parquet.engine' is used. If 'auto', then the first
        library to be installed is used.
    compression : str, optional, default 'snappy'
        compression method, includes {'gzip', 'snappy', 'brotli'}
    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    Whatever the selected engine's ``write`` method returns.

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame, if its index is not an unnamed
        default index equal to ``range(len(df))``, or if the column
        names are not strings.
    """
    impl = get_engine(engine)

    if not isinstance(df, DataFrame):
        raise ValueError("to_parquet only support IO with DataFrames")

    valid_types = {'string', 'unicode'}

    # validate index
    # --------------

    # validate that we have only a default index
    # raise on anything else as we don't serialize the index

    if not isinstance(df.index, Int64Index):
        # NOTE: the trailing space after ".reset_index()" matters; without
        # it the adjacent literals join into ".reset_index()to make".
        raise ValueError("parquet does not support serializing {} "
                         "for the index; you can .reset_index() "
                         "to make the index into column(s)".format(
                             type(df.index)))

    if not df.index.equals(RangeIndex.from_range(range(len(df)))):
        raise ValueError("parquet does not support serializing a "
                         "non-default index for the index; you "
                         "can .reset_index() to make the index "
                         "into column(s)")

    if df.index.name is not None:
        raise ValueError("parquet does not serialize index meta-data on a "
                         "default index")

    # validate columns
    # ----------------

    # must have value column names (strings only)
    if df.columns.inferred_type not in valid_types:
        raise ValueError("parquet must have string column names")

    return impl.write(df, path, compression=compression, **kwargs)
def test_difference_mismatched_step(self):
    """Check ``difference`` when the argument has a different step than the caller."""
    obj = RangeIndex.from_range(range(1, 10), name="foo")

    # Each pair is (slice removed from obj, slice expected to remain).
    every_other = slice(None, None, 2)
    the_rest = slice(1, None, 2)
    for removed, remaining in ((every_other, the_rest), (the_rest, every_other)):
        result = obj.difference(obj[removed])
        expected = obj[remaining]._int64index
        tm.assert_index_equal(result, expected, exact=True)
def to_feather(df: DataFrame, path):
    """
    Serialize a DataFrame to the feather format.

    Only frames with an unnamed default index and string column names
    are accepted; anything else raises ``ValueError``.

    Parameters
    ----------
    df : DataFrame
    path : string file path, or file-like object
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    path = stringify_path(path)

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    # validate index
    # --------------

    # The index is not serialized, so only an unnamed default index passes.
    index = df.index
    reset_hint = "you can .reset_index() to make the index into column(s)"

    if not isinstance(index, Int64Index):
        raise ValueError(
            f"feather does not support serializing {type(index)} "
            "for the index; " + reset_hint
        )

    default_index = RangeIndex.from_range(range(len(df)))
    if not index.equals(default_index):
        raise ValueError(
            "feather does not support serializing a "
            "non-default index for the index; " + reset_hint
        )

    if index.name is not None:
        raise ValueError(
            "feather does not serialize index meta-data on a default index"
        )

    # validate columns
    # ----------------

    # must have value column names (strings only)
    if df.columns.inferred_type not in {"string", "unicode"}:
        raise ValueError("feather must have string column names")

    feather.write_feather(df, path)
def test_constructor_range(self):
    """Check how RangeIndex and Index are built from builtin ``range`` objects."""
    # Passing a range straight to the constructor is rejected.
    msg = "Value needs to be a scalar value, was type <class 'range'>"
    with pytest.raises(TypeError, match=msg):
        RangeIndex(range(1, 5, 2))

    # Each entry: (range given to from_range, (start, stop, step) expected).
    from_range_cases = (
        (range(1, 5, 2), (1, 5, 2)),
        (range(5, 6), (5, 6, 1)),
        # an invalid range
        (range(5, 1), (0, 0, 1)),
        (range(5), (0, 5, 1)),
    )
    for rng, bounds in from_range_cases:
        result = RangeIndex.from_range(rng)
        expected = RangeIndex(*bounds)
        tm.assert_index_equal(result, expected, exact=True)

    # The generic Index constructor is expected to give back a RangeIndex.
    result = Index(range(1, 5, 2))
    tm.assert_index_equal(result, RangeIndex(1, 5, 2), exact=True)

    # A float dtype together with a range is expected to fail.
    dtype_msg = (
        "Incorrect `dtype` passed: expected signed integer, received float64"
    )
    with pytest.raises(ValueError, match=dtype_msg):
        Index(range(1, 5, 2), dtype="float64")

    # from_range takes no ``copy`` keyword.
    msg = r"^from_range\(\) got an unexpected keyword argument"
    with pytest.raises(TypeError, match=msg):
        RangeIndex.from_range(range(10), copy=True)
def test_constructor_range(self):
    """Check how RangeIndex and Index are built from builtin ``range`` objects."""
    # Each entry: (range given to from_range, (start, stop, step) expected).
    from_range_cases = [
        (range(1, 5, 2), (1, 5, 2)),
        (range(5, 6), (5, 6, 1)),
        # an invalid range
        (range(5, 1), (0, 0, 1)),
        (range(5), (0, 5, 1)),
    ]
    for rng, (start, stop, step) in from_range_cases:
        built = RangeIndex.from_range(rng)
        wanted = RangeIndex(start, stop, step)
        tm.assert_index_equal(built, wanted, exact=True)

    # The generic Index constructor is expected to give back a RangeIndex.
    tm.assert_index_equal(
        Index(range(1, 5, 2)), RangeIndex(1, 5, 2), exact=True
    )

    # from_range takes no ``copy`` keyword; the pattern allows for the
    # optional qualified name and the optional quoted argument name.
    msg = (
        r"(RangeIndex.)?from_range\(\) got an unexpected keyword argument( 'copy')?"
    )
    with pytest.raises(TypeError, match=msg):
        RangeIndex.from_range(range(10), copy=True)
def to_feather(df, path):
    """
    Write a DataFrame to the feather-format

    Parameters
    ----------
    df : DataFrame
    path : string
        File path

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame, if its index is not an unnamed
        default index equal to ``range(len(df))``, or if the column
        names are not strings.
    """
    path = _stringify_path(path)
    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    feather = _try_import()
    valid_types = {'string', 'unicode'}

    # validate index
    # --------------

    # validate that we have only a default index
    # raise on anything else as we don't serialize the index

    if not isinstance(df.index, Int64Index):
        # NOTE: the trailing space after ".reset_index()" matters; without
        # it the adjacent literals join into ".reset_index()to make".
        raise ValueError("feather does not support serializing {} "
                         "for the index; you can .reset_index() "
                         "to make the index into column(s)".format(
                             type(df.index)))

    if not df.index.equals(RangeIndex.from_range(range(len(df)))):
        raise ValueError("feather does not support serializing a "
                         "non-default index for the index; you "
                         "can .reset_index() to make the index "
                         "into column(s)")

    if df.index.name is not None:
        raise ValueError("feather does not serialize index meta-data on a "
                         "default index")

    # validate columns
    # ----------------

    # must have value column names (strings only)
    if df.columns.inferred_type not in valid_types:
        raise ValueError("feather must have string column names")

    feather.write_dataframe(df, path)
def to_feather(
    df: DataFrame,
    path: FilePathOrBuffer[AnyStr],
    storage_options: StorageOptions = None,
    **kwargs,
):
    """
    Write a DataFrame to the binary Feather format.

    Parameters
    ----------
    df : DataFrame
    path : string file path, or file-like object
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc., if using a URL that will
        be parsed by ``fsspec``, e.g., starting "s3://", "gcs://". An error
        will be raised if providing this argument with a local path or
        a file-like buffer. See the fsspec and backend storage implementation
        docs for the set of allowed keys and values.

        .. versionadded:: 1.2.0

    **kwargs :
        Additional keywords passed to `pyarrow.feather.write_feather`.

        .. versionadded:: 1.1.0

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame, if its index is not an unnamed
        default index equal to ``range(len(df))``, or if the column
        names are not strings.
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    valid_types = {"string", "unicode"}

    # validate index
    # --------------

    # validate that we have only a default index
    # raise on anything else as we don't serialize the index

    if not isinstance(df.index, Int64Index):
        typ = type(df.index)
        raise ValueError(
            f"feather does not support serializing {typ} "
            "for the index; you can .reset_index() to make the index into column(s)"
        )

    if not df.index.equals(RangeIndex.from_range(range(len(df)))):
        raise ValueError(
            "feather does not support serializing a non-default index for the index; "
            "you can .reset_index() to make the index into column(s)"
        )

    if df.index.name is not None:
        raise ValueError(
            "feather does not serialize index meta-data on a default index"
        )

    # validate columns
    # ----------------

    # must have value column names (strings only)
    if df.columns.inferred_type not in valid_types:
        raise ValueError("feather must have string column names")

    # Resolve the target only after validation has passed, so a rejected
    # frame never leaves a handle behind.
    ioargs = get_filepath_or_buffer(path, mode="wb", storage_options=storage_options)
    try:
        feather.write_feather(df, ioargs.filepath_or_buffer, **kwargs)
    finally:
        # Close a handle we own even when the write itself fails.
        if ioargs.should_close:
            assert not isinstance(ioargs.filepath_or_buffer, str)
            ioargs.filepath_or_buffer.close()
def to_feather(
    df: DataFrame,
    path: FilePathOrBuffer[AnyStr],
    storage_options: StorageOptions = None,
    **kwargs,
):
    """
    Write a DataFrame to the binary Feather format.

    Parameters
    ----------
    df : DataFrame
    path : string file path, or file-like object
    {storage_options}

        .. versionadded:: 1.2.0

    **kwargs :
        Additional keywords passed to `pyarrow.feather.write_feather`.

        .. versionadded:: 1.1.0
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    # validate index
    # --------------

    # The index is not serialized, so only an unnamed default index passes.
    index = df.index
    reset_hint = "you can .reset_index() to make the index into column(s)"

    if not isinstance(index, Int64Index):
        raise ValueError(
            f"feather does not support serializing {type(index)} "
            "for the index; " + reset_hint
        )

    default_index = RangeIndex.from_range(range(len(df)))
    if not index.equals(default_index):
        raise ValueError(
            "feather does not support serializing a non-default index "
            "for the index; " + reset_hint
        )

    if index.name is not None:
        raise ValueError(
            "feather does not serialize index meta-data on a default index"
        )

    # validate columns
    # ----------------

    # must have value column names (strings only)
    if df.columns.inferred_type not in {"string", "unicode"}:
        raise ValueError("feather must have string column names")

    # Binary handle, released by the context manager once the write ends.
    with get_handle(
        path, "wb", storage_options=storage_options, is_text=False
    ) as handles:
        feather.write_feather(df, handles.handle, **kwargs)