def read_ipc(
    file: Union[str, BinaryIO, BytesIO, Path, bytes],
    columns: Optional[Union[List[int], List[str]]] = None,
    n_rows: Optional[int] = None,
    use_pyarrow: bool = False,
    memory_map: bool = True,
    storage_options: Optional[Dict] = None,
    row_count_name: Optional[str] = None,
    row_count_offset: int = 0,
    rechunk: bool = True,
    **kwargs: Any,
) -> DataFrame:
    """
    Read into a DataFrame from Arrow IPC (Feather v2) file.

    Parameters
    ----------
    file
        Path to a file or a file-like object. If ``fsspec`` is installed, it will be used
        to open remote files.
    columns
        Columns to select. Accepts a list of column indices (starting at zero) or a list
        of column names.
    n_rows
        Stop reading from IPC file after reading ``n_rows``.
        Only valid when `use_pyarrow=False`.
    use_pyarrow
        Use pyarrow or the native rust reader.
    memory_map
        Memory map underlying file. This will likely increase performance.
        Only used when ``use_pyarrow=True``.
    storage_options
        Extra options that make sense for ``fsspec.open()`` or a particular storage connection, e.g. host, port, username, password, etc.
    row_count_name
        If not None, this will insert a row count column with give name into the DataFrame
    row_count_offset
        Offset to start the row_count column (only use if the name is set)
    rechunk
        Make sure that all data is contiguous.

    Returns
    -------
    DataFrame

    Raises
    ------
    ValueError
        If ``use_pyarrow=True`` is combined with ``row_count_name`` or ``n_rows``,
        which only the native reader supports.
    ImportError
        If ``use_pyarrow=True`` but pyarrow is not installed.
    """
    # Map legacy arguments to current ones and remove them from kwargs,
    # so older call sites keep working.
    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
    if columns is None:
        columns = kwargs.pop("projection", None)

    # The pyarrow reader does not support these options; reject them up front
    # rather than silently ignoring them.
    if use_pyarrow:
        if row_count_name is not None:
            raise ValueError(
                "``row_count_name`` cannot be used with ``use_pyarrow=True``."
            )
        # NOTE: an identity check is required here — the previous truthiness
        # test (`if n_rows:`) silently accepted ``n_rows=0``.
        if n_rows is not None:
            raise ValueError(
                "``n_rows`` cannot be used with ``use_pyarrow=True``."
            )

    storage_options = storage_options or {}
    with _prepare_file_arg(file, **storage_options) as data:
        if use_pyarrow:
            if not _PYARROW_AVAILABLE:
                raise ImportError(
                    "'pyarrow' is required when using 'read_ipc(..., use_pyarrow=True)'."
                )
            tbl = pa.feather.read_table(data, memory_map=memory_map, columns=columns)
            return DataFrame._from_arrow(tbl, rechunk=rechunk)

        return DataFrame._read_ipc(
            data,
            columns=columns,
            n_rows=n_rows,
            row_count_name=row_count_name,
            row_count_offset=row_count_offset,
            rechunk=rechunk,
        )
def from_arrow(
    a: Union["pa.Table", "pa.Array", "pa.ChunkedArray"], rechunk: bool = True
) -> Union[DataFrame, Series]:
    """
    Create a DataFrame or Series from an Arrow Table or Array.

    The conversion is zero copy for the most part; types that Polars does not
    support may be cast to the closest supported type.

    Parameters
    ----------
    a : Arrow Table or Array
        Data represented as Arrow Table or Array.
    rechunk : bool, default True
        Make sure that all data is contiguous.

    Returns
    -------
    DataFrame or Series

    Examples
    --------
    Constructing a DataFrame from an Arrow Table:

    >>> import pyarrow as pa
    >>> data = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})
    >>> df = pl.from_arrow(data)
    >>> df
    shape: (3, 2)
    ┌─────┬─────┐
    │ a   ┆ b   │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 1   ┆ 4   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 5   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 6   │
    └─────┴─────┘

    Constructing a Series from an Arrow Array:

    >>> import pyarrow as pa
    >>> data = pa.array([1, 2, 3])
    >>> series = pl.from_arrow(data)
    >>> series
    shape: (3,)
    Series: '' [i64]
    [
        1
        2
        3
    ]
    """
    if not _PYARROW_AVAILABLE:
        raise ImportError("'pyarrow' is required when using from_arrow().")  # pragma: no cover

    # Dispatch on the concrete Arrow container: tables become DataFrames,
    # (chunked) arrays become a nameless Series.
    if isinstance(a, pa.Table):
        return DataFrame._from_arrow(a, rechunk=rechunk)
    if isinstance(a, (pa.Array, pa.ChunkedArray)):
        return Series._from_arrow("", a, rechunk)
    raise ValueError(f"Expected Arrow Table or Array, got {type(a)}.")