Exemplos de PyDataFrame.from_arrow_record_batches em Python

Linguagem de programação: Python

Espaço de nomes / nome do pacote: polars.polars

Classe / Tipo: PyDataFrame

Método / Função: from_arrow_record_batches

Exemplos em hotexamples.com: 2

PyDataFrame.from_arrow_record_batches em Python – 2 exemplos encontrados. Estes são os exemplos reais mais bem avaliados de polars.polars.PyDataFrame.from_arrow_record_batches em Python, extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

PyDataFrame(9)

from_arrow_record_batches(2)

read_dict(2)

read_dicts(2)

read_rows(2)

columns(1)

dtypes(1)

lazy(1)

set_column_names(1)

Métodos Frequentes

PyDataFrame (9)

from_arrow_record_batches (2)

read_dict (2)

read_dicts (2)

read_rows (2)

columns (1)

dtypes (1)

lazy (1)

set_column_names (1)

Exemplo n.º 1

0

Exibir arquivo

def arrow_to_pydf(
    data: pa.Table, columns: Optional[Sequence[str]] = None, rechunk: bool = True
) -> "PyDataFrame":
    """
    Build a PyDataFrame from the columns of an Arrow Table.

    Parameters
    ----------
    data
        Arrow table whose columns are converted.
    columns
        Optional replacement column names; when given, they are applied
        with ``data.rename_columns`` before any conversion happens.
    rechunk
        When true, ``rechunk()`` is called on the resulting PyDataFrame
        before it is returned.

    Returns
    -------
    The PyDataFrame produced by ``PyDataFrame.from_arrow_record_batches``.

    Raises
    ------
    ValueError
        If pyarrow rejects ``columns`` with ``ArrowInvalid`` during the rename.
    """
    if columns is not None:
        try:
            data = data.rename_columns(columns)
        except pa.lib.ArrowInvalid as e:
            raise ValueError(
                "Dimensions of columns arg must match data dimensions.") from e

    coerced = {}
    for idx, arrow_col in enumerate(data):
        # Read the name first: the column object is replaced by the result of
        # coerce_arrow below (presumably that result no longer carries it).
        # Unnamed columns fall back to a positional "column_<index>" name.
        key = f"column_{idx}" if arrow_col._name is None else arrow_col._name
        coerced[key] = coerce_arrow(arrow_col)

    # Rebuild a table from the coerced columns and hand its batches to polars.
    record_batches = pa.table(coerced).to_batches()
    result = PyDataFrame.from_arrow_record_batches(record_batches)
    return result.rechunk() if rechunk else result

Exemplo n.º 2

0

Exibir arquivo

def arrow_to_pydf(
    data: "pa.Table", columns: Optional[Sequence[str]] = None, rechunk: bool = True
) -> "PyDataFrame":
    """
    Build a PyDataFrame from the columns of an Arrow Table.

    Dictionary-typed columns are converted one by one into series and attached
    to the frame afterwards; all other columns go through Arrow record batches.

    Parameters
    ----------
    data
        Arrow table whose columns are converted.
    columns
        Optional replacement column names; when given, they are applied
        with ``data.rename_columns`` before any conversion happens.
    rechunk
        When true, ``rechunk()`` is called on the PyDataFrame built from the
        non-dictionary columns; the flag is also forwarded to
        ``arrow_to_pyseries`` for dictionary columns.

    Returns
    -------
    A PyDataFrame whose columns follow the order of the input table.

    Raises
    ------
    ImportError
        If ``_PYARROW_AVAILABLE`` is false.
    ValueError
        If pyarrow rejects ``columns`` with ``ArrowInvalid`` during the rename.
    """
    if not _PYARROW_AVAILABLE:  # pragma: no cover
        raise ImportError(
            "'pyarrow' is required when constructing a PyDataFrame from an Arrow Table."
        )

    if columns is not None:
        try:
            data = data.rename_columns(columns)
        except pa.lib.ArrowInvalid as e:
            raise ValueError(
                "Dimensions of columns arg must match data dimensions.") from e

    plain_cols = {}
    # Dictionaries cannot be built in different batches (categoricals do not
    # allow that), so those columns are converted separately and kept here,
    # keyed by their position in the input table.
    dict_series = {}
    ordered_names = []
    for idx, arrow_col in enumerate(data):
        # Read the name first: the column object is replaced by the result of
        # coerce_arrow below. Unnamed columns get a positional fallback name.
        col_name = f"column_{idx}" if arrow_col._name is None else arrow_col._name
        ordered_names.append(col_name)

        arrow_col = coerce_arrow(arrow_col)
        if not pa.types.is_dictionary(arrow_col.type):
            plain_cols[col_name] = arrow_col
            continue
        py_series = arrow_to_pyseries(col_name, arrow_col, rechunk)
        dict_series[idx] = pli.wrap_s(py_series)

    if plain_cols:
        tbl = pa.table(plain_cols)
        if tbl.shape[0] == 0:
            # Path for a table without rows that keeps the datatypes.
            pydf = pli.DataFrame._from_pandas(tbl.to_pandas())._df
        else:
            pydf = PyDataFrame.from_arrow_record_batches(tbl.to_batches())
    else:
        # Every column was dictionary-typed (or there were none at all).
        pydf = pli.DataFrame([])._df

    if rechunk:
        pydf = pydf.rechunk()

    if not dict_series:
        return pydf

    # Attach the dictionary columns, then restore the input column order.
    frame = pli.wrap_df(pydf)
    for series in dict_series.values():
        frame[series.name] = series
    return frame[ordered_names]._df