Пример #1
0
def read_table(source, columns=None, memory_map=True):
    """
    Load the contents of a Feather file as a pyarrow.Table.

    Parameters
    ----------
    source : str file path, or file-like object
        Location of the Feather data.
    columns : sequence, optional
        Restrict the read to these columns, given either all as integer
        indices or all as string names. When omitted, every column is read.
    memory_map : boolean, default True
        Passed to the reader as ``use_memory_map`` when opening ``source``.

    Returns
    -------
    table : pyarrow.Table

    Raises
    ------
    TypeError
        If ``columns`` is not made up exclusively of ``int`` or exclusively
        of ``str`` values.
    """
    feather_reader = ext.FeatherReader()
    feather_reader.open(source, use_memory_map=memory_map)

    # No selection requested: return everything the reader has.
    if columns is None:
        return feather_reader.read()

    # Exact type comparison: subclasses of int/str (e.g. bool) do not match.
    selector_types = [type(selector) for selector in columns]

    if all(kind == int for kind in selector_types):
        return feather_reader.read_indices(columns)

    if all(kind == str for kind in selector_types):
        return feather_reader.read_names(columns)

    type_names = [kind.__name__ for kind in selector_types]
    message = ("Columns must be indices or names. "
               "Got columns {} of types {}").format(columns, type_names)
    raise TypeError(message)
Пример #2
0
def read_table(source, columns=None):
    """
    Load the contents of a Feather file as a pyarrow.Table.

    Parameters
    ----------
    source : str file path, or file-like object
        Location of the Feather data.
    columns : sequence, optional
        Restrict the read to these columns, given either all as integer
        indices or all as string names. When omitted, every column is read.

    Returns
    -------
    table : pyarrow.Table

    Raises
    ------
    TypeError
        If ``columns`` is not made up exclusively of ``int`` or exclusively
        of ``str`` values.
    """
    # Runs before the source is opened, as in the original call order.
    _check_pandas_version()

    feather_reader = ext.FeatherReader()
    feather_reader.open(source)

    # No selection requested: return everything the reader has.
    if columns is None:
        return feather_reader.read()

    # Exact type comparison: subclasses of int/str (e.g. bool) do not match.
    selector_types = [type(selector) for selector in columns]

    if all(kind == int for kind in selector_types):
        return feather_reader.read_indices(columns)

    if all(kind == str for kind in selector_types):
        return feather_reader.read_names(columns)

    type_names = [kind.__name__ for kind in selector_types]
    message = ("Columns must be indices or names. "
               "Got columns {} of types {}").format(columns, type_names)
    raise TypeError(message)
Пример #3
0
def read_table(source, columns=None, memory_map=True):
    """
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
    columns : sequence, optional
        Only read a specific set of columns, given either all as integer
        indices or all as string names. If not provided, all columns are
        read. The returned table follows the order (and repetition) of
        this selection.
    memory_map : boolean, default True
        Use memory mapping when opening file on disk

    Returns
    -------
    table : pyarrow.Table

    Raises
    ------
    TypeError
        If ``columns`` is neither all ``int`` nor all ``str``.
    """
    reader = ext.FeatherReader()
    reader.open(source, use_memory_map=memory_map)

    if columns is None:
        return reader.read()

    # Exact type comparison: subclasses of int/str (e.g. bool) do not match.
    column_types = [type(column) for column in columns]
    by_index = all(t == int for t in column_types)
    if by_index:
        table = reader.read_indices(columns)
    elif all(t == str for t in column_types):
        table = reader.read_names(columns)
    else:
        column_type_names = [t.__name__ for t in column_types]
        raise TypeError("Columns must be indices or names. "
                        "Got columns {} of types {}"
                        .format(columns, column_type_names))

    # Feather v1 already respects the column selection
    if reader.version < 3:
        return table

    # Feather v2 reads with sorted / deduplicated selection, so the table
    # only needs rebuilding when the request differs from that form.
    # Compare as a list so that a tuple selection is not always "different".
    requested = list(columns)
    sorted_unique = sorted(set(requested))
    if sorted_unique == requested:
        return table

    if by_index:
        # The requested integers are positions in the *file*, but ``table``
        # now holds only the selected columns (in sorted order). Translate
        # each file index into its position inside the reduced table;
        # indexing the table with the raw file index would pick the wrong
        # column or run out of range (e.g. columns=[2, 0]).
        position_in_table = {
            file_index: pos for pos, file_index in enumerate(sorted_unique)
        }
        keys = [position_in_table[file_index] for file_index in requested]
    else:
        keys = requested

    # follow exact order / selection of the request
    new_fields = [table.schema.field(key) for key in keys]
    new_schema = schema(new_fields, metadata=table.schema.metadata)
    new_columns = [table.column(key) for key in keys]
    return Table.from_arrays(new_columns, schema=new_schema)