def read_table(source, columns=None, memory_map=True):
    """
    Load a Feather file into a pyarrow.Table.

    Parameters
    ----------
    source : str file path, or file-like object
    columns : sequence, optional
        Restrict the read to these columns, given either all as integer
        indices or all as string names. When omitted, every column is read.
    memory_map : boolean, default True
        Forwarded to the reader as ``use_memory_map``; use memory mapping
        when opening a file on disk.

    Returns
    -------
    table : pyarrow.Table

    Raises
    ------
    TypeError
        If ``columns`` is neither exclusively ``int`` nor exclusively
        ``str`` entries.
    """
    feather_reader = ext.FeatherReader()
    feather_reader.open(source, use_memory_map=memory_map)

    # No selection requested: hand back every column.
    if columns is None:
        return feather_reader.read()

    kinds = [type(entry) for entry in columns]

    # A selection must be homogeneous: all positions or all names.
    # Exact type equality is intentional (subclasses such as bool are
    # rejected), and the integer check is tried first.
    if all(kind == int for kind in kinds):
        return feather_reader.read_indices(columns)
    if all(kind == str for kind in kinds):
        return feather_reader.read_names(columns)

    kind_names = [kind.__name__ for kind in kinds]
    message = "Columns must be indices or names. Got columns {} of types {}"
    raise TypeError(message.format(columns, kind_names))
def read_table(source, columns=None):
    """
    Read a pyarrow.Table from a Feather file.

    ``_check_pandas_version()`` is invoked before the file is opened.

    Parameters
    ----------
    source : str file path, or file-like object
    columns : sequence, optional
        Subset of columns to read, given either all as integer indices or
        all as string names. If not provided, all columns are read.

    Returns
    -------
    table : pyarrow.Table

    Raises
    ------
    TypeError
        If ``columns`` mixes types or holds entries that are neither
        ``int`` nor ``str``.
    """
    _check_pandas_version()

    reader = ext.FeatherReader()
    reader.open(source)

    if columns is None:
        return reader.read()

    seen_types = [type(col) for col in columns]
    is_positional = all(t == int for t in seen_types)

    # Reject anything that is not purely indices or purely names. The name
    # check only runs when the index check has already failed.
    if not is_positional and not all(t == str for t in seen_types):
        type_labels = [t.__name__ for t in seen_types]
        raise TypeError(
            "Columns must be indices or names. "
            "Got columns {} of types {}".format(columns, type_labels)
        )

    if is_positional:
        return reader.read_indices(columns)
    return reader.read_names(columns)
def read_table(source, columns=None, memory_map=True):
    """
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
    columns : sequence, optional
        Only read a specific set of columns, given either all as integer
        indices or all as string names. If not provided, all columns are
        read. The order (and any repetition) of the selection is followed
        in the returned table.
    memory_map : boolean, default True
        Use memory mapping when opening file on disk

    Returns
    -------
    table : pyarrow.Table

    Raises
    ------
    TypeError
        If ``columns`` is neither exclusively ``int`` nor exclusively
        ``str`` entries.
    """
    reader = ext.FeatherReader()
    reader.open(source, use_memory_map=memory_map)

    if columns is None:
        return reader.read()

    column_types = [type(column) for column in columns]
    by_index = all(t == int for t in column_types)
    if by_index:
        table = reader.read_indices(columns)
    elif all(t == str for t in column_types):
        table = reader.read_names(columns)
    else:
        column_type_names = [t.__name__ for t in column_types]
        raise TypeError("Columns must be indices or names. "
                        "Got columns {} of types {}"
                        .format(columns, column_type_names))

    # Feather v1 already respects the column selection
    if reader.version < 3:
        return table

    # Feather v2 reads with sorted / deduplicated selection. Compare against
    # a list so that an already-sorted tuple selection is recognised too.
    requested = list(columns)
    unique_sorted = sorted(set(requested))
    if unique_sorted == requested:
        return table

    # Follow the exact order / selection that was requested.
    if by_index:
        # ``table`` only holds the selected columns, in sorted order, so a
        # file-level index must be translated to its position in ``table``
        # (e.g. for columns=[2, 0] the table has positions 0 and 1 only).
        position_of = {index: pos for pos, index in enumerate(unique_sorted)}
        selectors = [position_of[index] for index in requested]
    else:
        selectors = requested

    new_fields = [table.schema.field(c) for c in selectors]
    new_schema = schema(new_fields, metadata=table.schema.metadata)
    new_columns = [table.column(c) for c in selectors]
    return Table.from_arrays(new_columns, schema=new_schema)