Пример #1
0
    def process_pandas(self, source: pd.DataFrame):
        """Record column metadata and serialize ``source`` into a byte stream.

        Sets ``self.protocol`` to ``"pandas"``, stores ``(column label, type)``
        pairs in ``self.deduced_properties`` (each type is produced by
        ``utils._from_numpy_dtype`` from the column's dtype), and stores the
        serialized stream as ``bytes`` in ``self.source``.

        Args:
            source: The DataFrame to serialize; its index is not written
                (``preserve_index=False``).
        """
        self.protocol = "pandas"

        # Pair every column label with the type derived from its dtype.
        properties = [
            (label, utils._from_numpy_dtype(np_dtype))
            for label, np_dtype in zip(
                source.columns.values, source.dtypes.values
            )
        ]

        # Stream the table into an in-memory buffer (`pa` is presumably
        # pyarrow -- confirm against the file's imports).
        arrow_table = pa.Table.from_pandas(source, preserve_index=False)
        stream = pa.BufferOutputStream()
        with pa.ipc.new_stream(stream, arrow_table.schema) as ipc_writer:
            ipc_writer.write_table(arrow_table)
        ipc_buffer = stream.getvalue()

        self.deduced_properties = properties
        # Copy the buffer contents into an independent bytes object.
        self.source = bytes(memoryview(ipc_buffer))
Пример #2
0
    def process_pandas(self, source: pd.DataFrame):
        """Validate ``source`` and capture its columns as raw byte strings.

        Sets ``self.protocol`` to ``"pandas"``, records the frame's shape in
        ``self.row_num`` / ``self.column_num``, stores ``(column label, type)``
        pairs in ``self.deduced_properties`` and one ``bytes`` object per
        column in ``self.property_bytes``.

        Every condition is passed to ``check_argument``: the frame must be
        two-dimensional, the first column must be ``int64``/``long``, and each
        column must be ``int64``, ``long`` or ``float64``.

        Args:
            source: The DataFrame whose columns are captured.
        """
        self.protocol = "pandas"
        check_argument(len(source.shape) == 2)
        self.row_num, self.column_num = source.shape[0], source.shape[1]

        # Only support a subset of data types.
        column_dtypes = source.dtypes.values
        leading_dtype = column_dtypes[0]
        integer_dtypes = (np.dtype("int64"), np.dtype("long"))
        supported_dtypes = integer_dtypes + (np.dtype("float64"),)
        check_argument(leading_dtype in integer_dtypes)
        for np_dtype in column_dtypes:
            check_argument(np_dtype in supported_dtypes)

        labels = list(source.columns.values)
        properties = [
            (label, utils._from_numpy_dtype(np_dtype))
            for label, np_dtype in zip(labels, column_dtypes)
        ]

        self.deduced_properties = properties

        # Dump each column's values to bytes, requesting "F" order.
        column_payloads = []
        for label in labels:
            column_payloads.append(source[label].values.tobytes("F"))
        self.property_bytes = column_payloads
Пример #3
0
    def process_numpy(self, source: Sequence[np.ndarray]):
        """Validate a sequence of arrays and capture each as raw bytes.

        Sets ``self.protocol`` to ``"numpy"``, takes ``self.row_num`` from the
        first array's leading dimension and ``self.column_num`` from the number
        of arrays, stores ``("f<i>", type)`` pairs in
        ``self.deduced_properties`` and one ``bytes`` object per array in
        ``self.property_bytes``.

        The dtype conditions are passed to ``check_argument``: the first array
        must be ``int64``/``long`` and every array must be ``int64``, ``long``
        or ``float64``.

        Args:
            source: The arrays to capture, one per column.
        """
        self.protocol = "numpy"
        leading = source[0]
        self.row_num = leading.shape[0]
        self.column_num = len(source)

        # Only support a subset of data types.
        integer_dtypes = (np.dtype("int64"), np.dtype("long"))
        supported_dtypes = integer_dtypes + (np.dtype("float64"),)
        check_argument(leading.dtype in integer_dtypes)
        for column in source:
            check_argument(column.dtype in supported_dtypes)

        # Columns carry no labels, so name them f0, f1, ... by position.
        labels = ["f%s" % index for index in range(self.column_num)]
        properties = [
            (label, utils._from_numpy_dtype(column.dtype))
            for label, column in zip(labels, source)
        ]

        self.deduced_properties = properties

        # Dump each array to bytes, requesting "F" order.
        column_payloads = []
        for column in source:
            column_payloads.append(column.tobytes("F"))
        self.property_bytes = column_payloads