def matrix_block_to_numpy(jvm: JVMView, mb: JavaObject):
    """Convert a JVM-side matrix block into a two-dimensional numpy array.

    :param jvm: JVM view used to reach the ``Py4jConverterUtils`` helper class.
    :param mb: Handle to the matrix block object living in the JVM.
    :return: A ``float64`` array of shape ``(rows, columns)`` built with
        ``np.frombuffer`` from the buffer returned by the JVM helper.
    """
    shape = (mb.getNumRows(), mb.getNumColumns())
    converter = jvm.org.tugraz.sysds.runtime.compress.utils.Py4jConverterUtils
    dense_bytes = converter.convertMBtoPy4JDenseArr(mb)
    # Read exactly rows * columns doubles, then fold the flat vector
    # into the matrix shape.
    flat = np.frombuffer(
        dense_bytes, dtype=np.float64, count=shape[0] * shape[1])
    return flat.reshape(shape)
def frame_block_to_pandas(sds: "SystemDSContext", fb: JavaObject):
    """Copy the columns of a JVM-side frame block into a pandas DataFrame.

    :param sds: The SystemDS context; not used by this function.
    :param fb: Handle to the frame block object living in the JVM.
    :return: A DataFrame with one column per frame column, named after the
        frame's column names and filled with the first ``getNumRows()``
        entries of each column's data.
    """
    row_count = fb.getNumRows()
    col_count = fb.getNumColumns()
    frame = pd.DataFrame()
    for col in range(col_count):
        # Only the first row_count entries of the returned column data
        # are copied.
        values = np.array(fb.getColumnData(col)[:row_count])
        name = fb.getColumnName(col)
        frame[name] = values
    return frame
def matrix_block_to_numpy(jvm: JVMView, mb: JavaObject):
    """Converts a MatrixBlock object in the JVM to a numpy array.

    :param jvm: The current JVM instance running systemds.
    :param mb: A pointer to the JVM's MatrixBlock object.
    :return: A ``float64`` numpy array of shape ``(rows, columns)`` read
        from the dense buffer produced by ``Py4jConverterUtils``.
    """
    num_rows = mb.getNumRows()
    num_cols = mb.getNumColumns()
    converter_utils = jvm.org.apache.sysds.runtime.util.Py4jConverterUtils
    dense_buffer = converter_utils.convertMBtoPy4JDenseArr(mb)
    # Interpret the buffer as num_rows * num_cols doubles before
    # reshaping to 2-D.
    values = np.frombuffer(
        dense_buffer, dtype=np.float64, count=num_rows * num_cols)
    return values.reshape((num_rows, num_cols))
def frame_block_to_pandas(sds: "SystemDSContext", fb: JavaObject):
    """Converts a frame block object in the JVM to a pandas DataFrame.

    String columns are fetched cell by cell and decoded; ``Int``, ``Long``,
    ``Double`` and ``Boolean`` columns are fetched as one byte buffer each
    and reinterpreted with ``np.frombuffer``.

    :param sds: The SystemDS context; not used by this function.
    :param fb: A pointer to the JVM's frame block object.
    :return: A DataFrame with one column per frame column, named after the
        frame's column names.
    :raises NotImplementedError: If a column reports a type other than
        ``String``, ``Int``, ``Long``, ``Double`` or ``Boolean``.
    """
    # numpy dtype used to reinterpret the raw bytes of each buffer-backed
    # column type.
    # TODO maybe it is more efficient to bit pack the booleans.
    # https://stackoverflow.com/questions/5602155/numpy-boolean-array-with-1-bit-entries
    buffer_dtypes = {
        "Int": np.int32,
        "Long": np.int64,
        "Double": np.float64,
        "Boolean": np.dtype("?"),
    }

    num_rows = fb.getNumRows()
    num_cols = fb.getNumColumns()
    df = pd.DataFrame()

    for c_index in range(num_cols):
        d_type = fb.getColumnType(c_index)
        if d_type == "String":
            column = []
            for row in range(num_rows):
                raw = fb.getIndexAsBytes(c_index, row)
                # Falsy cells become None.
                # NOTE(review): this also maps an empty byte string to None
                # rather than "" - confirm that is intended.
                column.append(raw.decode() if raw else None)
        elif d_type in buffer_dtypes:
            column = np.frombuffer(
                fb.getColumnAsBytes(c_index), dtype=buffer_dtypes[d_type])
        else:
            raise NotImplementedError(
                f'Not Implemented {d_type} for systemds to pandas parsing')
        df[fb.getColumnName(c_index)] = column
    return df