def delete_meta_fs(filename: AnyStr):
    """Remove the pickled meta data file for `filename` from disk.

    Returns an object-dtype ndarray wrapping None (the return value of
    os.remove), keeping the result uniform with the other fs kernels.
    """
    meta_dir = settings.pj(settings.fs_meta, filename)
    # NOTE(review): mkdir before delete mirrors the sibling kernels; it
    # creates the directory if absent, and os.remove will then raise
    # FileNotFoundError for the missing file.
    settings.Path(meta_dir).mkdir(parents=True, exist_ok=True)
    meta_path = settings.pj(meta_dir, "meta.pkl")
    return np.array(os.remove(meta_path), dtype=object)
def write_meta_fs(meta: Dict, filename: AnyStr):
    """Pickle `meta` to <fs_meta>/<filename>/meta.pkl, creating the
    directory tree as needed.

    Returns an object-dtype ndarray wrapping None (the return value of
    pickle.dump), keeping the result uniform with the other fs kernels.
    """
    meta_dir = settings.pj(settings.fs_meta, filename)
    settings.Path(meta_dir).mkdir(parents=True, exist_ok=True)
    meta_path = settings.pj(meta_dir, "meta.pkl")
    with open(meta_path, "wb") as out:
        return np.array(pickle.dump(meta, out), dtype=object)
def delete_block_fs(filename, grid_entry: Tuple):
    """Delete the on-disk block file identified by `grid_entry`.

    Returns an object-dtype ndarray wrapping None (the return value of
    os.remove), keeping the result uniform with the other fs kernels.
    """
    block_dir = settings.pj(settings.fs_data, filename)
    # NOTE(review): mkdir before delete mirrors the write path; os.remove
    # still raises FileNotFoundError if the block file does not exist.
    settings.Path(block_dir).mkdir(parents=True, exist_ok=True)
    entry_name = "_".join(map(str, grid_entry)) + "." + ARRAY_FILETYPE
    block_path = settings.pj(block_dir, entry_name)
    return np.array(os.remove(block_path), dtype=object)
def read_block_fs(filename, grid_entry: Tuple):
    """Load a single block identified by `grid_entry` from disk."""
    block_dir = settings.pj(settings.fs_data, filename)
    # NOTE(review): mkdir on the read path matches the sibling kernels;
    # load() will still fail if the block file itself is absent.
    settings.Path(block_dir).mkdir(parents=True, exist_ok=True)
    entry_name = "_".join(map(str, grid_entry)) + "." + ARRAY_FILETYPE
    return load(settings.pj(block_dir, entry_name))
def write_block_fs(block: Any, filename: AnyStr, grid_entry: Tuple):
    """Persist `block` under <fs_data>/<filename>/ with a file name derived
    from `grid_entry`, creating the directory tree as needed.

    Returns an object-dtype ndarray wrapping the return value of save(),
    keeping the result uniform with the other fs kernels.
    """
    block_dir = settings.pj(settings.fs_data, filename)
    settings.Path(block_dir).mkdir(parents=True, exist_ok=True)
    entry_name = "_".join(map(str, grid_entry)) + "." + ARRAY_FILETYPE
    block_path = settings.pj(block_dir, entry_name)
    return np.array(save(block, block_path), dtype=object)
def read_meta_fs(filename: AnyStr):
    """Unpickle and return the meta data stored for `filename`.

    Raises FileNotFoundError via open() if meta.pkl is absent.
    """
    meta_dir = settings.pj(settings.fs_meta, filename)
    settings.Path(meta_dir).mkdir(parents=True, exist_ok=True)
    meta_path = settings.pj(meta_dir, "meta.pkl")
    with open(meta_path, "rb") as inp:
        return pickle.load(inp)
def read_block_fs(filename, grid_entry: Tuple):
    """Load one block from the directory `filename`; the block's file name
    is derived from `grid_entry` and the configured array file type.
    """
    entry_name = "_".join(map(str, grid_entry)) + "." + ARRAY_FILETYPE
    return load(settings.pj(filename, entry_name))
def test_read_csv():
    """Read the CSV fixture on the serial backend and spot-check rows."""
    import nums
    from nums.core import settings

    settings.system_name = "serial"
    csv_path = settings.pj(
        settings.project_root, "tests", "core", "storage", "test.csv"
    )
    ba = nums.read_csv(csv_path, has_header=True)
    # First and last rows of the fixture file.
    assert np.allclose(ba[0].get(), [123, 4, 5])
    assert np.allclose(ba[-1].get(), [1.2, 3.4, 5.6])
def read_meta_fs(filename: AnyStr):
    """Return the unpickled meta data from <filename>/meta.pkl, or None
    when the meta file does not exist.
    """
    meta_path = settings.pj(filename, "meta.pkl")
    try:
        with open(meta_path, "rb") as inp:
            return pickle.load(inp)
    except FileNotFoundError:
        # Missing meta file means "no meta data", not an error.
        return None
def delete_file_fs(filename: AnyStr):
    """Best-effort removal of the directory backing a NumS file.

    Returns True on successful removal; False when `filename` is not a
    NumS file (no meta.pkl inside) or the removal fails for any reason.
    """
    meta_path = settings.pj(filename, "meta.pkl")
    # Only directories containing a meta.pkl are NumS files.
    if not pathlib.Path(meta_path).is_file():
        return False
    try:
        shutil.rmtree(filename)
    except Exception:
        # Deliberate best-effort semantics: any failure maps to False.
        return False
    return True
def test_modin(nps_app_inst):
    """Check that nums.from_modin round-trips the CSV fixture on Ray."""
    import nums
    import nums.numpy as nps
    import modin.pandas as mpd
    from nums.core import settings
    from nums.core.systems.systems import RaySystem

    # Modin interop is only exercised on the Ray backend.
    if not isinstance(nps_app_inst.cm.system, RaySystem):
        return
    csv_path = settings.pj(
        settings.project_root, "tests", "core", "storage", "test.csv"
    )
    ba1 = nums.read_csv(csv_path, has_header=True)
    df = mpd.read_csv(csv_path)
    ba2: BlockArray = nums.from_modin(df)
    assert nps.allclose(ba1, ba2)
def get_parts_fs(filename: AnyStr, grid_meta: Dict):
    """Scan the directory `filename` for block files belonging to the grid
    described by `grid_meta`.

    Returns:
        "all" when every grid entry has a block file on disk;
        None when the directory is missing or contains no blocks;
        otherwise a uint32 ndarray of the grid entries that were found.
    """
    base: pathlib.Path = pathlib.Path(filename)
    if not base.is_dir():
        return None
    results = []
    grid: ArrayGrid = ArrayGrid.from_meta(grid_meta)
    # Grid entries are returned as uint32 below, so every coordinate of the
    # grid itself must fit in 32 bits. Fix: the original asserted on
    # grid.block_shape, which bounds block sizes, not the entry coordinates;
    # grid_shape is what bounds the entries produced by the iterator.
    assert np.all(np.array(grid.grid_shape) < 2**32)
    contains_all = True
    for grid_entry in grid.get_entry_iterator():
        entry_name = "_".join(list(map(str, grid_entry))) + "." + ARRAY_FILETYPE
        entry_filename = settings.pj(filename, entry_name)
        if pathlib.Path(entry_filename).is_file():
            results.append(grid_entry)
        else:
            contains_all = False
    if contains_all:
        return "all"
    if not results:
        return None
    return np.array(results, dtype=np.uint32)
pd_parts = frame._frame_mgr_cls.map_partitions(frame._partitions, lambda df: np.array(df)) grid_shape = len(frame._row_lengths), len(frame._column_widths) shape = (np.sum(frame._row_lengths), np.sum(frame._column_widths)) block_shape = app.get_block_shape(shape, dtype) rows = [] for i in range(grid_shape[0]): cols = [] for j in range(grid_shape[1]): curr_block_shape = (frame._row_lengths[i], frame._column_widths[j]) part: PandasOnRayFramePartition = pd_parts[(i, j)] part.drain_call_queue() ba: BlockArray = BlockArray.from_oid(part.oid, curr_block_shape, dtype, system) cols.append(ba) if grid_shape[1] == 1: row_ba: BlockArray = cols[0] else: row_ba: BlockArray = app.concatenate(cols, axis=1, axis_block_size=block_shape[1]) rows.append(row_ba) result = app.concatenate(rows, axis=0, axis_block_size=block_shape[0]) return result if __name__ == "__main__": from nums.core import settings import modin.pandas as mpd filename = settings.pj(settings.project_root, "tests", "core", "storage", "test.csv") df = mpd.read_csv(filename) ba: BlockArray = from_modin(df) print(ba.get())