Exemplo n.º 1
0
def delete_meta_fs(filename: AnyStr):
    """
    Delete meta data from disk.

    Removes the ``meta.pkl`` file found under the directory obtained by
    joining ``settings.fs_meta`` and ``filename`` with ``settings.pj``.

    The directory is intentionally NOT created beforehand: a delete must not
    leave a fresh, empty directory tree behind when there is nothing to remove.

    :param filename: Name joined onto ``settings.fs_meta`` to locate the meta dir.
    :return: The result of ``os.remove`` (``None``) wrapped in an object-dtype
        NumPy array.
    :raises OSError: Propagated from ``os.remove`` (e.g. ``FileNotFoundError``
        when the meta file does not exist).
    """
    file_dir = settings.pj(settings.fs_meta, filename)
    filepath = settings.pj(file_dir, "meta.pkl")
    return np.array(os.remove(filepath), dtype=object)
Exemplo n.º 2
0
def write_meta_fs(meta: Dict, filename: AnyStr):
    """
    Pickle ``meta`` into ``meta.pkl`` on disk.

    The target directory is ``settings.fs_meta`` joined with ``filename`` via
    ``settings.pj``; it is created (including parents) if it is missing.

    :param meta: Meta data dictionary to serialize.
    :param filename: Name joined onto ``settings.fs_meta`` to locate the meta dir.
    :return: The result of ``pickle.dump`` (``None``) wrapped in an object-dtype
        NumPy array.
    """
    meta_dir = settings.pj(settings.fs_meta, filename)
    settings.Path(meta_dir).mkdir(parents=True, exist_ok=True)
    target = settings.pj(meta_dir, "meta.pkl")
    with open(target, "wb") as handle:
        outcome = pickle.dump(meta, handle)
    return np.array(outcome, dtype=object)
Exemplo n.º 3
0
def delete_block_fs(filename, grid_entry: Tuple):
    """
    Delete block from disk.

    The block file lives in the directory obtained by joining
    ``settings.fs_data`` and ``filename`` with ``settings.pj``. Its name is the
    grid entry's components joined by ``"_"`` plus ``"." + ARRAY_FILETYPE``.

    The directory is intentionally NOT created beforehand: a delete must not
    leave a fresh, empty directory tree behind when there is nothing to remove.

    :param filename: Name joined onto ``settings.fs_data`` to locate the data dir.
    :param grid_entry: Grid coordinates identifying the block.
    :return: The result of ``os.remove`` (``None``) wrapped in an object-dtype
        NumPy array.
    :raises OSError: Propagated from ``os.remove`` (e.g. ``FileNotFoundError``
        when the block file does not exist).
    """
    file_dir = settings.pj(settings.fs_data, filename)
    entry_name = "_".join(map(str, grid_entry)) + "." + ARRAY_FILETYPE
    filepath = settings.pj(file_dir, entry_name)
    return np.array(os.remove(filepath), dtype=object)
Exemplo n.º 4
0
def read_block_fs(filename, grid_entry: Tuple):
    """
    Read block from disk.

    The block file lives in the directory obtained by joining
    ``settings.fs_data`` and ``filename`` with ``settings.pj``. Its name is the
    grid entry's components joined by ``"_"`` plus ``"." + ARRAY_FILETYPE``.

    Reading has no filesystem side effects: the data directory is not created
    when it is missing, so a failed read leaves the disk untouched.

    :param filename: Name joined onto ``settings.fs_data`` to locate the data dir.
    :param grid_entry: Grid coordinates identifying the block.
    :return: Whatever ``load`` returns for the block's file path.
    """
    file_dir = settings.pj(settings.fs_data, filename)
    entry_name = "_".join(map(str, grid_entry)) + "." + ARRAY_FILETYPE
    filepath = settings.pj(file_dir, entry_name)
    return load(filepath)
Exemplo n.º 5
0
def write_block_fs(block: Any, filename: AnyStr, grid_entry: Tuple):
    """
    Persist a single block on disk.

    The destination directory is ``settings.fs_data`` joined with ``filename``
    via ``settings.pj``; it is created (including parents) if it is missing.
    The file name is the grid entry's components joined by ``"_"`` followed by
    ``"." + ARRAY_FILETYPE``.

    :param block: Block payload handed to ``save``.
    :param filename: Name joined onto ``settings.fs_data`` to locate the data dir.
    :param grid_entry: Grid coordinates identifying the block.
    :return: The result of ``save`` wrapped in an object-dtype NumPy array.
    """
    data_dir = settings.pj(settings.fs_data, filename)
    settings.Path(data_dir).mkdir(parents=True, exist_ok=True)
    coords = [str(component) for component in grid_entry]
    block_path = settings.pj(data_dir, "_".join(coords) + "." + ARRAY_FILETYPE)
    outcome = save(block, block_path)
    return np.array(outcome, dtype=object)
Exemplo n.º 6
0
def read_meta_fs(filename: AnyStr):
    """
    Read meta data from disk.

    Unpickles ``meta.pkl`` from the directory obtained by joining
    ``settings.fs_meta`` and ``filename`` with ``settings.pj``.

    Reading has no filesystem side effects: the meta directory is not created
    when it is missing, so a failed read leaves the disk untouched.

    :param filename: Name joined onto ``settings.fs_meta`` to locate the meta dir.
    :return: The object stored in ``meta.pkl``.
    :raises OSError: Propagated from ``open`` (e.g. ``FileNotFoundError`` when
        the meta file does not exist).
    """
    file_dir = settings.pj(settings.fs_meta, filename)
    filepath = settings.pj(file_dir, "meta.pkl")
    # NOTE(review): pickle.load can execute arbitrary code from a crafted file;
    # this should only be pointed at meta files written by a trusted writer.
    with open(filepath, "rb") as fh:
        return pickle.load(fh)
Exemplo n.º 7
0
def read_block_fs(filename, grid_entry: Tuple):
    """
    Load one block stored under the directory ``filename``.

    The file name is the grid entry's components joined by ``"_"`` followed by
    ``"." + ARRAY_FILETYPE``; it is joined onto ``filename`` with ``settings.pj``.

    :param filename: Directory path holding the block files.
    :param grid_entry: Grid coordinates identifying the block.
    :return: Whatever ``load`` returns for the block's file path.
    """
    coords = [str(component) for component in grid_entry]
    block_name = "_".join(coords) + "." + ARRAY_FILETYPE
    return load(settings.pj(filename, block_name))
Exemplo n.º 8
0
def test_read_csv():
    """
    Check that ``nums.read_csv`` parses the bundled ``test.csv`` fixture.

    Sets ``settings.system_name`` to ``"serial"`` and then compares the first
    and last rows of the resulting array against known values.
    """
    import nums
    from nums.core import settings
    settings.system_name = "serial"

    csv_path = settings.pj(
        settings.project_root, "tests", "core", "storage", "test.csv"
    )
    block_array = nums.read_csv(csv_path, has_header=True)
    # (row index, expected row values) pairs, checked in order.
    expectations = (
        (0, [123, 4, 5]),
        (-1, [1.2, 3.4, 5.6]),
    )
    for row_index, expected_row in expectations:
        assert np.allclose(block_array[row_index].get(), expected_row)
Exemplo n.º 9
0
def read_meta_fs(filename: AnyStr):
    """
    Load the pickled meta data stored as ``meta.pkl`` under ``filename``.

    :param filename: Directory path that should contain ``meta.pkl``.
    :return: The unpickled object, or ``None`` when a ``FileNotFoundError``
        is raised while opening or loading the file.
    """
    meta_path = settings.pj(filename, "meta.pkl")
    try:
        with open(meta_path, "rb") as handle:
            loaded = pickle.load(handle)
    except FileNotFoundError:
        # A missing meta file is reported as "no meta data", not as an error.
        return None
    return loaded
Exemplo n.º 10
0
def delete_file_fs(filename: AnyStr):
    """
    Remove the directory ``filename`` from disk if it holds a ``meta.pkl``.

    The presence of ``meta.pkl`` is what marks the directory as a NumS file;
    directories without it are never touched.

    :param filename: Directory path to delete.
    :return: ``True`` when the directory tree was removed; ``False`` when
        ``meta.pkl`` is absent or the removal raised an exception.
    """
    marker = pathlib.Path(settings.pj(filename, "meta.pkl"))
    if marker.is_file():
        # Best-effort removal: any failure is reported as False, not raised.
        try:
            shutil.rmtree(filename)
        except Exception:
            return False
        return True
    return False
Exemplo n.º 11
0
def test_modin(nps_app_inst):
    """
    Check that ``nums.from_modin`` agrees with ``nums.read_csv`` on ``test.csv``.

    The comparison only runs when the application's system is a ``RaySystem``;
    for any other system the test returns without asserting anything.
    """
    import nums
    import nums.numpy as nps
    import modin.pandas as mpd
    from nums.core import settings
    from nums.core.systems.systems import RaySystem

    if isinstance(nps_app_inst.cm.system, RaySystem):
        csv_path = settings.pj(
            settings.project_root, "tests", "core", "storage", "test.csv"
        )
        from_csv = nums.read_csv(csv_path, has_header=True)
        modin_frame = mpd.read_csv(csv_path)
        from_frame: BlockArray = nums.from_modin(modin_frame)
        assert nps.allclose(from_csv, from_frame)
Exemplo n.º 12
0
def get_parts_fs(filename: AnyStr, grid_meta: Dict):
    """
    Report which blocks of an array are present in the directory ``filename``.

    Every entry of the grid built from ``grid_meta`` is mapped to a file name
    (components joined by ``"_"`` plus ``"." + ARRAY_FILETYPE``) and checked
    for existence under ``filename``.

    :param filename: Directory path expected to hold the block files.
    :param grid_meta: Meta data passed to ``ArrayGrid.from_meta``.
    :return: ``None`` when ``filename`` is not a directory or no block file is
        found; the string ``"all"`` when no grid entry is missing; otherwise a
        ``uint32`` NumPy array of the grid entries whose files exist.
    """
    if not pathlib.Path(filename).is_dir():
        return None
    grid: ArrayGrid = ArrayGrid.from_meta(grid_meta)
    # This is a multi-dimensional array of blocks, so entries should be relatively small.
    assert np.all(np.array(grid.block_shape) < 2**32)
    present = []
    total_entries = 0
    for grid_entry in grid.get_entry_iterator():
        total_entries += 1
        block_name = "_".join(str(c) for c in grid_entry) + "." + ARRAY_FILETYPE
        if pathlib.Path(settings.pj(filename, block_name)).is_file():
            present.append(grid_entry)
    # Nothing missing (this includes a grid with no entries at all).
    if len(present) == total_entries:
        return "all"
    if not present:
        return None
    return np.array(present, dtype=np.uint32)
Exemplo n.º 13
0
    pd_parts = frame._frame_mgr_cls.map_partitions(frame._partitions, lambda df: np.array(df))
    grid_shape = len(frame._row_lengths), len(frame._column_widths)

    shape = (np.sum(frame._row_lengths), np.sum(frame._column_widths))
    block_shape = app.get_block_shape(shape, dtype)
    rows = []
    for i in range(grid_shape[0]):
        cols = []
        for j in range(grid_shape[1]):
            curr_block_shape = (frame._row_lengths[i], frame._column_widths[j])
            part: PandasOnRayFramePartition = pd_parts[(i, j)]
            part.drain_call_queue()
            ba: BlockArray = BlockArray.from_oid(part.oid, curr_block_shape, dtype, system)
            cols.append(ba)
        if grid_shape[1] == 1:
            row_ba: BlockArray = cols[0]
        else:
            row_ba: BlockArray = app.concatenate(cols, axis=1, axis_block_size=block_shape[1])
        rows.append(row_ba)
    result = app.concatenate(rows, axis=0, axis_block_size=block_shape[0])
    return result


if __name__ == "__main__":
    # Manual smoke run: read the bundled test CSV with modin, convert the
    # frame with from_modin, and print the materialized result.
    from nums.core import settings
    import modin.pandas as mpd
    csv_path = settings.pj(
        settings.project_root, "tests", "core", "storage", "test.csv"
    )
    modin_frame = mpd.read_csv(csv_path)
    block_array: BlockArray = from_modin(modin_frame)
    print(block_array.get())