Example #1
def verify_results_split(R, I, input_array_path, datadir):
    from dask_io.optimizer.cases.resplit_utils import get_blocks_shape
    splitfiles_partition = get_blocks_shape(R, I)
    print("split files partiton:", splitfiles_partition)

    all_true = True
    orig_arr = get_dask_array_from_hdf5(input_array_path,
                                        "/data",
                                        logic_cs=tuple(I))

    for i in range(splitfiles_partition[0]):
        for j in range(splitfiles_partition[1]):
            for k in range(splitfiles_partition[2]):
                splitfilename = f"{i}_{j}_{k}.hdf5"
                split_filepath = os.path.join(datadir, splitfilename)
                print("opening", split_filepath)
                splitarray = get_dask_array_from_hdf5(split_filepath, "/data")
                print(
                    f"Slices from ground truth {i*I[0]}:{(i+1)*I[0]}, {j*I[1]}:{(j+1)*I[1]}, {k*I[2]}:{(k+1)*I[2]}"
                )
                ground_truth_arr = orig_arr[i * I[0]:(i + 1) * I[0],
                                            j * I[1]:(j + 1) * I[1],
                                            k * I[2]:(k + 1) * I[2]]

                verify_task = da.allclose(ground_truth_arr, splitarray)
                print("VERIFY TASK: ", verify_task)
                disable_clustering()
                _res = verify_task.compute()
                print("RESULT: ", _res)
                if not _res:
                    print(f"[Error] Split failed for {splitfilename}")
                    all_true = False

    clean_files()
    return all_true
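
The helper get_blocks_shape is imported from dask_io.optimizer.cases.resplit_utils and is assumed here to return the number of split files along each axis, i.e. the ceiling division of the reshaped array shape R by the split chunk shape I. A minimal sketch of that assumed behaviour (not the library's actual implementation):

import math

def get_blocks_shape_sketch(R, I):
    # Assumed behaviour: one split file per chunk along each dimension,
    # i.e. ceil(R[d] / I[d]) files along dimension d.
    return tuple(math.ceil(r / i) for r, i in zip(R, I))

# For example, get_blocks_shape_sketch((100, 100, 100), (50, 50, 100)) -> (2, 2, 1)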
Example #2
def verify_results_merge(input_array_path, merged_array_path):
    original_array = get_dask_array_from_hdf5(input_array_path, "/data")
    merged_array = get_dask_array_from_hdf5(merged_array_path, "/data")
    verify_task = da.allclose(original_array, merged_array)
    print("VERIFY TASK: ", verify_task)
    disable_clustering()
    _res = verify_task.compute()
    print("RESULT: ", _res)
    if not _res:
        print("[Error] Rechunk failed")
    clean_files()
    return _res
Example #3
def test_sum(shape_to_test, nb_chunks):
    """ Test if the sum of two blocks yields the good result using our optimization function.
    """
    logger.info("testing shape %s", shape_to_test)

    # prepare test case
    case = Split(pytest.test_array_path, shape_to_test)
    case.sum(nb_chunks)

    # non optimized run
    disable_clustering()
    result_non_opti = case.get().compute()

    # optimized run
    enable_clustering(buffer_size)
    result_opti = case.get().compute()

    assert np.array_equal(result_non_opti, result_opti)
Example #4
    def store_correct():
        """ Compare the real chunks to the splits to see if correctly splitted. 
        """
        logger.info("Testing %s matches...", len(arr_list))
        with h5py.File(split_filepath, 'r') as f:
            for i, a in enumerate(arr_list):
                stored_a = da.from_array(f['/data' + str(i)])
                # logger.info("split shape: %s", stored_a.shape)

                stored_a = stored_a.rechunk(chunks=shape_to_test)
                # logger.info("split rechunked to: %s", stored_a.shape)
                # logger.info("will be compared to : %s ", a.shape)
                # logger.info("Testing all close...")
                test = da.allclose(stored_a, a)
                disable_clustering()  # TODO: remove this, make it work even for allclose
                assert test.compute()
        logger.info("Passed.\n")
Example #5
def run_test(test, paths):
    """ Wrapper around 'run' function for diagnostics.

    Arguments:
    ----------
        test:
        paths:
    """
    test.print_config()
    uid = uuid.uuid4()
    print("Test ID is ", str(uid))

    params = getattr(test, 'params')
    splitcase = getattr(test, 'splitcase')
    mergecase = getattr(test, 'mergecase')

    if params["optimized"]:
        enable_clustering(params["buffer_size"])
    else:
        disable_clustering()

    flush_cache()
    try:
        arr = splitcase.get()
        tsplit, diagnostics_split, monitor_split = run_to_hdf5(
            arr, params, uid, str(params["chunk_shape"]), params["optimized"])
    except Exception as e:
        print(e)
        return [
            params["hardware"], params["cuboid_name"], params["array_shape"],
            params["chunk_type"], params["chunk_shape"], params["optimized"],
            params["buffer_size"], params["nthreads"], None, None, None, None,
            None, None, None, None
        ]
    finally:
        splitcase.clean()

    R = cuboids[params["cuboid_name"]]['shape']
    I = splitcase.chunks_shape
    print(f'R: {R}')
    print(f'I: {I}')
    if 'auto' not in I:
        success_run_split = verify_results_split(
            R, I, getattr(test, 'cuboid_filepath'),
            getattr(test, 'hardware_path'))
    else:
        success_run_split = None
    print(f'[Split] Find the diagnostics output file at {diagnostics_split}')
    print(f'[Split] Find the monitor output file at {monitor_split}')

    flush_cache()
    try:
        arr = mergecase.get()
        tmerge, diagnostics_merge, monitor_merge = run_to_hdf5(
            arr, params, uid, str(params["chunk_shape"]), params["optimized"])
    except Exception as e:
        print(e)
        return [
            params["hardware"], params["cuboid_name"], params["array_shape"],
            params["chunk_type"], params["chunk_shape"], params["optimized"],
            params["buffer_size"], params["nthreads"],
            round(tsplit, 4), None, None, None, None, None, None, None
        ]
    finally:
        mergecase.clean()

    success_run_merge = verify_results_merge(getattr(test, 'cuboid_filepath'),
                                             getattr(test, 'merge_filepath'))
    print(f'[Merge] Find the diagnostics output file at {diagnostics_merge}')
    print(f'[Merge] Find the monitor output file at {monitor_merge}')

    datadir = getattr(test, 'hardware_path')
    merged_filepath = getattr(test, 'merge_filepath')
    clean_directory(datadir)
    os.remove(merged_filepath)

    sample_res = [
        params["hardware"], params["cuboid_name"], params["array_shape"],
        params["chunk_type"], params["chunk_shape"], params["optimized"],
        params["buffer_size"], params["nthreads"],
        round(tsplit, 4),
        round(tmerge, 4), diagnostics_split, diagnostics_merge, monitor_split,
        monitor_merge, success_run_split, success_run_merge
    ]
    print("-------------RESULT\n", sample_res)
    return sample_res
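
Each call to run_test returns one 16-element row (or a partially filled row on failure). Below is a minimal, hypothetical driver that collects these rows into a CSV file; the column names simply mirror the order of sample_res and are not part of the original code:

import csv

RESULT_COLUMNS = [
    "hardware", "cuboid_name", "array_shape", "chunk_type", "chunk_shape",
    "optimized", "buffer_size", "nthreads", "t_split", "t_merge",
    "diagnostics_split", "diagnostics_merge", "monitor_split", "monitor_merge",
    "success_split", "success_merge"
]

def write_results_csv(rows, csv_path):
    # Write the rows returned by run_test into a single CSV summary file.
    with open(csv_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(RESULT_COLUMNS)
        writer.writerows(rows)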
Example #6
                    with dask.config.set(scheduler='single-threaded'):
                        t = time.time()
                        _ = split_arr.compute()
                        t = time.time() - t
                        times.append([buffer, t, "optimized"])
                        visualize([prof, rprof, cprof],
                                  os.path.join(output_directory, f"{buffer}opti.html"),
                                  show=False)

            os.remove(output_filepath)  # remove output file for next run
            with h5py.File(output_filepath, 'x') as f_out:  # open split array
                # run non optimized
                split_arr = split_to_hdf5(in_arr, f_out, nb_blocks=None)
                print("RUNNING NON OPTIMIZED")
                disable_clustering()
                flush_cache()
                with Profiler() as prof, ResourceProfiler() as rprof, \
                        CacheProfiler(metric=nbytes) as cprof:
                    t = time.time()
                    _ = split_arr.compute()
                    t = time.time() - t
                    times.append([buffer, t, "non optimized"])
                    visualize([prof, rprof, cprof],
                              os.path.join(output_directory, f"{buffer}nonopti.html"),
                              show=False)

            os.remove(output_filepath)  # remove output file for next run

    for r in times:
Example #7
def test_split(optimized, nb_chunks, shape_to_test):
    def create_arrays_for_comparison():
        """ Get chunks as dask arrays to compare the chunks to the splitted files.
        """
        arr = get_dask_array_from_hdf5(pytest.test_array_path,
                                       '/data',
                                       logic_cs=shape_to_test)
        arr_list = get_arr_chunks(arr, nb_chunks=nb_chunks)
        return arr_list

    def apply_sanity_check(split_filepath):
        """ Check if splitted file not empty.
        """
        logger.info("Checking split file integrity...")
        with h5py.File(split_filepath, 'r') as f:
            keys_list = list(f.keys())
            logger.info("file : %s", f)
            logger.info("Number of datasets in hdf5 file : %s", len(keys_list))
            logger.info("First item: %s", keys_list[0])
            assert len(keys_list) != 0
        logger.info("Integrity check passed.\n")

    def store_correct():
        """ Compare the real chunks to the splits to see if correctly splitted. 
        """
        logger.info("Testing %s matches...", len(arr_list))
        with h5py.File(split_filepath, 'r') as f:
            for i, a in enumerate(arr_list):
                stored_a = da.from_array(f['/data' + str(i)])
                # logger.info("split shape: %s", stored_a.shape)

                stored_a = stored_a.rechunk(chunks=shape_to_test)
                # logger.info("split rechunked to: %s", stored_a.shape)
                # logger.info("will be compared to : %s ", a.shape)
                # logger.info("Testing all close...")
                test = da.allclose(stored_a, a)
                disable_clustering()  # TODO: remove this, make it work even for allclose
                assert test.compute()
        logger.info("Passed.\n")

    def split():
        # overwrite if split file already exists
        if os.path.isfile(split_filepath):
            os.remove(split_filepath)

        case = Split(pytest.test_array_path, shape_to_test)
        case.split_hdf5(split_filepath, nb_blocks=nb_chunks)
        case.get().compute()
        return

    logger.info("PARAMETERS:")
    logger.info("Optimized: %s", optimized), nb_chunks, shape_to_test
    logger.info("Nb_chunk: %s", nb_chunks)
    logger.info("Shape: %s \n", shape_to_test)

    # setup config
    split_filepath = "./split_file.hdf5"

    if optimized:
        enable_clustering(buffer_size)
    else:
        disable_clustering()

    # test
    split()
    apply_sanity_check(split_filepath)

    # assert
    arr_list = create_arrays_for_comparison()
    store_correct()
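
The arguments optimized, nb_chunks and shape_to_test are supplied by pytest fixtures or parametrization not shown on this page. A hypothetical way to drive the test (the shapes and chunk counts below are illustrative, not the project's actual test matrix):

import pytest

@pytest.mark.parametrize("optimized", [False, True])
@pytest.mark.parametrize("nb_chunks", [None, 2])
@pytest.mark.parametrize("shape_to_test", [(50, 50, 50), (25, 50, 100)])
def test_split_cases(optimized, nb_chunks, shape_to_test):
    test_split(optimized, nb_chunks, shape_to_test)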
Example #8
def test_array_path():
    disable_clustering()
    array_filepath = './small_array_nochunk.hdf5'
    if not os.path.isfile(array_filepath):
        create_test_array_nochunk(array_filepath, (100, 100, 100))
    return array_filepath
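
create_test_array_nochunk is not shown on this page; it is assumed to fill a new HDF5 file with a random array stored contiguously (no HDF5 chunking) under the "/data" dataset. A minimal sketch under that assumption:

import h5py
import numpy as np

def create_test_array_nochunk_sketch(filepath, shape):
    # Assumed behaviour: write a random float array at "/data"
    # with the default contiguous (non-chunked) layout.
    data = np.random.random(shape)
    with h5py.File(filepath, "w") as f:
        f.create_dataset("/data", data=data)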