def _nonempty_index_slices(length, nr_blocks=5):
    """Split ``range(length)`` into contiguous slices, dropping empty ones.

    ``np.array_split`` returns empty sub-arrays when ``length < nr_blocks``;
    indexing such a sub-array with ``[0]`` would raise ``IndexError``, so
    those blocks are filtered out here.
    """
    index_blocks = np.array_split(np.arange(length), nr_blocks)
    return [
        slice(block[0], block[-1] + 1, 1)
        for block in index_blocks
        if block.size
    ]


def convert_variable(variable_name, variable):
    """Fill the target zarr archive of one variable from its source archive.

    The source archive is read from ``source_path / variable_name`` and the
    target archive is obtained through ``load_zarr_archive`` (with
    ``overwrite=True``) at ``target_path / variable_name``. The first three
    axes are processed block-wise (at most 5 blocks per axis); for every
    block the result of ``compute_target`` is written into the target.

    Args:
        variable_name: Name of the sub-archive below ``source_path`` and
            ``target_path``; also forwarded to ``compute_target``.
        variable: Unused by this function; kept for interface compatibility.

    Returns:
        Always ``1``.

    Raises:
        OSError: If the source archive path does not exist.
    """
    source_zarr_path = source_path.joinpath(variable_name)
    if not source_zarr_path.exists():
        raise OSError(
            "source zarr archive {} not found".format(source_zarr_path)
        )

    z_source = zarr.open(str(source_zarr_path))

    target_zarr_path = target_path.joinpath(variable_name)
    print("starting", target_zarr_path, flush=True)

    # The last axis is expanded from n entries to (n - 1) * days_per_month + 1
    # entries (presumably monthly -> daily time steps; confirm with
    # compute_target).
    nr_daily_times = (z_source.shape[-1] - 1) * days_per_month + 1
    target_shape = z_source.shape[:-1] + (nr_daily_times, )
    # NOTE(review): a 4-tuple of chunks assumes a 4-dimensional archive; how
    # -1 is interpreted is up to load_zarr_archive.
    target_chunks = (1, 1, 1, -1)
    z_target = load_zarr_archive(target_zarr_path,
                                 target_shape,
                                 target_chunks,
                                 overwrite=True)

    # Axis names follow the original local names (lat, lon, prob).
    lat_slices = _nonempty_index_slices(z_source.shape[0])
    lon_slices = _nonempty_index_slices(z_source.shape[1])
    prob_slices = _nonempty_index_slices(z_source.shape[2])

    # Materialised as a list so tqdm can show the total number of blocks.
    block_slices = [(s0, s1, s2) for s0 in lat_slices for s1 in lon_slices
                    for s2 in prob_slices]
    for s0, s1, s2 in tqdm(block_slices):
        z_source_sliced = z_source[s0, s1, s2]
        z_target_sliced = z_target[s0, s1, s2]
        z_target[s0, s1, s2] = compute_target(variable_name, z_target_sliced,
                                              z_source_sliced)

        # Drop the in-memory blocks before the next ones are loaded.
        del z_source_sliced
        del z_target_sliced
    print("done", target_zarr_path, flush=True)
    return 1
Exemplo n.º 2
0
# ## Computing
#
# *Attention:* `"overwrite" = True` in the task dictionary deletes all data in the selected slices. The setting `"overwrite" = False` tries to load an existing archive and extends it by computing the incomplete points within the chosen slices.

# +
# %%time

for task in task_list:
    print("task: computing", task["computation"])
    print()

    zarr_path = Path(project_path.joinpath(task["computation"]))
    print("zarr archive:", str(zarr_path))
    z = load_zarr_archive(zarr_path,
                          task["result_shape"],
                          task["result_chunks"],
                          overwrite=task["overwrite"])

    nr_incomplete_sites, _, _ = CARDAMOMlib.get_incomplete_sites(z, slices)
    print("Number of incomplete sites:", nr_incomplete_sites)
    logfile_name = str(project_path.joinpath(task["computation"] + ".log"))
    print("Logfile:", logfile_name)

    for timeout in task["timeouts"]:
        CARDAMOMlib.compute_incomplete_sites(timeout, z, nr_times,
                                             variable_names, variables,
                                             non_data_variables, slices, task,
                                             logfile_name)

    nr_incomplete_sites, _, _ = CARDAMOMlib.get_incomplete_sites(z, slices)
    write_to_logfile(logfile_name, nr_incomplete_sites,
Exemplo n.º 3
0
def run_task_with_mr(
    project_path,
    task,
    nr_pools,
    time_step_in_days,
    times_da,
    start_values_zarr,
    us_zarr,
    Bs_zarr,
    slices
):
    """Run one computation task, with one attempt per configured timeout.

    The task's result archive is obtained through ``load_zarr_archive`` at
    ``project_path / task["computation"]``. Then
    ``compute_incomplete_sites_with_mr`` is called once for every entry of
    ``task["timeouts"]`` until it returns a truthy value. Finally the number
    of remaining incomplete sites is written to the logfile and printed.

    Args:
        project_path: Path-like object providing ``joinpath``; the archive
            and the ``<computation>.log`` logfile are located below it.
        task: Mapping with the keys ``"computation"``, ``"result_shape"``,
            ``"result_chunks"``, ``"overwrite"`` and ``"timeouts"``.
        nr_pools: Forwarded to ``compute_incomplete_sites_with_mr``.
        time_step_in_days: Forwarded to ``compute_incomplete_sites_with_mr``.
        times_da: Forwarded to ``compute_incomplete_sites_with_mr``.
        start_values_zarr: Input archive, forwarded to the helpers.
        us_zarr: Input archive, forwarded to the helpers.
        Bs_zarr: Input archive, forwarded to the helpers.
        slices: Selection of sites, forwarded to the helpers.

    Returns:
        None.
    """
    computation = task["computation"]
    print("task: computing", computation)
    print()

    zarr_path = Path(project_path.joinpath(computation))
    print("zarr archive:", str(zarr_path))
    z = load_zarr_archive(
        zarr_path,
        task["result_shape"],
        task["result_chunks"],
        overwrite=task["overwrite"]
    )

    logfile_name = str(project_path.joinpath(computation + ".log"))
    print("Logfile:", logfile_name)

    # Initialise before the loop so that an empty ``task["timeouts"]`` does
    # not leave ``done`` undefined for the check below.
    done = False
    for timeout in task["timeouts"]:
        done = compute_incomplete_sites_with_mr(
            timeout,
            z,
            nr_pools,
            time_step_in_days,
            times_da,
            start_values_zarr,
            us_zarr,
            Bs_zarr,
            slices,
            task,
            logfile_name
        )
        if done:
            break

    if done:
        nr_incomplete_sites = 0
    else:
        # Count again only when no attempt reported completion.
        nr_incomplete_sites, _, _ = get_incomplete_site_tuples_for_mr_computation(
            start_values_zarr,
            us_zarr,
            Bs_zarr,
            z,
            slices
        )

    write_to_logfile(logfile_name, nr_incomplete_sites, "incomplete sites remaining")
    print(nr_incomplete_sites, "incomplete sites remaining")
    print()