示例#1
0
#variable_paths = [p for p in zarr_path.iterdir() if p.is_dir()]
#
#variable_names = []
#variables = []
#for variable_path in variable_paths:
#    variable_names.append(variable_path.name)
#    variables.append(da.from_zarr(str(variable_path)))

# +
#ds = xr.merge(dss)
#ds = xr.open_zarr(data_folder + filestem + "zarr_version/26_78.00_95.00")
#
#ds

# +
# Model structure; make_fake_ds below reads its nr_pools and pool_structure.
ms = CARDAMOMlib.load_model_structure_greg()


def make_fake_ds(dataset):
    """Start building zero-filled placeholder data sized like ``dataset``.

    The visible body prepares a zero array over (lat, lon, prob, pool) and
    the matching coordinate dictionary for the ``start_values`` variable.

    NOTE(review): this definition appears to be cut off in this excerpt --
    nothing is assembled from these pieces and there is no ``return``
    statement; confirm against the complete source.
    """
    # fake start_values data
    # one axis each for lat, lon and prob of the dataset plus one pool axis
    fake_data_sv = np.zeros(
        (len(dataset.lat), len(dataset.lon), len(dataset.prob), ms.nr_pools))

    # pool labels come from the 'pool_name' entry of each pool description
    coords_pool = [d['pool_name'] for d in ms.pool_structure]
    fake_coords_sv = {
        'lat': dataset.lat.data,
        'lon': dataset.lon.data,
        'prob': dataset.prob.data,
        'pool': coords_pool
    }
示例#2
0
def compute_pwc_mr_fd_for_one_prob(prob_nr):
    """Run the pwc_mr_fd computation for one ``prob`` index and save it.

    Opens ``small_netcdf/rechunked.nc`` below the hard-coded CARDAMOM data
    folder, selects ``lat`` positions 0-33, ``lon`` positions 0-70 and the
    single ``prob`` position ``prob_nr``, splits the selection into one chunk
    per (lat, lon, prob) cell, applies
    ``CARDAMOMlib.compute_ds_pwc_mr_fd_greg`` to every cell through
    ``xr.map_blocks`` and writes the result to
    ``output/pwc_mr_fd_<prob_nr>.nc``.  Progress lines are appended to
    ``output/pwc_mr_fd_<prob_nr>.log``.

    Every dataset opened or derived here is closed when the function exits,
    also when the computation or the write raises.

    Args:
        prob_nr: Integer position along the ``prob`` dimension to process;
            it is also used, zero-padded to four digits, in the names of the
            log file and the netCDF output file.

    Returns:
        None.  The result is written to disk.
    """
    # Imported locally so this function does not depend on a file-level
    # import that may not exist.
    from contextlib import ExitStack

    data_folder = "/home/data/CARDAMOM/"  # matagorda, antakya
    filestem = "Greg_2020_10_26/"
    output_folder = "output/"

    # The log file and the netCDF result share everything but the suffix.
    output_stem = (data_folder + filestem + output_folder +
                   "pwc_mr_fd_%04d" % prob_nr)
    logfilename = output_stem + ".log"

    chunk_dict = {"lat": 1, "lon": 1, 'prob': 1}
    comp_dict = {'zlib': True, 'complevel': 9}

    ms = CARDAMOMlib.load_model_structure_greg()

    def make_fake_ds(dataset):
        """Return a placeholder dataset used as ``map_blocks`` template.

        The variables ``start_values``, ``times``, ``us`` and ``Bs`` are
        zero-filled float arrays and ``log`` is an array of empty strings
        (dtype ``<U150``); their coordinates are taken from ``dataset`` and
        from the pool names of the model structure.
        """
        pool_names = [d['pool_name'] for d in ms.pool_structure]

        coords_by_dim = {
            'lat': dataset.lat.data,
            'lon': dataset.lon.data,
            'prob': dataset.prob.data,
            'time': dataset.time.data,
            'pool': pool_names,
            'pool_to': pool_names,
            'pool_from': pool_names,
        }
        # Pool axes are sized by ms.nr_pools (not by the number of names) so
        # that xarray still reports a mismatch between the two.
        size_by_dim = {
            'lat': len(dataset.lat),
            'lon': len(dataset.lon),
            'prob': len(dataset.prob),
            'time': len(dataset.time),
            'pool': ms.nr_pools,
            'pool_to': ms.nr_pools,
            'pool_from': ms.nr_pools,
        }

        def fake_array(dims, dtype=float):
            # np.zeros (rather than an uninitialised np.ndarray) keeps the
            # template content deterministic, also for the string variable.
            shape = tuple(size_by_dim[dim] for dim in dims)
            return xr.DataArray(
                data=np.zeros(shape, dtype=dtype),
                dims=list(dims),
                coords={dim: coords_by_dim[dim] for dim in dims})

        cell_dims = ['lat', 'lon', 'prob']
        fake_data_vars = {
            'start_values': fake_array(cell_dims + ['pool']),
            'times': fake_array(cell_dims + ['time']),
            'us': fake_array(cell_dims + ['time', 'pool']),
            'Bs': fake_array(cell_dims + ['time', 'pool_to', 'pool_from']),
            'log': fake_array(cell_dims, dtype="<U150"),
        }

        return xr.Dataset(data_vars=fake_data_vars, coords=coords_by_dim)

    def write_to_logfile(*args):
        """Append a line '<HH:MM:SS> <args joined by spaces>' to the log."""
        current_time = time.strftime("%H:%M:%S", time.localtime())
        entries = (current_time, ) + args
        with open(logfilename, 'a') as f:
            f.write(" ".join(str(entry) for entry in entries) + '\n')

    # there is no multi-dimensional 'groupby' in xarray data structures
    def nested_groupby_apply(dataset, groupby, apply_fn, **kwargs):
        """Group by each name in ``groupby`` in turn, innermost applies."""
        if len(groupby) == 1:
            return dataset.groupby(groupby[0]).apply(apply_fn, **kwargs)
        return dataset.groupby(groupby[0]).apply(nested_groupby_apply,
                                                 groupby=groupby[1:],
                                                 apply_fn=apply_fn,
                                                 **kwargs)

    def func_pwc_mr_fd(ds_single):
        """Compute the result for one grouped cell and log its completion."""
        ds_res = CARDAMOMlib.compute_ds_pwc_mr_fd_greg(ds_single, comp_dict)
        write_to_logfile("finished single,", "lat:", ds_single.lat.data,
                         "lon:", ds_single.lon.data, "prob:",
                         ds_single.prob.data)
        return ds_res

    def func_chunk(chunk_ds):
        """Process one chunk by grouping it over lat, lon and prob."""
        res_ds = nested_groupby_apply(chunk_ds, ['lat', 'lon', 'prob'],
                                      func_pwc_mr_fd)

        # group_by removes the dimensions mentioned, so the resulting ds is
        # lower dimensional; unfortunately, map_blocks does not do that, so
        # putting sub-result datasets back together with a nested map_blocks
        # becomes technically difficult -- hence the nested groupby above.

        print('chunk finished:',
              chunk_ds.lat[0].data,
              chunk_ds.lon[0].data,
              chunk_ds.prob[0].data,
              flush=True)

        return res_ds

    # The stack closes everything registered on it in reverse order, whether
    # the block finishes normally or raises.
    with ExitStack() as stack:
        ds = stack.enter_context(
            xr.open_dataset(data_folder + filestem + "small_netcdf/" +
                            "rechunked.nc"))

        ds_sub = ds.isel(
            lat=slice(0, 34, 1),  # positions 0-33
            lon=slice(0, 71, 1),  # positions 0-70
            prob=slice(prob_nr, prob_nr + 1, 1)  # only position prob_nr
        ).chunk(chunk_dict)
        stack.callback(ds_sub.close)

        fake_ds = make_fake_ds(ds_sub).chunk(chunk_dict)
        ds_pwc_mr_fd = xr.map_blocks(func_chunk, ds_sub, template=fake_ds)
        stack.callback(ds_pwc_mr_fd.close)

        # number of chunks = product of the chunk counts per dimension
        nr_chunks = np.prod([len(val) for val in ds_sub.chunks.values()])
        nr_singles = len(ds_sub.lat) * len(ds_sub.lon) * len(ds_sub.prob)
        write_to_logfile('starting:', nr_chunks, "chunks, ", nr_singles,
                         "singles")

        ds_pwc_mr_fd.to_netcdf(output_stem + ".nc", compute=True)

        write_to_logfile('done')