示例#1
0
def collect_data(work_queue, results_queue, grid_def_file):
    """
    Worker loop: turn input files from work_queue into per-file diagnostics.

    Runs forever. Each iteration blocks on work_queue.get() for an input file
    path, computes the diagnostics listed below for every time index in that
    file and puts (input_file, results) on results_queue. The function never
    returns on its own, so the caller has to stop the process from outside.

    results is a tuple of (name, array) pairs, in this order:
        nh_sst_average       (t,)       area-weighted average of temp[t, 0]
                                        under the mask from get_nh_mask()
        nh_sst               (t, y, x)  temp[t, 0]
        amoc_index_sst       (t,)       area-weighted average of temp[t, 0]
                                        over the index range returned by
                                        get_indices_for_amoc_idx_region()
        amoc_psi             (t, z, y)  result of calc_atlantic_moc()
        amoc_psi_max         (t,)       result of max_within_region()
        amoc_psi_max_at_26n  (t,)       result of max_at_lat() for 26.5

    Arguments:
        work_queue: queue yielding paths of input netCDF files.
        results_queue: queue receiving the (input_file, results) tuples.
        grid_def_file: netCDF file providing the 'area_t' weights.
    """

    with nc.Dataset(grid_def_file) as gf:
        areas = gf.variables['area_t'][:]

    while True:
        input_file = work_queue.get()

        # The context manager closes the dataset even when one of the
        # computations below raises.
        with nc.Dataset(input_file) as f:
            # Setup some variables from the input.
            lons = f.variables['geolon_t']
            lats = f.variables['geolat_t']
            time_var = f.variables['time']
            temp_var = f.variables['temp']
            ty_trans = f.variables['ty_trans']
            depths = f.variables['st_ocean'][:]

            nh_mask = get_nh_mask(temp_var[0, 0, :, :].mask, lats)
            lat_s, lat_e, lon_s, lon_e = \
                get_indices_for_amoc_idx_region(lons, lats)
            # The weights of the index region do not change between steps.
            region_areas = areas[lat_s:lat_e, lon_s:lon_e]

            # Set up result vars, pre-filled with NaN.
            n_times = time_var.shape[0]
            n_depths = len(f.dimensions['st_ocean'])
            n_lats = len(f.dimensions['yt_ocean'])
            n_lons = len(f.dimensions['xt_ocean'])

            nh_sst_average = np.full((n_times,), np.nan, dtype='f')
            nh_sst = np.ma.empty((n_times, n_lats, n_lons),
                                 dtype='f') * np.nan
            amoc_index_sst = np.full((n_times,), np.nan, dtype='f')
            amoc_psi = np.ma.empty((n_times, n_depths, n_lats),
                                   dtype='f') * np.nan
            amoc_psi_max = np.full((n_times,), np.nan, dtype='f')
            amoc_psi_max_at_26n = np.full((n_times,), np.nan, dtype='f')

            for t in range(n_times):
                # Read the surface temperature once per step and reuse it.
                sst = temp_var[t, 0, :, :]

                # Get surface temp spatial mean in the NH
                nh_sst_average[t] = np.ma.average(
                    np.ma.masked_array(sst, mask=nh_mask), weights=areas)
                # SST
                nh_sst[t, :, :] = sst

                # Spatial average SST in AMOC index region
                # NOTE(review): this uses np.average while the NH mean above
                # uses np.ma.average; confirm the region never contains
                # masked cells.
                amoc_index_sst[t] = np.average(
                    sst[lat_s:lat_e, lon_s:lon_e], weights=region_areas)

                # Get AMOC psi and max within chosen region.
                amoc_psi[t, :, :] = calc_atlantic_moc(ty_trans[t, :, :, :],
                                                      lons, lats)
                # NOTE(review): 500.0 and 30.0 are presumably a depth and a
                # latitude bound; confirm against max_within_region().
                amoc_psi_max[t] = max_within_region(amoc_psi[t, :, :],
                                                    500.0, 30.0, depths, lats)
                amoc_psi_max_at_26n[t] = max_at_lat(amoc_psi[t, :, :],
                                                    26.5, lats)

        results = (('nh_sst_average', nh_sst_average), ('nh_sst', nh_sst),
                   ('amoc_index_sst', amoc_index_sst), ('amoc_psi', amoc_psi),
                   ('amoc_psi_max', amoc_psi_max),
                   ('amoc_psi_max_at_26n', amoc_psi_max_at_26n))
        results_queue.put((input_file, results))
def make_amoc_idx_maps(file):
    """
    Build an AMOC index map for every time point in file.

    For each time index the map is the surface temperature (temp[t, 0]) minus
    the mean of that field under the mask returned by get_nh_mask(), with the
    first level of the mask returned by calc_atlantic_moc() assigned as its
    mask.

    Returns a tuple of:
        - the masked array of index maps, one map per time point,
        - a pandas Series of np.max() of the AMOC stream function per time
          point, indexed by time_dim_to_pandas_periods(),
        - the values of the 'time' variable, so that the caller can check
          the ordering of results,
        - the mean computed for the LAST time point only,
        - the mask returned by get_nh_mask().
    """

    with nc.Dataset(file) as dataset:

        lon_var = dataset.variables['geolon_t']
        lat_var = dataset.variables['geolat_t']
        times = dataset.variables['time']
        temp = dataset.variables['temp']
        northern_mask = get_nh_mask(temp[0, 0, :, :].mask)
        transport = dataset.variables['ty_trans']
        thickness = dataset.variables['dzt']

        index_maps = np.ma.zeros(temp[:, 0, :, :].shape)
        psi_maxima = []

        n_steps = times.shape[0]
        for step in range(n_steps):

            # Only the stream function and the mask are used here; the other
            # three return values are discarded.
            psi, basin_mask, _, _, _ = calc_atlantic_moc(
                transport[step, :, :, :], thickness[step, :, :, :],
                lon_var, lat_var)
            psi_maxima.append(np.max(psi))

            # Unweighted mean of the surface temp under the NH mask.
            northern_mean = np.mean(
                np.ma.masked_array(temp[step, 0, :, :], mask=northern_mask))

            # Map of the AMOC index, see (1)
            index_maps[step, :, :] = temp[step, 0, :, :] - northern_mean
            # NOTE(review): this assigns the mask through a slice of
            # index_maps and so relies on the slice sharing its mask with
            # the parent array -- confirm with the numpy version in use.
            index_maps[step, :, :].mask = basin_mask[0, :, :]

        psi_max_series = pd.Series(
            psi_maxima, time_dim_to_pandas_periods(times))
        raw_times = times[:]

    # Progress marker, flushed right away.
    print('+', end='')
    sys.stdout.flush()

    # The raw time values are handed back only so the caller can check that
    # results are reassembled in the correct order.
    return (index_maps, psi_max_series, raw_times, northern_mean,
            northern_mask)
示例#3
0
def preprocess_data(fname, input_files, grid_def_file):
    """
    Create an HDF5 file holding the preprocessed data of all input files.

    The time values of every input file are gathered first, then a group of
    worker processes running collect_data() computes the per-file results,
    which are written into the output at the time offset of their file.

    Datasets created in fname (T is the total number of time points):
        time                 (T,)       concatenated time values
        nh_sst_average       (T,)
        nh_sst               (T, y, x)
        amoc_index_sst       (T,)
        amoc_psi             (T, z, y)
        amoc_psi_max         (T,)
        amoc_psi_max_at_26n  (T,)
        atlantic_mask        (z, y, x)  get_atlantic_mask() for the first
                                        time point of the first input file
        amoc_psi_mask        (z, y)     mask of calc_atlantic_moc() for the
                                        same field

    Arguments:
        fname: path of the HDF5 output file.
        input_files: paths of the netCDF input files, in output order.
        grid_def_file: grid definition file handed on to collect_data().

    Raises:
        AssertionError: if the inputs contain no time points at all, or if a
            worker returns an array that is not 1, 2 or 3 dimensional.
    """

    print('master with pid {}'.format(os.getpid()))
    print('Preprocessing data ...', end='')
    start_time = time.time()

    # Get time dim for all input files
    pool = mp.Pool()
    try:
        tv = pool.map(get_time_var, input_files)
    finally:
        pool.close()
        pool.join()
    tv_sizes = [a.shape[0] for a in tv]
    tv_total = sum(tv_sizes)
    assert tv_total > 0

    # Now collect data. This will be done by a pool of workers.
    # Floor division keeps the count an int, which range() requires.
    num_workers = max(1, mp.cpu_count() // 2)
    job_queue = mp.Queue(len(input_files))
    results_queue = mp.Queue(num_workers)

    workers = []
    out = None
    try:
        # Start workers
        for _ in range(num_workers):
            worker = mp.Process(
                target=collect_data,
                args=(job_queue, results_queue, grid_def_file))
            worker.start()
            workers.append(worker)

        # Put all jobs in the queue
        for input_file in input_files:
            job_queue.put(input_file)

        # Setup output file
        with nc.Dataset(input_files[0]) as inf:
            z = len(inf.dimensions['st_ocean'])
            y = len(inf.dimensions['yt_ocean'])
            x = len(inf.dimensions['xt_ocean'])
            ty_trans = inf.variables['ty_trans'][0, :]
            lats = inf.variables['geolat_t'][:]
            lons = inf.variables['geolon_t'][:]

        # The mode is explicit because the default of h5py.File differs
        # between h5py versions and newer ones open read-only.
        out = h5py.File(fname, 'a')
        time_series_shapes = (
            ('time', (tv_total,)),
            ('nh_sst_average', (tv_total,)),
            ('nh_sst', (tv_total, y, x)),
            ('amoc_index_sst', (tv_total,)),
            ('amoc_psi', (tv_total, z, y)),
            ('amoc_psi_max', (tv_total,)),
            ('amoc_psi_max_at_26n', (tv_total,)),
        )
        for name, shape in time_series_shapes:
            out.create_dataset(name, shape, dtype='f', chunks=True)
        out.create_dataset('atlantic_mask', (z, y, x), dtype='i', chunks=True)
        out.create_dataset('amoc_psi_mask', (z, y), dtype='i', chunks=True)

        out['time'][:] = np.concatenate(tv)
        out['atlantic_mask'][:] = get_atlantic_mask(ty_trans.mask, lons, lats)
        out['amoc_psi_mask'][:] = calc_atlantic_moc(ty_trans, lons, lats).mask
        out.flush()

        # Map each input file to the slice of the global time axis it fills.
        bounds = np.concatenate(([0], np.cumsum(tv_sizes)))
        time_slices = dict(
            (path, slice(int(lo), int(hi)))
            for path, lo, hi in zip(input_files, bounds[:-1], bounds[1:]))

        # Get data from workers, load into file at the correct index.
        # Results arrive in completion order, not in input order.
        for _ in input_files:
            input_file, result_arrays = results_queue.get()
            rows = time_slices[input_file]

            for name, data in result_arrays:
                if data.ndim not in (1, 2, 3):
                    raise AssertionError(
                        'unexpected shape {} for {}'.format(data.shape, name))
                # Only the leading (time) axis is sliced; the trailing axes
                # are written whole.
                out[name][rows] = data
            print('.', end='')
            sys.stdout.flush()
    finally:
        # The workers loop forever, so they are terminated here, also when
        # something above failed.
        job_queue.close()
        results_queue.close()
        for worker in workers:
            worker.terminate()
        for worker in workers:
            worker.join()
        if out is not None:
            out.close()

    print(' finished in {} seconds'.format(time.time() - start_time))
示例#4
0
def visit_data_file(args):
    """
    Visit a data file and collect/calculate AMOC diagnostics.

    Arguments:
        args: tuple of (file, grid_def_file, do_depth_correlation_plot,
            do_surface_correlation_plot). It is a single tuple rather than
            four parameters; presumably so the function can be mapped over
            a list of jobs.

    Returns a 5-tuple, in this order:

    1. AMOC index timeseries: area-weighted mean SST of the AMOC index
        region minus the area-weighted NH mean SST (see (1) for definition)
    2. AMOC maximum timeseries (max_within_region() of AMOC psi)
    3. AMOC mean timeseries (np.mean() of AMOC psi)
    4. AMOC psi timeseries as a numpy array, shape (t, depth, lat), or None
        when do_depth_correlation_plot is false
    5. Surface plot of difference between SST and NH spatial mean SST,
        shape (t, lat, lon), or None when do_surface_correlation_plot is
        false

    The first three are pandas Series indexed by the periods obtained from
    time_dim_to_pandas_periods().
    """

    data_file, grid_def_file, do_depth_correlation_plot, \
        do_surface_correlation_plot = args

    # Try with annual data.
    use_annual = False
    tmp_file = None
    if use_annual:
        _, tmp_file = run_ncra(data_file, ['geolon_t', 'geolat_t', 'time',
                                           'temp', 'ty_trans'])

    try:
        with nc.Dataset(grid_def_file) as gf:
            areas = gf.variables['area_t'][:]

        with nc.Dataset(tmp_file if use_annual else data_file) as f:
            lons = f.variables['geolon_t']
            lats = f.variables['geolat_t']
            time_var = f.variables['time']
            temp_var = f.variables['temp']
            ty_trans = f.variables['ty_trans']
            # NOTE(review): collect_data() in this file passes 'st_ocean'
            # without the cumulative sum and uses 30.0 instead of 35.0 for
            # max_within_region(); confirm which variant is intended.
            depths = np.cumsum(f.variables['st_ocean'][:])
            t_dim = len(f.dimensions['time'])
            z_dim = len(f.dimensions['st_ocean'])
            x_dim = len(f.dimensions['xt_ocean'])
            y_dim = len(f.dimensions['yt_ocean'])

            nh_mask = get_nh_mask(temp_var[0, 0, :, :].mask, lats)
            atlantic_mask = get_atlantic_mask(ty_trans[0, :, :].mask,
                                              lons, lats)

            lat_start, lat_end, lon_start, lon_end = \
                get_indices_for_amoc_idx_region(lons, lats)
            # The weights of the index region do not change between steps.
            region_areas = areas[lat_start:lat_end, lon_start:lon_end]

            amoc_max_ts = []
            amoc_mean_ts = []
            amoc_idx_ts = []
            # The large per-file arrays are only allocated when requested.
            amoc_psi_ts = None
            if do_depth_correlation_plot:
                amoc_psi_ts = np.ma.zeros((t_dim, z_dim, y_dim))
            sst_nh_diff_ts = None
            if do_surface_correlation_plot:
                sst_nh_diff_ts = np.ma.zeros((t_dim, y_dim, x_dim))

            for t in range(time_var.shape[0]):
                # Read the surface temperature once per step and reuse it.
                sst = temp_var[t, 0, :, :]

                # Get surface temp spatial mean in the NH
                nh_sst_mean = np.ma.average(
                    np.ma.masked_array(sst, mask=nh_mask), weights=areas)

                # Get AMOC max and mean.
                amoc_psi = calc_atlantic_moc(ty_trans[t, :, :, :], lons, lats)
                amoc_idx_sst = np.average(
                    sst[lat_start:lat_end, lon_start:lon_end],
                    weights=region_areas)
                # Calculate the AMOC index
                amoc_idx_ts.append(amoc_idx_sst - nh_sst_mean)
                amoc_max_ts.append(max_within_region(amoc_psi, 500.0, 35.0,
                                                     depths, lats))
                amoc_mean_ts.append(np.mean(amoc_psi))

                # Get AMOC psi timeseries
                if do_depth_correlation_plot:
                    amoc_psi_ts[t, :, :] = amoc_psi[:, :]

                # Get the surface difference between temp and NH SST mean
                if do_surface_correlation_plot:
                    sst_nh_diff_ts[t, :, :] = \
                        np.ma.masked_array(sst - nh_sst_mean,
                                           mask=atlantic_mask[0, :, :])

            # Add time dim to pandas timeseries
            periods = time_dim_to_pandas_periods(time_var)
            if use_annual:
                # Floor division: the index has to be an int.
                periods = [periods[len(periods) // 2]]
            amoc_idx_ts = pd.Series(amoc_idx_ts, periods)
            amoc_max_ts = pd.Series(amoc_max_ts, periods)
            amoc_mean_ts = pd.Series(amoc_mean_ts, periods)
    finally:
        # Remove the temporary annual file even when processing failed.
        if tmp_file is not None:
            os.remove(tmp_file)

    print('^', end='')

    return (amoc_idx_ts, amoc_max_ts, amoc_mean_ts, amoc_psi_ts,
            sst_nh_diff_ts)