def collect_data(work_queue, results_queue, grid_def_file):
    """
    Worker loop: take input file names from work_queue, compute the
    per-time-step diagnostics for each file and put them on results_queue.

    For every input file a tuple ``(input_file, results)`` is put on
    results_queue, where ``results`` is a tuple of ``(name, array)`` pairs
    for: nh_sst_average, nh_sst, amoc_index_sst, amoc_psi, amoc_psi_max and
    amoc_psi_max_at_26n. The first axis of every array is time.

    This function never returns on its own (it blocks on work_queue.get()
    forever); the caller is expected to terminate the worker process.

    work_queue: queue of paths to NetCDF input files.
    results_queue: queue that receives the results.
    grid_def_file: path to a NetCDF file containing the 'area_t' variable,
                   used as weights for the spatial averages.
    """

    with nc.Dataset(grid_def_file) as gf:
        areas = gf.variables['area_t'][:]

    while True:
        input_file = work_queue.get()

        # The context manager guarantees the input file is closed even if
        # one of the calculations below raises.
        with nc.Dataset(input_file) as f:
            # Setup some variables from the input.
            lons = f.variables['geolon_t']
            lats = f.variables['geolat_t']
            time_var = f.variables['time']
            temp_var = f.variables['temp']
            ty_trans = f.variables['ty_trans']
            depths = f.variables['st_ocean'][:]

            nh_mask = get_nh_mask(temp_var[0, 0, :, :].mask, lats)
            lat_s, lat_e, lon_s, \
                lon_e = get_indices_for_amoc_idx_region(lons, lats)

            # Set up result vars, all initialised to NaN.
            num_times = time_var.shape[0]
            z = len(f.dimensions['st_ocean'])
            y = len(f.dimensions['yt_ocean'])
            x = len(f.dimensions['xt_ocean'])

            nh_sst_average = np.full((num_times,), np.nan, dtype='f')
            nh_sst = np.ma.array(np.full((num_times, y, x), np.nan,
                                         dtype='f'))
            amoc_index_sst = np.full((num_times,), np.nan, dtype='f')
            amoc_psi = np.ma.array(np.full((num_times, z, y), np.nan,
                                           dtype='f'))
            amoc_psi_max = np.full((num_times,), np.nan, dtype='f')
            amoc_psi_max_at_26n = np.full((num_times,), np.nan, dtype='f')

            for t in range(num_times):
                # Read the surface temperature once per time step, it is
                # used by all three SST diagnostics below.
                sst = temp_var[t, 0, :, :]

                # Get surface temp spatial mean in the NH
                nh_sst_average[t] = np.ma.average(
                    np.ma.masked_array(sst, mask=nh_mask), weights=areas)

                # SST
                nh_sst[t, :, :] = sst

                # Spatial average SST in AMOC index region
                amoc_index_sst[t] = np.average(
                    sst[lat_s:lat_e, lon_s:lon_e],
                    weights=areas[lat_s:lat_e, lon_s:lon_e])

                # Get AMOC psi and max within chosen region.
                amoc_psi[t, :, :] = calc_atlantic_moc(ty_trans[t, :, :, :],
                                                      lons, lats)
                amoc_psi_max[t] = max_within_region(amoc_psi[t, :, :],
                                                    500.0, 30.0,
                                                    depths, lats)
                amoc_psi_max_at_26n[t] = max_at_lat(amoc_psi[t, :, :],
                                                    26.5, lats)

        results = (('nh_sst_average', nh_sst_average),
                   ('nh_sst', nh_sst),
                   ('amoc_index_sst', amoc_index_sst),
                   ('amoc_psi', amoc_psi),
                   ('amoc_psi_max', amoc_psi_max),
                   ('amoc_psi_max_at_26n', amoc_psi_max_at_26n))
        results_queue.put((input_file, results))
def make_amoc_idx_maps(file):
    """
    Build a map of the AMOC index for every time point in file and collect
    the maximum of the AMOC stream function at every time point.

    Returns a tuple of:
        - the AMOC index maps, a masked array with time as the first axis,
        - a pandas Series of the per-time-point AMOC psi maximum,
        - the values of the time variable (so the caller can check ordering),
        - the NH mean SST of the last time point processed,
        - the NH mask.
    """

    # NOTE(review): get_nh_mask and calc_atlantic_moc are called here with
    # different arguments (and a different return shape) than in the other
    # functions of this file - confirm this function is still in use.
    with nc.Dataset(file) as dataset:
        variables = dataset.variables
        lons = variables['geolon_t']
        lats = variables['geolat_t']
        time_var = variables['time']
        temp_var = variables['temp']
        nh_mask = get_nh_mask(temp_var[0, 0, :, :].mask)
        ty_trans = variables['ty_trans']
        dzt = variables['dzt']

        surface_shape = temp_var[:, 0, :, :].shape
        amoc_idx_map = np.ma.zeros(surface_shape)
        amoc_psi_max = []

        num_steps = time_var.shape[0]
        for step in range(num_steps):
            moc_result = calc_atlantic_moc(ty_trans[step, :, :, :],
                                           dzt[step, :, :, :], lons, lats)
            amoc, atlantic_mask, _, _, _ = moc_result
            amoc_psi_max.append(np.max(amoc))

            # Surface temperature spatial mean over the NH.
            masked_sst = np.ma.masked_array(temp_var[step, 0, :, :],
                                            mask=nh_mask)
            nh_sst_mean = np.mean(masked_sst)

            # Map of the AMOC index, see (1)
            amoc_idx_map[step, :, :] = temp_var[step, 0, :, :] - nh_sst_mean
            amoc_idx_map[step, :, :].mask = atlantic_mask[0, :, :]

        amoc_psi_max_ts = pd.Series(amoc_psi_max,
                                    time_dim_to_pandas_periods(time_var))
        time_check = time_var[:]

    # Progress marker.
    print('+', end='')
    sys.stdout.flush()

    # The time variable is handed back so that the caller can check that
    # everything is put back in the correct order.
    return amoc_idx_map, amoc_psi_max_ts, time_check, nh_sst_mean, nh_mask
def preprocess_data(fname, input_files, grid_def_file):
    """
    Create a file for the preprocessed data.

    The HDF5 file fname will contain the datasets:
        time, nh_sst_average, nh_sst, amoc_index_sst, amoc_psi,
        amoc_psi_max, amoc_psi_max_at_26n (time is the first axis of each),
        plus atlantic_mask and amoc_psi_mask.

    The time-dependent datasets are computed by a set of worker processes
    running collect_data(), one input file at a time, and written into the
    output at the position that input file has in input_files.

    fname: path of the HDF5 file to write.
    input_files: list of paths to the NetCDF input files.
    grid_def_file: path to the grid definition file, passed to the workers.
    """

    print('master with pid {}'.format(os.getpid()))
    print('Preprocessing data ...', end='')
    start_time = time.time()

    # Get time dim for all input files
    pool = mp.Pool()
    try:
        tv = pool.map(get_time_var, input_files)
    finally:
        pool.close()
        pool.join()
    tv_sizes = [a.shape[0] for a in tv]
    tv_total = sum(tv_sizes)
    assert tv_total > 0

    # For each input file, the [start, stop) range it occupies on the
    # global time axis of the output file.
    time_range = {}
    start = 0
    for input_file, size in zip(input_files, tv_sizes):
        time_range[input_file] = (start, start + size)
        start += size

    # Now collect data. This will be done by a pool of workers.
    # Integer division: the count is used with range() below.
    num_workers = max(1, mp.cpu_count() // 2)
    job_queue = mp.Queue(len(input_files))
    results_queue = mp.Queue(num_workers)

    workers = []
    out = None
    try:
        # Start workers
        for _ in range(num_workers):
            args = (job_queue, results_queue, grid_def_file)
            worker = mp.Process(target=collect_data, args=args)
            workers.append(worker)
            worker.start()

        # Put all jobs in the queue
        for input_file in input_files:
            job_queue.put(input_file)

        # Setup output file
        with nc.Dataset(input_files[0]) as inf:
            z = len(inf.dimensions['st_ocean'])
            y = len(inf.dimensions['yt_ocean'])
            x = len(inf.dimensions['xt_ocean'])
            ty_trans = inf.variables['ty_trans'][0, :]
            lats = inf.variables['geolat_t'][:]
            lons = inf.variables['geolon_t'][:]

        # Open read/write (creating the file if needed). The mode is given
        # explicitly because the h5py default differs between versions.
        out = h5py.File(fname, 'a')
        out.create_dataset('time', (tv_total,), dtype='f', chunks=True)
        out.create_dataset('nh_sst_average', (tv_total,), dtype='f',
                           chunks=True)
        out.create_dataset('nh_sst', (tv_total, y, x), dtype='f',
                           chunks=True)
        out.create_dataset('amoc_index_sst', (tv_total,), dtype='f',
                           chunks=True)
        out.create_dataset('amoc_psi', (tv_total, z, y), dtype='f',
                           chunks=True)
        out.create_dataset('amoc_psi_max', (tv_total,), dtype='f',
                           chunks=True)
        out.create_dataset('amoc_psi_max_at_26n', (tv_total,), dtype='f',
                           chunks=True)
        out.create_dataset('atlantic_mask', (z, y, x), dtype='i',
                           chunks=True)
        out.create_dataset('amoc_psi_mask', (z, y), dtype='i', chunks=True)

        out['time'][:] = np.concatenate(tv)
        out['atlantic_mask'][:] = \
            get_atlantic_mask(ty_trans.mask, lons, lats)
        out['amoc_psi_mask'][:] = \
            calc_atlantic_moc(ty_trans, lons, lats).mask
        out.flush()

        # Get data from workers, load into file at the correct index.
        # Results arrive in completion order, not input order.
        # NOTE(review): if a worker dies this get() blocks forever.
        for _ in input_files:
            input_file, result_arrays = results_queue.get()
            start, stop = time_range[input_file]

            for name, data in result_arrays:
                if data.ndim not in (1, 2, 3):
                    raise AssertionError(
                        'unexpected number of dimensions ({}) for {}'.format(
                            data.ndim, name))
                # Slicing only the first (time) axis selects the full
                # extent of any remaining axes.
                out[name][start:stop] = data

            print('.', end='')
            sys.stdout.flush()
    finally:
        # Terminate workers, also when something above has failed, so that
        # no worker processes or open files are left behind.
        job_queue.close()
        results_queue.close()
        for worker in workers:
            worker.terminate()
        for worker in workers:
            worker.join()
        if out is not None:
            out.close()

    print(' finished in {} seconds'.format(time.time() - start_time))
def visit_data_file(args):
    """
    Visit a data file and collect/calculate the following:

    1. AMOC maximum timeseries
    2. AMOC mean timeseries
    3. AMOC index timeseries (see (1) for definition)
    4. AMOC psi timeseries as a numpy array, shape (t, depth, lat)
    5. Surface plot of difference between SST and NH spatial mean SST,
       shape (t, lat, lon)

    args: a tuple (file, grid_def_file, do_depth_correlation_plot,
          do_surface_correlation_plot). It is a single argument so that the
          function can be used with a map-style call.

    Returns a 5-tuple (amoc_idx_ts, amoc_max_ts, amoc_mean_ts, amoc_psi_ts,
    sst_nh_diff_ts). The first three are pandas Series. The fourth is None
    unless do_depth_correlation_plot is set and the fifth is None unless
    do_surface_correlation_plot is set.
    """

    data_file, grid_def_file, do_depth_correlation_plot, \
        do_surface_correlation_plot = args

    with nc.Dataset(grid_def_file) as gf:
        areas = gf.variables['area_t'][:]

    # Try with annual data.
    use_annual = False
    tmp_file = None
    if use_annual:
        _, tmp_file = run_ncra(data_file, ['geolon_t', 'geolat_t', 'time',
                                           'temp', 'ty_trans'])

    try:
        # The context manager closes the file even if a calculation fails.
        with nc.Dataset(tmp_file if use_annual else data_file) as f:
            lons = f.variables['geolon_t']
            lats = f.variables['geolat_t']
            time_var = f.variables['time']
            temp_var = f.variables['temp']
            ty_trans = f.variables['ty_trans']
            # NOTE(review): collect_data() passes st_ocean to
            # max_within_region() without the cumulative sum, and uses
            # 30.0 rather than 35.0 - confirm which is intended.
            depths = np.cumsum(f.variables['st_ocean'][:])

            t_dim = len(f.dimensions['time'])
            z_dim = len(f.dimensions['st_ocean'])
            x_dim = len(f.dimensions['xt_ocean'])
            y_dim = len(f.dimensions['yt_ocean'])

            nh_mask = get_nh_mask(temp_var[0, 0, :, :].mask, lats)
            atlantic_mask = get_atlantic_mask(ty_trans[0, :, :].mask,
                                              lons, lats)
            lat_start, lat_end, lon_start, \
                lon_end = get_indices_for_amoc_idx_region(lons, lats)

            amoc_max_ts = []
            amoc_mean_ts = []
            amoc_idx_ts = []
            # Only allocate the (large) optional outputs when requested.
            amoc_psi_ts = None
            if do_depth_correlation_plot:
                amoc_psi_ts = np.ma.zeros((t_dim, z_dim, y_dim))
            sst_nh_diff_ts = None
            if do_surface_correlation_plot:
                sst_nh_diff_ts = np.ma.zeros((t_dim, y_dim, x_dim))

            for t in range(time_var.shape[0]):
                # Read the surface temperature once per time step.
                sst = temp_var[t, 0, :, :]

                # Get surface temp spatial mean in the NH
                nh_sst_mean = np.ma.average(
                    np.ma.masked_array(sst, mask=nh_mask), weights=areas)

                # Get AMOC max and mean.
                amoc_psi = calc_atlantic_moc(ty_trans[t, :, :, :],
                                             lons, lats)
                amoc_idx_sst = np.average(
                    sst[lat_start:lat_end, lon_start:lon_end],
                    weights=areas[lat_start:lat_end, lon_start:lon_end])

                # Calculate the AMOC index
                amoc_idx_ts.append(amoc_idx_sst - nh_sst_mean)
                amoc_max_ts.append(max_within_region(amoc_psi, 500.0, 35.0,
                                                     depths, lats))
                amoc_mean_ts.append(np.mean(amoc_psi))

                # Get AMOC psi timeseries
                if do_depth_correlation_plot:
                    amoc_psi_ts[t, :, :] = amoc_psi[:, :]

                # Get the surface difference between temp and NH SST mean
                if do_surface_correlation_plot:
                    sst_nh_diff_ts[t, :, :] = \
                        np.ma.masked_array(sst - nh_sst_mean,
                                           mask=atlantic_mask[0, :, :])

            # Add time dim to pandas timeseries
            periods = time_dim_to_pandas_periods(time_var)
            if use_annual:
                # Integer division: the result is used as an index.
                periods = [periods[len(periods) // 2]]

            amoc_idx_ts = pd.Series(amoc_idx_ts, periods)
            amoc_max_ts = pd.Series(amoc_max_ts, periods)
            amoc_mean_ts = pd.Series(amoc_mean_ts, periods)
    finally:
        # Always clean up the temporary annual-mean file, if one was made.
        if tmp_file is not None:
            os.remove(tmp_file)

    print('^', end='')

    return (amoc_idx_ts, amoc_max_ts, amoc_mean_ts,
            amoc_psi_ts, sst_nh_diff_ts)