def assemble_global_var(var):
    """Assemble the global 3-D field of *var* from per-tile pickle files.

    The list of tiles is discovered from the pickle filenames in the
    stats "W" directory (``<prefix>_<tile>.pkl``).  Each tile's data is
    written into its (lat, lon) window of a global array; NaNs are
    replaced by ``fill_value``.

    Parameters
    ----------
    var : str
        Variable name handed to ``stats.read``.

    Returns
    -------
    numpy.ndarray
        The global field transposed to shape (nz, nlat, nlon).
    """
    files = glob.glob(stats.var_dir["W"] + "/*pkl")
    # tile name is the second '_'-separated token, minus the ".pkl" suffix
    tile_list = [f.split('_')[1][:-4] for f in files]
    print("found %i tiles" % len(tile_list))

    nlat = len(latglo)
    nlon = len(longlo)
    nz = len(tools.zref)
    z3d = np.zeros((nlat, nlon, nz))

    # NOTE(review): bb is unused below but read_tiles() may have side
    # effects (caching) -- kept to preserve behavior; confirm before removing.
    bb = tiles.read_tiles()

    pbs = []
    for tile in tile_list:
        print("\r %8s" % tile, end="")
        jj, ii = get_slices(tile)
        d = stats.read(tile, var, transpose=False)
        d[np.isnan(d)] = fill_value
        try:
            z3d[jj, ii, :] = d
        except (ValueError, IndexError):
            # shape mismatch between the tile data and its global window:
            # fill the window and remember the problematic tile
            # (bug fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit)
            z3d[jj, ii, :] = fill_value
            pbs += [tile]
    print("\npb with tiles ", pbs)
    #z3d = np.ma.array(z3d,mask=z3d==fill_value)
    return np.transpose(z3d, (2, 0, 1))
def split_global_into_tiles():
    """Split the global per-variable profile files into per-tile files.

    For each subdomain, reads the global profiles pickle of every variable
    in ``zref_var`` and writes, per tile, the rows matching that tile's
    argo index.  The purge of the per-tile profiles folder only prints the
    ``rm`` commands (the actual removal is commented out).  Finally calls
    ``apply_timestamp()``.
    """
    bb = tiles.read_tiles()
    # the eight subdomains: "00".."03" and "10".."13"
    subd_list = [a + b for a, b in itertools.product("01", "0123")]
    for subd in subd_list:
        print("update tiles profiles %s" % subd)
        tile_list = [t for t in bb.keys() if t[:2] == subd]
        argo = {}
        for tile in tile_list:
            argo[tile] = tiles.read_argo_tile(tile)
        # (bug fix: removed the unused local `purge = False`)
        for var in zref_var:
            f = global_profiles_file % (var, subd)
            if os.path.exists(f):
                prof = pd.read_pickle(f)
                for tile in tile_list:
                    print("\r %4s - " % var, end="")
                    idx = argo[tile].index
                    data = {var: prof.loc[idx]}
                    write_profiles(tile, data, variables=[var])
            else:
                # NOTE(review): `tile` here is whatever the argo loop above
                # left bound (NameError if tile_list is empty) -- the
                # message presumably should name the missing file `f`;
                # confirm before changing the output.
                print(" %4s - %s is empty" % (var, tile))
        # purge tiles/profiles folder
        # NOTE(review): `var` is the last variable of the loop above --
        # presumably var_dir[var] only selects a directory; confirm.
        for tile in tile_list:
            f = tiles_profiles_file % (var_dir[var], tile)
            command = "rm %s" % f
            print(command)
            #os.system(command)
        print()
    apply_timestamp()
def monoproc_job():
    """Process every pending tile sequentially in a single process."""
    task_indices, tile_keys = define_tasks(resume=True)
    tile_dict = tiles.read_tiles()
    for index in task_indices:
        current_tile = tile_keys[index]
        print('** processes tile %s' % current_tile)
        stats.compute_stats(tile_dict, current_tile)
def write_subd_from_tiles(subd):
    """Assemble the netCDF atlas of subdomain *subd* from its tiles.

    Creates an empty netCDF file covering the subdomain, then copies each
    tile's statistics variables into the matching (lat, lon) window of the
    file, one vertical level at a time.
    """
    bb = tiles.read_tiles()
    assert subd in subd_list
    print("gather global atlas in %s" % subd)
    # tiles belonging to the subdomain share its name as a prefix
    tile_list = [k for k in bb.keys() if k[:len(subd)] == subd]
    ncfile = tile_atlas_file % subd
    print("assemble %s" % ncfile)
    jj, ii = get_slices(subd)
    # offset
    # global indices of the subdomain origin; tile slices below are
    # re-expressed relative to this corner
    i0 = ii.start
    j0 = jj.start
    lonf = longlo[ii]
    latf = latglo[jj]
    nz = len(stats.zref)
    create_empty_netcdf_file(ncfile, latf, lonf)
    for tile in tile_list:
        jj, ii = get_slices(tile)
        #jj = [j-j0 for j in jj]
        #ii = [i-i0 for i in ii]
        # shift the tile's global slices to subdomain-local indices
        ii = slice(ii.start-i0, ii.stop-i0, None)
        jj = slice(jj.start-j0, jj.stop-j0, None)
        nj = jj.stop-jj.start
        ni = ii.stop-ii.start
        z3d = np.zeros((nz, nj, ni))
        #print(longlo[ii], latglo[jj])
        for var in stats.var_stats:
            print("\r %s - %s " % (tile, var), end="")
            data = stats.read(tile, var)
            # # TODO: remove this dirty fix
            # data[data<-10]=np.nan
            # if var!="W":
            #     data[data>50]=np.nan
            #print( tile, np.shape(z3d), np.shape(data))
            z3d[:, :, :] = data
            with Dataset(ncfile, "r+") as nc:
                #print(np.shape(q), nj, ni)
                #print(jj[0],jj[-1], ii[0],ii[-1])
                #print(tile, z3d.shape, nj, ni, jj, ii)
                # write one horizontal level at a time into the atlas window
                for kz in range(nz):
                    nc.variables[var][kz, jj, ii] = z3d[kz][:, :]
    print()
def gather_global_from_tiles():
    """Concatenate per-tile profiles into global per-subdomain pickles.

    For each subdomain and each variable in ``zref_var``, reads every
    tile's profiles, concatenates them into one DataFrame and writes it
    to the global profiles pickle file.
    """
    bb = tiles.read_tiles()
    # the eight subdomains: "00".."03" and "10".."13"
    subd_list = [a + b for a, b in itertools.product("01", "0123")]
    for subd in subd_list:
        # bug fix: was print("update global profiles %s", subd), which
        # printed a literal "%s" followed by subd instead of interpolating
        print("update global profiles %s" % subd)
        tile_list = [k for k in bb.keys() if k[:2] == subd]
        data = {var: [] for var in zref_var}
        for tile in tile_list:
            prof = read_profiles(tile)
            for var in zref_var:
                data[var] += [prof[var]]
        for var in zref_var:
            f = global_profiles_file % (var, subd)
            prof = pd.concat(data[var])
            prof.to_pickle(f)
def master_job(nslaves, resume=False):
    """Master side of the run: define tasks, distribute them to the
    slaves, then gather the per-tile results into global files."""
    # define the master director
    master = mns.Master(nslaves)
    argo = tools.read_argodb()
    bb = tiles.read_tiles()
    keys = list(bb.keys())
    work = workload(bb)
    if resume:
        # restrict to tiles whose "CT" profiles file is still missing
        missing = []
        for k in keys:
            target = interp.tiles_profiles_file % (interp.var_dir["CT"], k)
            if not os.path.exists(target):
                missing.append(k)
        keys = missing
    # heaviest tiles first
    weight = [work[k] for k in keys]
    tasks = np.argsort(weight)[::-1]
    #tiles.split_global_into_tiles(bb, argo)
    print(tasks)
    pd.to_pickle(keys, file_tiles_to_interpolate)
    # master defines the tasks
    master.barrier(0)
    # slaves work
    master.async_distrib(tasks)
    master.barrier(1)
    # gather DataFrame
    tiles.gather_global_from_tiles()
    # gather profiles
    interp.gather_global_from_tiles()
    # master gathers the dataframes
    master.barrier(2)
def define_tasks(resume=False):
    """Build the list of processing tasks, heaviest tiles first.

    Parameters
    ----------
    resume : bool
        When True, restrict the tasks to tiles whose "CT" stats file
        does not exist yet.

    Returns
    -------
    (tasks, keys) : (list of int, list of str)
        ``tasks`` are indices into ``keys``, sorted by decreasing workload.
    """
    # NOTE(review): the result is unused here, but read_argodb() may warm
    # a cache -- kept to preserve behavior; confirm before removing.
    argo = tools.read_argodb()
    bb = tiles.read_tiles()
    keys = list(bb.keys())
    work = workload(bb)
    if resume:
        d = stats.var_dir["CT"]
        keys = [k for k in keys
                if not os.path.exists(stats.tiles_file % (d, k))]
    weight = [work[k] for k in keys]
    # argsort ascending, reversed -> heaviest first
    # (cleanup: dropped the unused `tile_list` computation and the dead
    # commented-out debug prints)
    tasks = list(np.argsort(weight)[::-1])
    return (tasks, keys)
def master_job(nslaves, resume=False):
    """Master side of the stats run: broadcast the task list and the tile
    dictionary to the slaves, then synchronize through the barriers."""
    # define the master director
    master = mns.Master(nslaves, verbose=False)
    stats.create_folders()
    tasks, keys = mns.bcast(define_tasks(resume=resume))
    tile_dict = mns.bcast(tiles.read_tiles())
    # master defines the tasks
    #print("MASTER has tasks: ", tasks)
    master.barrier(0)
    # slaves work
    master.async_distrib(tasks)
    master.barrier(1)
    # master gathers the dataframes
    master.barrier(2)
data=True, dataqc=True, verbose=False) for tag in todo_tags: iprof = aa.loc[tag, "IPROF"] #print(" - iprof %i" % iprof) d = get_iprof(data, iprof) zref_d, ok = it.raw_to_zref(d, zref) if ok: # update the status in the database a.loc[tag, "STATUS"] = "D" for var in zref_var: zref_data[var].loc[tag, :] = zref_d[var][:] else: a.STATUS[tag] = "F" tiles.write_argo_tile(tile, a) write_profiles(tile, zref_data) if __name__ == '__main__': bb = tiles.read_tiles() for tile in list(bb.keys())[:12]: print('-' * 40) update_profiles_in_tile(tile)