def execute(tasks, filter_files=True, multi_threaded=False):
    """Run the grib filtering for all given tasks.

    Time-invariant (fx, frequency == 0) tasks are processed once and always
    single-threaded; all remaining tasks may be processed multi-threaded.
    Returns the concatenated list of tasks that passed validation, fx tasks
    first.
    """
    fx_tasks, time_dependent_tasks = [], []
    for task in tasks:
        if cmor_target.get_freq(task.target) == 0:
            fx_tasks.append(task)
        else:
            time_dependent_tasks.append(task)
    validated_fx = execute_tasks(fx_tasks, filter_files, multi_threaded=False, once=True)
    validated_rest = execute_tasks(time_dependent_tasks, filter_files,
                                   multi_threaded=multi_threaded, once=False)
    return validated_fx + validated_rest
def validate_tasks(tasks):
    """Validate the given tasks against the fields found in the first day of grib data.

    Fills the module-level `varstasks` dict, mapping field keys
    (var_id, tab_id, levtype, level, grid) to the tasks that need them, and
    returns the list of tasks that passed validation. Tasks whose fields are
    missing or occur at too low a frequency are marked failed and excluded.
    """
    global varstasks
    varstasks = {}
    valid_tasks = []
    for task in tasks:
        # Only IFS-sourced tasks can be validated against grib contents.
        if not isinstance(task.source, cmor_source.ifs_source):
            continue
        codes = task.source.get_root_codes()
        target_freq = cmor_target.get_freq(task.target)
        grid_key = task.source.grid_
        for c in codes:
            levtype, levels = get_levels(task, c)
            for l in levels:
                if task.status == cmor_task.status_failed:
                    break
                key = (c.var_id, c.tab_id, levtype, l, task.source.grid_)
                match_key = key
                if levtype == grib_file.hybrid_level_code:
                    # Hybrid (model) levels: match on code/table/levtype and grid,
                    # ignoring the specific level number.
                    matches = [k for k in varsfreq.keys() if k[:3] == key[:3] and k[4] == key[4]]
                    match_key = key if not any(matches) else matches[0]
                if c.var_id == 134 and len(codes) == 1:
                    # Special case for surface pressure (grib code 134): accept a
                    # match on any grid and adopt that grid for the task key.
                    matches = [k for k in varsfreq.keys() if k[:3] == key[:3]]
                    match_key = key if not any(matches) else matches[0]
                    if any(matches):
                        grid_key = match_key[4]
                if match_key not in varsfreq:
                    log.error(
                        "Field missing in the first day of file: "
                        "code %d.%d, level type %d, level %d. Dismissing task %s in table %s" %
                        (key[0], key[1], key[2], key[3], task.target.variable, task.target.table))
                    task.set_failed()
                    break
                if 0 < target_freq < varsfreq[match_key]:
                    # The data is written less frequently than the target requires.
                    log.error(
                        "Field has too low frequency for target %s: "
                        "code %d.%d, level type %d, level %d. Dismissing task %s in table %s" %
                        (task.target.variable, key[0], key[1], key[2], key[3], task.target.variable,
                         task.target.table))
                    task.set_failed()
                    break
        if task.status != cmor_task.status_failed:
            # Second pass: register all (code, level, grid) keys for this valid task.
            for c in codes:
                levtype, levels = get_levels(task, c)
                for l in levels:
                    key = (c.var_id, c.tab_id, levtype, l, grid_key)
                    if key in varstasks:
                        varstasks[key].append(task)
                    else:
                        varstasks[key] = [task]
            valid_tasks.append(task)
    return valid_tasks
def validate_tasks(tasks):
    """Validate the given tasks against the fields found in the first day of grib data.

    Uses soft_match_key to look up each required field in the module-level
    `varsfreq` table. Tasks with missing fields or too-low source frequency
    are marked failed. Returns a pair (valid_tasks, varstasks) where varstasks
    maps matched field keys to the tasks depending on them.
    """
    varstasks = {}
    valid_tasks = []
    for task in tasks:
        # Only IFS-sourced tasks can be validated against grib contents.
        if not isinstance(task.source, cmor_source.ifs_source):
            continue
        codes = task.source.get_root_codes()
        target_freq = cmor_target.get_freq(task.target)
        matched_keys = []
        matched_grid = None
        for c in codes:
            levtype, levels = get_levels(task, c)
            for level in levels:
                if task.status == cmor_task.status_failed:
                    break
                match_key = soft_match_key(c.var_id, c.tab_id, levtype, level, task.source.grid_,
                                           varsfreq.keys())
                if match_key is None:
                    log.error(
                        "Field missing in the first day of file: "
                        "code %d.%d, level type %d, level %d. Dismissing task %s in table %s" %
                        (c.var_id, c.tab_id, levtype, level, task.target.variable, task.target.table))
                    task.set_failed()
                    break
                if 0 < target_freq < varsfreq[match_key]:
                    # The data is written less frequently than the target requires.
                    log.error(
                        "Field has too low frequency for target %s: "
                        "code %d.%d, level type %d, level %d. Dismissing task %s in table %s" %
                        (task.target.variable, c.var_id, c.tab_id, levtype, level, task.target.variable,
                         task.target.table))
                    task.set_failed()
                    break
                if matched_grid is None:
                    matched_grid = match_key[4]
                else:
                    if match_key[4] != matched_grid:
                        log.warning(
                            "Task %s in table %s depends on both gridpoint and spectral fields" %
                            (task.target.variable, task.target.table))
                matched_keys.append(match_key)
        if task.status != cmor_task.status_failed:
            # Fix for zg and ps on gridpoints and spectral fields on height levels:
            task.source.grid_ = matched_grid
            for key in matched_keys:
                if key in varstasks:
                    varstasks[key].append(task)
                else:
                    varstasks[key] = [task]
            valid_tasks.append(task)
    return valid_tasks, varstasks
def execute_netcdf_task(task):
    """Cmorize the netcdf file produced for a single task.

    Reads the cdo-produced netcdf file attached to the task, locates the
    variable by grib code, creates the cmor variable (with the proper sign
    convention, units and conversion factors) and streams the data through
    cmor_utils.netcdf2cmor. Failures are logged and the task is marked
    failed where appropriate; nothing is returned.
    """
    global log
    task.next_state()
    filepath = getattr(task, cmor_task.output_path_key, None)
    if not filepath:
        log.error(
            "Could not find file containing data for variable %s in table %s" %
            (task.target.variable, task.target.table))
        return
    store_var = getattr(task, "store_with", None)
    surf_pressure_task = getattr(task, "sp_task", None)
    surf_pressure_path = getattr(surf_pressure_task, "path", None) if surf_pressure_task else None
    # Model-level variables need the surface pressure field stored alongside them.
    if store_var and not surf_pressure_path:
        log.error(
            "Could not find file containing surface pressure for model level variable...skipping variable %s in table "
            "%s" % (task.target.variable, task.target.table))
        return
    axes = []
    t_bnds = []
    if hasattr(task, "grid_id"):
        task_grid_id = getattr(task, "grid_id")
        if isinstance(task_grid_id, tuple):
            # Tuple of axis ids (e.g. lat/lon); skip missing entries.
            axes.extend([a for a in task_grid_id if a is not None])
        else:
            axes.append(task_grid_id)
    if hasattr(task, "z_axis_id"):
        axes.append(getattr(task, "z_axis_id"))
    if hasattr(task, "t_axis_id"):
        axes.append(getattr(task, "t_axis_id"))
        t_bnds = time_axis_bnds.get(getattr(task, "t_axis_id"), [])
    try:
        dataset = netCDF4.Dataset(filepath, 'r')
    except Exception as e:
        # NOTE: was `e.message`, which only exists on Python 2 exceptions;
        # str(e) works on both.
        log.error(
            "Could not read netcdf file %s while cmorizing variable %s in table %s. Cause: %s" %
            (filepath, task.target.variable, task.target.table, str(e)))
        return
    try:
        ncvars = dataset.variables
        dataset.set_auto_mask(False)
        codestr = str(task.source.get_grib_code().var_id)
        # Find the variable by its grib "code" attribute, falling back to cdo's
        # "var<code>" naming scheme.
        varlist = [v for v in ncvars if str(getattr(ncvars[v], "code", None)) == codestr]
        if len(varlist) == 0:
            varlist = [v for v in ncvars if str(v) == "var" + codestr]
        if task.target.variable == "areacella":
            varlist = ["cell_area"]
        if len(varlist) == 0:
            # Typo fix: "fro cmorizing" -> "for cmorizing"
            log.error(
                "No suitable variable found in cdo-produced file %s for cmorizing variable %s in table %s... "
                "dismissing task" % (filepath, task.target.variable, task.target.table))
            task.set_failed()
            return
        if len(varlist) > 1:
            log.warning(
                "CDO variable retrieval resulted in multiple (%d) netcdf variables; will take first" %
                len(varlist))
        ncvar = ncvars[varlist[0]]
        unit = getattr(ncvar, "units", None)
        # If the file carries no units, or a unit conversion is requested,
        # trust the target units instead.
        if (not unit) or hasattr(task, cmor_task.conversion_key):
            unit = getattr(task.target, "units")
        if len(getattr(task.target, "positive", "")) > 0:
            var_id = cmor.variable(table_entry=str(task.target.variable), units=str(unit),
                                   axis_ids=axes, positive="down")
        else:
            var_id = cmor.variable(table_entry=str(task.target.variable), units=str(unit),
                                   axis_ids=axes)
        # cmor stores "down"-positive; an "up"-positive target needs its sign flipped.
        flip_sign = (getattr(task.target, "positive", None) == "up")
        factor, term = get_conversion_constants(
            getattr(task, cmor_task.conversion_key, None),
            getattr(task, cmor_task.output_frequency_key))
        # Locate the (first) time dimension index of the netcdf variable.
        time_dim = -1
        for index, d in enumerate(ncvar.dimensions):
            if d.startswith("time"):
                time_dim = index
                break
        time_selection = None
        time_stamps = cmor_utils.read_time_stamps(filepath)
        if any(time_stamps) and len(t_bnds) > 0:
            # Map each requested time interval to the index of the first
            # time stamp it contains, or -1 (missing values) when none fits.
            time_slice_map = []
            for bnd in t_bnds:
                candidates = [t for t in time_stamps if bnd[0] <= t <= bnd[1]]
                if any(candidates):
                    time_slice_map.append(time_stamps.index(candidates[0]))
                else:
                    log.warning(
                        "For variable %s in table %s, no valid time point could be found at %s...inserting "
                        "missing values" % (task.target.variable, task.target.table, str(bnd[0])))
                    time_slice_map.append(-1)
            time_selection = numpy.array(time_slice_map)
        mask = getattr(task.target, cmor_target.mask_key, None)
        mask_array = masks[mask].get("array", None) if mask in masks else None
        missval = getattr(task.target, cmor_target.missval_key, 1.e+20)
        if flip_sign:
            missval = -missval
        cmor_utils.netcdf2cmor(var_id, ncvar, time_dim, factor, term, store_var,
                               get_sp_var(surf_pressure_path),
                               swaplatlon=False, fliplat=True, mask=mask_array,
                               missval=missval, time_selection=time_selection,
                               force_fx=(cmor_target.get_freq(task.target) == 0))
        cmor.close(var_id)
        task.next_state()
        if store_var:
            cmor.close(store_var)
    finally:
        # Always release the netcdf handle, also on early returns above.
        dataset.close()
def execute(tasks, nthreads=1):
    """Execute all supported IFS tasks: filter grib data, post-process and cmorize.

    Orchestration order matters: mask and surface-pressure tasks are
    post-processed first (masks are read into memory), then the regular and
    fx tasks are cmorized, optionally in a multiprocessing pool of size
    `nthreads`. Temporary data is cleaned up at the end when configured.
    """
    global log, start_date_, auto_filter_
    supported_tasks = [t for t in filter_tasks(tasks) if t.status == cmor_task.status_initialized]
    log.info("Executing %d IFS tasks..." % len(supported_tasks))
    mask_tasks = get_mask_tasks(supported_tasks)
    fx_tasks = [t for t in supported_tasks if cmor_target.get_freq(t.target) == 0]
    surf_pressure_tasks = get_sp_tasks(supported_tasks)
    regular_tasks = [t for t in supported_tasks
                     if t not in surf_pressure_tasks and cmor_target.get_freq(t.target) != 0]
    # No fx filtering needed, cdo can handle this file
    if ifs_init_gridpoint_file_.endswith("+000000"):
        tasks_to_filter = surf_pressure_tasks + regular_tasks
        tasks_no_filter = fx_tasks + mask_tasks
        for task in tasks_no_filter:
            # dirty hack for orography being in ICMGG+000000 file...
            if task.target.variable in ["orog", "areacella"]:
                task.source.grid_ = cmor_source.ifs_grid.point
            # Point the task directly at the initial-state grib file.
            if task.source.grid_id() == cmor_source.ifs_grid.spec:
                setattr(task, cmor_task.filter_output_key, [ifs_init_spectral_file_])
            else:
                setattr(task, cmor_task.filter_output_key, [ifs_init_gridpoint_file_])
            setattr(task, cmor_task.output_frequency_key, 0)
    else:
        tasks_to_filter = mask_tasks + fx_tasks + surf_pressure_tasks + regular_tasks
        tasks_no_filter = []
    if auto_filter_:
        tasks_todo = tasks_no_filter + grib_filter.execute(tasks_to_filter,
                                                           filter_files=do_post_process(),
                                                           multi_threaded=(nthreads > 1))
    else:
        # No automatic filtering: feed the raw gridpoint/spectral files to each task.
        tasks_todo = tasks_no_filter
        for task in tasks_to_filter:
            if task.source.grid_id() == cmor_source.ifs_grid.point:
                setattr(task, cmor_task.filter_output_key, ifs_gridpoint_files_.values())
                tasks_todo.append(task)
            elif task.source.grid_id() == cmor_source.ifs_grid.spec:
                setattr(task, cmor_task.filter_output_key, ifs_spectral_files_.values())
                tasks_todo.append(task)
            else:
                log.error("Task ifs source has unknown grid for %s in table %s" %
                          (task.target.variable, task.target.table))
                task.set_failed()
    for task in tasks_todo:
        setattr(task, cmor_task.output_frequency_key, get_output_freq(task))
    # First post-process surface pressure and mask tasks
    for task in list(set(tasks_todo).intersection(mask_tasks + surf_pressure_tasks)):
        postproc.post_process(task, temp_dir_, do_post_process())
    for task in list(set(tasks_todo).intersection(mask_tasks)):
        read_mask(task.target.variable, getattr(task, cmor_task.output_path_key))
    proctasks = list(set(tasks_todo).intersection(regular_tasks + fx_tasks))
    if nthreads == 1:
        for task in proctasks:
            cmor_worker(task)
    else:
        pool = multiprocessing.Pool(processes=nthreads)
        pool.map(cmor_worker, proctasks)
        # Attribute changes made inside worker processes are not visible here,
        # so recompute the output paths in the parent process.
        for task in proctasks:
            setattr(task, cmor_task.output_path_key, postproc.get_output_path(task, temp_dir_))
    if cleanup_tmpdir():
        clean_tmp_data(tasks_todo)
def execute(tasks, nthreads=1):
    """Execute all supported IFS tasks: scripts, grib filtering, post-processing and cmorization.

    Variant supporting external post-processing scripts: tasks handled by
    scripts doing their own filtering are launched first (possibly as
    separate processes, consuming workers from the thread budget), then the
    remaining tasks are filtered, post-processed and cmorized. Script jobs
    are joined before temporary data is cleaned up.
    """
    global log, start_date_, auto_filter_
    supported_tasks = [t for t in filter_tasks(tasks) if t.status == cmor_task.status_initialized]
    log.info("Executing %d IFS tasks..." % len(supported_tasks))
    mask_tasks = get_mask_tasks(supported_tasks)
    fx_tasks = [t for t in supported_tasks if cmor_target.get_freq(t.target) == 0]
    regular_tasks = [t for t in supported_tasks if cmor_target.get_freq(t.target) != 0]
    script_tasks = [t for t in supported_tasks if validate_script_task(t) is not None]
    # Scripts in charge of their own filtering, can create a group of variables at once
    script_tasks_no_filter = [t for t in script_tasks if validate_script_task(t) == "false"]
    # Scripts creating single variable, filtering done by ece2cmor3
    script_tasks_filter = list(set(script_tasks) - set(script_tasks_no_filter))
    req_ps_tasks, extra_ps_tasks = get_sp_tasks(supported_tasks)
    # No fx filtering needed, cdo can handle this file
    if ifs_init_gridpoint_file_.endswith("+000000"):
        tasks_to_filter = extra_ps_tasks + regular_tasks + script_tasks_filter
        tasks_no_filter = fx_tasks + mask_tasks
        for task in tasks_no_filter:
            # dirty hack for orography being in ICMGG+000000 file...
            if task.target.variable in ["orog", "areacella"]:
                task.source.grid_ = cmor_source.ifs_grid.point
            # Point the task directly at the initial-state grib file.
            if task.source.grid_id() == cmor_source.ifs_grid.spec:
                setattr(task, cmor_task.filter_output_key, [ifs_init_spectral_file_])
            else:
                setattr(task, cmor_task.filter_output_key, [ifs_init_gridpoint_file_])
            setattr(task, cmor_task.output_frequency_key, 0)
    else:
        tasks_to_filter = mask_tasks + fx_tasks + extra_ps_tasks + regular_tasks + script_tasks_filter
        tasks_no_filter = []
    # Remaining worker budget; decremented for every spawned script process.
    # NOTE(review): local name `np` shadows the common numpy alias — this file
    # imports `numpy` unaliased, so it is harmless here.
    np = nthreads
    # Launch no-filter scripts
    jobs = []
    tasks_per_script = cmor_utils.group(script_tasks_no_filter,
                                        lambda tsk: getattr(tsk, cmor_task.postproc_script_key))
    for s, tasklist in tasks_per_script.items():
        log.info("Launching script %s to process variables %s" %
                 (s, ','.join([t.target.variable + " in " + t.target.table for t in tasklist])))
        script_args = (s, str(scripts[s]["src"]), tasklist)
        if np == 1:
            # Only one worker left: run the script synchronously in this process.
            script_worker(*script_args)
        else:
            p = multiprocessing.Process(name=s, target=script_worker, args=script_args)
            p.start()
            jobs.append(p)
            np -= 1
    # Do filtering
    if auto_filter_:
        tasks_todo = tasks_no_filter + grib_filter.execute(tasks_to_filter,
                                                           filter_files=do_post_process(),
                                                           multi_threaded=(nthreads > 1))
    else:
        # No automatic filtering: feed the raw gridpoint/spectral files to each task.
        tasks_todo = tasks_no_filter
        for task in tasks_to_filter:
            if task.source.grid_id() == cmor_source.ifs_grid.point:
                setattr(task, cmor_task.filter_output_key, ifs_gridpoint_files_.values())
                tasks_todo.append(task)
            elif task.source.grid_id() == cmor_source.ifs_grid.spec:
                setattr(task, cmor_task.filter_output_key, ifs_spectral_files_.values())
                tasks_todo.append(task)
            else:
                log.error("Task ifs source has unknown grid for %s in table %s" %
                          (task.target.variable, task.target.table))
                task.set_failed()
    for task in tasks_todo:
        setattr(task, cmor_task.output_frequency_key, get_output_freq(task))
    # First post-process surface pressure and mask tasks
    for task in list(set(tasks_todo).intersection(mask_tasks + req_ps_tasks + extra_ps_tasks)):
        postproc.post_process(task, temp_dir_, do_post_process())
    for task in list(set(tasks_todo).intersection(mask_tasks)):
        read_mask(task.target.variable, getattr(task, cmor_task.output_path_key))
    proctasks = list(set(tasks_todo).intersection(regular_tasks + fx_tasks))
    if np == 1:
        for task in proctasks:
            cmor_worker(task)
    else:
        pool = multiprocessing.Pool(processes=np)
        pool.map(cmor_worker, proctasks)
        # Attribute changes made inside worker processes are not visible here,
        # so recompute the output paths in the parent process.
        for task in proctasks:
            setattr(task, cmor_task.output_path_key, postproc.get_output_path(task, temp_dir_))
    # Wait for all asynchronously launched script jobs before cleanup.
    for job in jobs:
        job.join()
    if cleanup_tmpdir():
        clean_tmp_data(tasks_todo)