# EXPS =\
# {
# 'GLOBAL_NOAC':        {'sw_ac' : [],          'sw_ap': True, 'sw_lit': False},
# #'GLOBAL_ADV':        {'sw_ac' : ['adv',],    'sw_ap': True, 'sw_lit': False},
# #'GLOBAL_ITER_NOAC':  {'sw_ac' : [],          'sw_ap': True, 'sw_lit': False},
# #'GLOBAL_ITER_ADV':   {'sw_ac' : [],          'sw_ap': True, 'sw_lit': False},
# #'IOPS_ITER_ADV':     {'sw_ac' : ['adv',],    'sw_ap': True, 'sw_lit': False},
# # 'IOPS_W':           {'sw_ac' : ['w',],      'sw_ap': True, 'sw_lit': False},
# # 'IOPS_AC':          {'sw_ac' : ['adv','w'], 'sw_ap': True, 'sw_lit': False},
# }

if args.load_globaldata:
    # initialize global data
    globaldata = data_global()
    # ... and load initial data pages
    globaldata.load_datasets(recalc=0)
else:
    globaldata = None

c4gldata = []
c4gldata.append(c4gl_interface_soundings( \
    '/data/gent/vo/000/gvo00090/D2D/data/C4GL/20181030/IOPS_ADV_ITER/',\
    '/data/gent/vo/000/gvo00090/D2D/data/SOUNDINGS/IOPS/',\
    globaldata,\
    refetch_records=False,\
    obs_filter=True,\
    tendencies_revised=args.tendencies_revised\
))
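# A second experiment could be compared alongside IOPS_ADV_ITER by appending
# another interface object with the same signature. Illustrative sketch only:
# 'IOPS_W' is taken from the commented-out EXPS table above, and its output
# directory below is hypothetical.
# c4gldata.append(c4gl_interface_soundings( \
#     '/data/gent/vo/000/gvo00090/D2D/data/C4GL/20181030/IOPS_W/',\
#     '/data/gent/vo/000/gvo00090/D2D/data/SOUNDINGS/IOPS/',\
#     globaldata,\
#     refetch_records=False,\
#     obs_filter=True,\
#     tendencies_revised=args.tendencies_revised\
# ))
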
def execute(**kwargs):
    # note that with args, we actually mean the same arguments as those
    # specified with the argparse module above

    # overwrite the args according to the kwargs when the procedure is called
    # as a module function
    for key, value in kwargs.items():
        args.__dict__[key] = value

    print("-- begin arguments --")
    for key, value in args.__dict__.items():
        print(key, ': ', value)
    print("-- end arguments ----")

    # load the specified class4gl library
    if args.c4gl_path_lib is not None:
        sys.path.insert(0, args.c4gl_path_lib)

    from class4gl import class4gl_input, data_global, class4gl
    from interface_multi import stations, stations_iterator, records_iterator, get_record_yaml, get_records
    from class4gl import blh, class4gl_input

    # this is a variant of the global run in which the output of runs is still
    # written out even when a run crashes.

    # # only include the following timeseries in the model output
    # timeseries_only = \
    # ['Cm', 'Cs', 'G', 'H', 'L', 'LE', 'LEpot', 'LEref', 'LEsoil', 'LEveg', 'Lwin',
    #  'Lwout', 'Q', 'RH_h', 'Rib', 'Swin', 'Swout', 'T2m', 'dq', 'dtheta',
    #  'dthetav', 'du', 'dv', 'esat', 'gammaq', 'gammatheta', 'h', 'q', 'qsat',
    #  'qsurf', 'ra', 'rs', 'theta', 'thetav', 'time', 'u', 'u2m', 'ustar', 'uw',
    #  'v', 'v2m', 'vw', 'wq', 'wtheta', 'wthetae', 'wthetav', 'zlcl']

    if (args.global_vars is not None):
        globaldata = data_global()
        globaldata.load_datasets(recalc=0)

    # ========================
    print("getting a list of stations")
    # ========================

    # these are all the stations that are found in the input dataset
    all_stations = stations(args.path_forcing,
                            suffix=args.subset_forcing,
                            refetch_stations=False)

    # ====================================
    print('defining all_stations_select')
    # ====================================

    # these are all the stations that are supposed to be run by the whole batch
    # (all chunks). We narrow it down according to the station(s) specified.
    if args.station_id is not None:
        print("Selecting station by ID")
        stations_iter = stations_iterator(all_stations)
        STNID, run_station = stations_iter.set_STNID(
            STNID=int(args.station_id))
        all_stations_select = pd.DataFrame([run_station])
    else:
        print("Selecting stations from a row range in the table")
        all_stations_select = pd.DataFrame(all_stations.table)
        if args.last_station_row is not None:
            all_stations_select = all_stations_select.iloc[:(
                int(args.last_station_row) + 1)]
        if args.first_station_row is not None:
            all_stations_select = all_stations_select.iloc[int(
                args.first_station_row):]
    print("station numbers included in the whole batch "+\
          "(all chunks):", list(all_stations_select.index))
    print(all_stations_select)

    print("getting all records of the whole batch")
    all_records_morning_select = get_records(all_stations_select,\
                                             args.path_forcing,\
                                             subset=args.subset_forcing,
                                             refetch_records=False,
                                             )

    # only run a specific chunk from the selection
    if args.global_chunk_number is not None:
        if args.station_chunk_number is not None:
            raise ValueError(
                'You need to specify either global-chunk-number or station-chunk-number, not both.'
            )

        if (args.split_by is None) or (args.split_by <= 0):
            raise ValueError(
                "global_chunk_number is specified, but --split_by is not a strictly positive number, so I don't know how to split the batch into chunks."
            )

        run_station_chunk = None
        print('determining the station and its chunk number according to '
              'global_chunk_number (' + args.global_chunk_number + ')')
        totalchunks = 0
        stations_iter = all_stations_select.iterrows()
        in_current_chunk = False
        try:
            while not in_current_chunk:
                istation, current_station = stations_iter.__next__()
                all_records_morning_station_select = all_records_morning_select.query(
                    'STNID == ' + str(current_station.name))
                chunks_current_station = math.ceil(
                    float(len(all_records_morning_station_select)) /
                    float(args.split_by))
                print('chunks_current_station', chunks_current_station)

                in_current_chunk = (int(args.global_chunk_number) <
                                    (totalchunks + chunks_current_station))

                if in_current_chunk:
                    run_stations = pd.DataFrame([
                        current_station
                    ])  # run_stations.loc[(int(args.__dict__['last_station'])]
                    run_station_chunk = int(
                        args.global_chunk_number) - totalchunks

                totalchunks += chunks_current_station

        except StopIteration:
            raise ValueError(
                "Could not determine station chunk number. --global_chunk_number (" +
                args.global_chunk_number + ") is outside of the range [0," +
                str(totalchunks) + '[')
        print("station = ", list(run_stations.index))
        print("station chunk number:", run_station_chunk)

    # if no global chunk is specified, then run the whole station selection in
    # one run, or a specific chunk for each selected station according to
    # args.station_chunk_number
    else:
        run_stations = pd.DataFrame(
            all_stations_select
        )  # run_stations.loc[(int(args.__dict__['last_station'])]
        if args.station_chunk_number is not None:
            run_station_chunk = int(args.station_chunk_number)
            print("station(s) that are processed:", list(run_stations.index))
            print("chunk number: ", run_station_chunk)
        else:
            if args.split_by is not None:
                raise ValueError(
                    "Chunks are defined by --split_by, but I don't know which chunk to run. Please provide --global_chunk_number or --station_chunk_number, or leave out --split_by."
                )
            run_station_chunk = 0
            print("stations that are processed:", list(run_stations.index))

    #print(all_stations)
    print('Fetching initial/forcing records')
    records_morning = get_records(run_stations,\
                                  args.path_forcing,\
                                  subset=args.subset_forcing,
                                  refetch_records=False,
                                  )
    if len(records_morning) == 0:
        raise IOError("No initialization records found in "+\
                      args.path_forcing+' (subset: '+args.subset_forcing+')')

    # note that if runtime is an integer number, we don't need to get the
    # afternoon profiles.
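    # Worked example of the chunk bookkeeping above (illustrative numbers, not
    # taken from any real dataset): with --split_by 50, a station with 120
    # morning records contributes ceil(120/50) = 3 chunks; if the stations
    # before it contribute `totalchunks` chunks in total, then
    # --global_chunk_number values totalchunks, totalchunks+1 and totalchunks+2
    # all select that station, with run_station_chunk 0, 1 and 2 respectively.
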
    path_output = args.path_output
    os.system('mkdir -p ' + path_output)
    for istation, current_station in run_stations.iterrows():
        # records_morning_station = records_morning.query('STNID == '+str(current_station.name))
        records_morning_station = records_morning.loc[(current_station.name):(
            current_station.name)]

        fn_morning = args.path_forcing + '/' + format(
            current_station.name, '05d') + '_' + args.subset_forcing + '.yaml'
        if os.path.isfile(fn_morning):
            file_morning = open(fn_morning)
        else:
            fn_morning = \
                args.path_forcing+'/'+format(current_station.name,'05d')+\
                '_'+str(run_station_chunk)+'_'+args.subset_forcing+'.yaml'
            file_morning = open(fn_morning)

        # if args.runtime == 'from_profile_pair':
        #     file_afternoon = open(args.path_forcing+'/'+format(current_station.name,'05d')+'_end.yaml')

        fn_ini = path_output+'/'+format(current_station.name,'05d')+'_'+\
                 str(int(run_station_chunk))+'_ini.yaml'
        file_ini = open(fn_ini, 'w')

        #iexp = 0
        onerun = False
        print('starting station chunk number: '\
              +str(run_station_chunk)+' (chunk size:', args.split_by, ')')

        skip_chunk = False
        if 'chunk' in records_morning.index.names:
            records_morning_station_chunk = records_morning_station.loc[(
                current_station.name, run_station_chunk):(
                    current_station.name, run_station_chunk)]
        else:
            start_record = run_station_chunk * args.split_by \
                if run_station_chunk != 0 else 0
            end_record = (run_station_chunk + 1) * args.split_by \
                if args.split_by is not None else None
            if start_record >= (len(records_morning_station)):
                print("warning: outside of profile number range for station "+\
                      str(current_station)+". Skipping chunk number for this station.")
                skip_chunk = True
                records_morning_station_chunk = None
            else:
                records_morning_station_chunk = records_morning_station.iloc[
                    start_record:end_record]
                # [(int(args.split_by)*run_station_chunk):(int(args.split_by)*(run_station_chunk+1))]

        if not skip_chunk:
            isim = 0
            for (STNID, chunk, index
                 ), record_morning in records_morning_station_chunk.iterrows():
                print('starting '+str(isim+1)+' out of '+\
                      str(len(records_morning_station_chunk))+\
                      ' (station total: ', str(len(records_morning_station)), ')')

                c4gli_morning = get_record_yaml(file_morning,
                                                record_morning.index_start,
                                                record_morning.index_end,
                                                mode='model_input')

                if args.global_vars is not None:
                    c4gli_morning.get_global_input(
                        globaldata,
                        only_keys=args.global_vars.strip().split(':'))

                onerun = True

                print("dumping to " + str(file_ini) + ' (' + fn_ini + ')')
                c4gli_morning.dump(file_ini)

                isim += 1

        file_ini.close()
        file_morning.close()

        if onerun:
            records_ini = get_records(pd.DataFrame([current_station]),\
                                      path_output,\
                                      getchunk=int(run_station_chunk),\
                                      subset='ini',
                                      refetch_records=True,
                                      )
        else:
            # remove the empty output file
            os.system('rm ' + fn_ini)
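
# Minimal usage sketch (not part of the original script; the paths, subset
# name and station id below are hypothetical): when this file is imported as
# a module, execute() can be called with keyword overrides for any of the
# argparse arguments used above, e.g.
#
#   execute(path_forcing='/tmp/forcing',
#           path_output='/tmp/ini',
#           subset_forcing='morning',
#           station_id='10393',
#           split_by=50,
#           station_chunk_number=0)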