def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    ###########################################
    # reorganize analog txt file for javascript
    # and find associated config file
    ###########################################

    # Reformat data file output by the analogs detection process so that
    # it can be read by the analogues viewer template.
    try:
        # Get the output csv file of the analogs process (input by user in
        # text box)
        analogs = rename_complexinputs(request.inputs['analog_result'])[0]
        # analogs = request.inputs['analog_result'][0]
        LOGGER.info("analogs file path %s ", analogs)

        configfile = "dummy.txt"  # anlg.get_viewer_configfile(analogs)
        analogs_mod = anlg.reformat_analogs(analogs)
        response.outputs['output_txt'].file = analogs_mod  # output_data
        LOGGER.info("analogs for visualisation prepared")
    except Exception:
        msg = 'Failed to reformat analogs file'
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        output_av = anlg.render_viewer(configfile=configfile,
                                       datafile=analogs_mod)
        LOGGER.info('Viewer html page generated')
        response.update_status(
            'Successfully generated analogs viewer html page', 90)
        response.outputs['output_html'].file = output_av
        LOGGER.info('output_av: %s ', output_av)
    except Exception:
        msg = 'Failed to generate viewer'
        LOGGER.exception(msg)
        raise Exception(msg)

    return response
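# ---------------------------------------------------------------------------
# A minimal sketch (not part of the process above) for driving the two viewer
# steps outside PyWPS while debugging. It assumes the reformat/render helpers
# are importable as blackswan.analogs (aliased to anlg as in the handler);
# the file paths are hypothetical.
def _debug_render_viewer(analogs_txt='analogs.txt', config_txt='dummy.txt'):
    from blackswan import analogs as anlg

    reformatted = anlg.reformat_analogs(analogs_txt)        # hypothetical CASTf90 output file
    html_page = anlg.render_viewer(configfile=config_txt,   # hypothetical config file
                                   datafile=reformatted)
    return html_page
# ---------------------------------------------------------------------------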
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : %s ' % dt.now(), 10)

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status(
            'execution started at : {}'.format(dt.now()), 20)

        LOGGER.info('read in the arguments')

        resource = archiveextract(
            resource=[res.file for res in request.inputs['resource']])

        # If files are from different datasets,
        # e.g. ...output1/slp.1999.nc and ...output2/slp.1997.nc,
        # they will not be sorted correctly with just .sort(), so:
        if type(resource) == list:
            resource = sorted(
                resource, key=lambda i: path.splitext(path.basename(i))[0])
        else:
            resource = [resource]

        season = request.inputs['season'][0].data
        LOGGER.info('season %s', season)

        bboxDef = '-80,50,20,70'  # in general format
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
        bboxStr = bboxStr.split(',')

        # Check for wrong coordinates and apply the default if necessary
        if (abs(float(bboxStr[0])) > 180 or
                abs(float(bboxStr[1])) > 180 or
                abs(float(bboxStr[2])) > 90 or
                abs(float(bboxStr[3])) > 90):
            bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
            LOGGER.debug(
                'BBOX is out of range, using default instead: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
        LOGGER.debug('BBOX original: %s ' % (bboxStr))

        period = request.inputs['period'][0].data
        LOGGER.info('period %s', period)
        anualcycle = request.inputs['anualcycle'][0].data
        kappa = request.inputs['kappa'][0].data
        LOGGER.info('kappa %s', kappa)

        method = request.inputs['method'][0].data
        LOGGER.info('Calc annual cycle with %s', method)

        sseas = request.inputs['sseas'][0].data
        LOGGER.info('Annual cycle calc with %s', sseas)

        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')

        # OCGIS workaround for models - to catch the 31st of Dec
        start = dt.combine(start, dt_time(12, 0))
        end = dt.combine(end, dt_time(12, 0))

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        LOGGER.debug('Reference start: %s , end: %s ' % (reference[0], reference[1]))

        reference[0] = dt.combine(reference[0], dt_time(12, 0))
        reference[1] = dt.combine(reference[1], dt_time(12, 0))
        LOGGER.debug('New Reference start: %s , end: %s ' % (reference[0], reference[1]))

        # Check for a 360_day calendar (all months are exactly 30 days):
        try:
            modcal, calunits = get_calendar(resource[0])
            if '360_day' in modcal:
                if start.day == 31:
                    start = start.replace(day=30)
                    LOGGER.debug('Date has been changed to: %s' % (start))
                if end.day == 31:
                    end = end.replace(day=30)
                    LOGGER.debug('Date has been changed to: %s' % (end))
                if reference[0].day == 31:
                    reference[0] = reference[0].replace(day=30)
                    LOGGER.debug('Date has been changed to: %s' % (reference[0]))
                if reference[1].day == 31:
                    reference[1] = reference[1].replace(day=30)
                    LOGGER.debug('Date has been changed to: %s' % (reference[1]))
        except:
            LOGGER.debug('Could not detect calendar')

        LOGGER.debug('start: %s , end: %s ', start, end)
        LOGGER.info('bbox %s', bbox)
        LOGGER.info('period %s', period)
        LOGGER.info('season %s', season)
    except Exception:
        msg = 'failed to read in the arguments'
        LOGGER.exception(msg)
        raise Exception(msg)

    ############################################################
    # get the required bbox and time region from resource data
    ############################################################
    response.update_status('start subsetting', 30)

    from blackswan.ocgis_module import call
    from blackswan.utils import get_variable, get_timerange

    time_range = [start, end]

    # Keep only the files whose time range overlaps the requested period
    tmp_resource = []
    for re in resource:
        s, e = get_timerange(re)
        tmpSt = dt.strptime(s, '%Y%m%d')
        tmpEn = dt.strptime(e, '%Y%m%d')
        if ((tmpSt <= end) and (tmpEn >= start)):
            tmp_resource.append(re)
            LOGGER.debug('Selected file: %s ' % (re))
    resource = tmp_resource

    # Here starts the trick with z... levels and regridding...
    # Otherwise call will give a memory error for hi-res models like MIROC4h
    # TODO: Add level and domain selection as in wps_analogs_model for 4D variables.
    variable = get_variable(resource)

    from blackswan.datafetch import reanalyses
    import uuid

    ref_var = 'slp'
    refR = 'NCEP'
    ref_rea = reanalyses(start=2014, end=2014, variable=ref_var, dataset=refR)

    regr_res = []
    for z in resource:
        tmp_n = 'tmp_%s' % (uuid.uuid1())

        s, e = get_timerange(z)
        tmpSt = dt.strptime(s, '%Y%m%d')
        tmpEn = dt.strptime(e, '%Y%m%d')
        tmpSt = dt.combine(tmpSt, dt_time(0, 0))
        tmpEn = dt.combine(tmpEn, dt_time(23, 0))

        if ((tmpSt <= start) and (tmpEn >= end)):
            LOGGER.debug('Resource contains the full record, selecting: %s ' % (time_range))
            full_res = call(z, variable=variable, time_range=time_range)
        else:
            full_res = z
        LOGGER.debug(
            'The subset from the big model file, or the initial file: %s ' % (full_res))

        # TODO: regrid needed here (???)
        #       Check how to manage one big file with geopotential
        # TODO: Adapt to work with levels for geopotential
        #       (check how it is done for the reanalysis process)

        # b0 = call(resource=z, variable=variable,
        b0 = call(resource=full_res, variable=variable,
                  spatial_wrapping='wrap', cdover='system',
                  regrid_destination=ref_rea[0], regrid_options='bil', prefix=tmp_n)

        # TODO: Use cdo regrid outside call - before call...
        # Some issues with the file produced by the cdo regrid inside ocgis_module:
        # cdo remapbil (Abort): Unsupported projection coordinates (Variable: psl)!

        # select domain
        b01 = call(resource=b0, geom=bbox, spatial_wrapping='wrap',
                   prefix='levregr_' + path.basename(z)[0:-3])

        tbr = 'rm -f %s' % (b0)
        system(tbr)
        tbr = 'rm -f %s.nc' % (tmp_n)
        system(tbr)

        # get full resource
        regr_res.append(b01)

    model_subset = call(regr_res, time_range=time_range)

    # TODO: CHANGE to cross-platform (see the sketch after this handler)
    for i in regr_res:
        tbr = 'rm -f %s' % (i)
        system(tbr)

    # Get domain
    # from cdo import Cdo
    # from os import environ
    # cdo = Cdo(env=environ)
    # regr_res = []
    # for res_fn in resource:
    #     tmp_f = 'dom_' + path.basename(res_fn)
    #     comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3])
    #     cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
    #     # tmp_f = call(resource=res_fn, geom=bbox, spatial_wrapping='wrap', prefix=tmp_f)
    #     regr_res.append(tmp_f)
    #     LOGGER.debug('File with selected domain: %s ' % (tmp_f))
    #
    # model_subset = call(
    #     # resource=resource, variable=variable,
    #     resource=regr_res, variable=variable,
    #     geom=bbox, spatial_wrapping='wrap', time_range=time_range,
    #     conform_units_to=conform_units_to
    # )

    LOGGER.info('Dataset subset done: %s ' % model_subset)
    response.update_status('dataset subsetted', 40)

    #####################
    # computing anomalies
    #####################
    response.update_status('computing anomalies ', 50)

    model_anomal = wr.get_anomalies(model_subset, reference=reference,
                                    method=method, sseas=sseas)

    ###################
    # extracting season
    ###################
    model_season = wr.get_season(model_anomal, season=season)
    response.update_status('values normalized', 60)

    ####################
    # call the R scripts
    ####################
    response.update_status('Start weather regime clustering ', 70)

    import shlex
    import subprocess
    from blackswan import config
    from os.path import curdir, exists, join

    try:
        # rworkspace = curdir
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_model.R'
        infile = model_season  # model_subset  # model_ponderate
        # modelname = 'MODEL'
        # yr1 = start.year
        # yr2 = end.year

        ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
        ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

        args = [
            'Rscript', join(Rsrc, Rfile),
            '%s/' % curdir,
            '%s/' % Rsrc,
            '%s' % infile,
            '%s' % variable,
            '%s' % output_graphics,
            '%s' % file_pca,
            '%s' % file_class,
            '%s' % season,
            '%s' % start.year,
            '%s' % end.year,
            '%s' % 'MODEL',
            '%s' % kappa
        ]
        LOGGER.info('R call built')
        LOGGER.debug('ARGS: %s' % (args))
    except Exception as e:
        msg = 'failed to build the R command %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()  # , shell=True
        LOGGER.info('R outlog info:\n %s ' % output)
        LOGGER.debug('R outlog errors:\n %s ' % error)
        if len(output) > 0:
            response.update_status('**** weatherregime in R succeeded', 80)
        else:
            LOGGER.error('NO! output returned from R call')
    except Exception as e:
        msg = 'weatherregime in R %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    response.update_status('Weather regime clustering done ', 90)

    ############################################
    # set the outputs
    ############################################
    # response.update_status('Set the process outputs ', 95)
    response.outputs['Routput_graphic'].file = output_graphics
    response.outputs['output_pca'].file = file_pca
    response.outputs['output_classification'].file = file_class
    response.outputs['output_netcdf'].file = model_season
    response.update_status('done', 100)
    return response
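# ---------------------------------------------------------------------------
# Hedged sketch for the "TODO: CHANGE to cross-platform" cleanup above: the
# handler currently shells out to 'rm -f'. A portable equivalent using only
# the standard library could look like this (the helper name is hypothetical).
def _remove_files(*paths):
    """Remove the given files, ignoring those that do not exist (like rm -f)."""
    import os
    for p in paths:
        try:
            os.remove(p)
        except OSError:
            pass
# ---------------------------------------------------------------------------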
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    response.update_status('execution started at : %s ' % dt.now(), 5)

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status(
            'execution started at : {}'.format(dt.now()), 5)

        LOGGER.info('read in the arguments')

        nsim = request.inputs['nsim'][0].data
        LOGGER.info('nsim %s', nsim)

        yfile = request.inputs['yfile'][0].file
        LOGGER.info('yfile %s', yfile)

        anafile1 = request.inputs['anafile1'][0].file
        LOGGER.info('anafile1 %s', anafile1)

        anafile2 = request.inputs['anafile2'][0].file
        LOGGER.info('anafile2 %s', anafile2)
    except Exception:
        msg = 'failed to read in the arguments'
        LOGGER.exception(msg)
        raise Exception(msg)

    #############################
    # run the analogue generator
    #############################
    response.update_status('Start anattribution', 50)

    import shlex
    import subprocess
    import pandas
    import matplotlib.pyplot as plt
    from blackswan import config
    from os.path import curdir, exists, join

    # try:
    ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf', prefix='anna_plots')
    LOGGER.info('output_graphics %s', output_graphics)

    ip, output_txt = mkstemp(dir=curdir, suffix='.txt', prefix='anna_ysim')
    LOGGER.info('output_txt %s', output_txt)

    # compute the average temperature of January 2018
    # (a cleaned-up sketch follows this handler)
    # ytable = pandas.read_table(yfile, sep=" ", skipinitialspace=True)
    # idx = [x >= 20180101 and x < 20190101 for x in ytable.date]
    # tas_jan18 = ytable.iloc[idx, 1]
    # tas_jan18 = tas_jan18.mean(axis=0)

    # generate other possible realisations of temperature for January 2018,
    # conditionally on the atmospheric circulation

    # for period P1
    ysim_p1 = analogs_generator(anafile=anafile1, yfile=yfile, nsim=nsim)
    ymean_p1 = ysim_p1.mean(axis=1)
    print(ysim_p1)

    # for period P2
    ysim_p2 = analogs_generator(anafile=anafile2, yfile=yfile, nsim=nsim)
    ymean_p2 = ysim_p2.mean(axis=1)
    print(ysim_p2)
    LOGGER.info('analogue generator done!')

    # Format the data into a DataFrame to plot boxplots
    plotdat = pandas.concat([ymean_p1, ymean_p2], axis=1)
    plotdat.columns = ["P1", "P2"]
    print(plotdat)

    plotdat.to_csv(output_txt, sep=' ', index=False, header=True)
    LOGGER.info('writing output_txt done!')

    fig1 = plt.figure()
    # plt.axvline(x=tas_jan18)
    plt.hist(ymean_p1, alpha=0.5, label='P1')
    plt.hist(ymean_p2, alpha=0.5, label='P2')
    plt.legend(loc='upper right')
    LOGGER.info('plot1 done!')

    fig2 = plt.figure()
    plt.boxplot([ymean_p1, ymean_p2])
    LOGGER.info('plot2 done!')
    # plt.axhline(y=tas_jan18)

    from matplotlib.backends.backend_pdf import PdfPages
    pp = PdfPages(output_graphics)
    pp.savefig(fig1)
    pp.savefig(fig2)
    pp.close()

    response.update_status('**** anattribution succeeded', 90)
    LOGGER.info('saving plots done!')

    # except Exception as e:
    #     msg = 'Error in analogues_generator %s ' % e
    #     LOGGER.error(msg)
    #     raise Exception(msg)

    response.update_status('Anattribution done ', 92)

    ############################################
    # set the outputs
    ############################################
    response.update_status('Set the process outputs ', 95)

    response.outputs['Py_output_graphic'].file = output_graphics
    LOGGER.info('output_graphics %s', response.outputs)

    response.outputs['Ysims'].file = output_txt
    LOGGER.info('output_txt %s', response.outputs)

    response.update_status('done', 100)
    return response
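# ---------------------------------------------------------------------------
# Hedged sketch of the commented-out "observed January 2018 mean" step above,
# assuming yfile is a whitespace-separated table with an integer YYYYMMDD
# 'date' column and the temperature in the second column, as the commented
# code suggests. The function name and defaults are illustrative only.
def _observed_mean(yfile, date_min=20180101, date_max=20190101):
    """Mean of the second column for rows with date_min <= date < date_max."""
    import pandas
    ytable = pandas.read_table(yfile, sep=' ', skipinitialspace=True)
    idx = (ytable.date >= date_min) & (ytable.date < date_max)
    return ytable.loc[idx].iloc[:, 1].mean()
# ---------------------------------------------------------------------------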
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    response.update_status('execution started at : {}'.format(dt.now()), 5)

    ################################
    # reading in the input arguments
    ################################
    try:
        LOGGER.info('read in the arguments')
        # resources = self.getInputValues(identifier='resources')
        season = request.inputs['season'][0].data
        LOGGER.info('season %s', season)

        period = request.inputs['period'][0].data
        LOGGER.info('period %s', period)

        anualcycle = request.inputs['anualcycle'][0].data
        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        LOGGER.debug('start: %s , end: %s ' % (start, end))

        resource = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        # resource = archiveextract(resource=[res.file for res in request.inputs['resource']])

        url_Rdat = request.inputs['Rdat'][0].data
        url_dat = request.inputs['dat'][0].data
        url_ref_file = request.inputs['netCDF'][0].data  # can be None

        # season = self.getInputValues(identifier='season')[0]
        # period = self.getInputValues(identifier='period')[0]
        # anualcycle = self.getInputValues(identifier='anualcycle')[0]

        LOGGER.info('period %s' % str(period))
        LOGGER.info('season %s' % str(season))
        LOGGER.info('read in the arguments')
        LOGGER.info('url_ref_file: %s' % url_ref_file)
        LOGGER.info('url_Rdat: %s' % url_Rdat)
        LOGGER.info('url_dat: %s' % url_dat)
    except Exception as e:
        LOGGER.debug('failed to convert arguments %s ' % e)

    ############################
    # fetching training data
    ############################
    try:
        dat = abspath(download(url_dat))
        Rdat = abspath(download(url_Rdat))
        LOGGER.info('training data fetched')
    except Exception as e:
        LOGGER.error('failed to fetch training data %s' % e)

    ##########################################################
    # get the required bbox and time region from resource data
    ##########################################################
    # from flyingpigeon.weatherregimes import get_level
    try:
        from blackswan.ocgis_module import call
        from blackswan.utils import get_variable
        time_range = [start, end]

        variable = get_variable(resource)

        if len(url_ref_file) > 0:
            ref_file = download(url_ref_file)
            model_subset = call(
                resource=resource, variable=variable, time_range=time_range,
                # conform_units_to=conform_units_to,
                geom=bbox, spatial_wrapping='wrap',
                regrid_destination=ref_file, regrid_options='bil')
            LOGGER.info('Dataset subset with regridding done: %s ' % model_subset)
        else:
            model_subset = call(
                resource=resource, variable=variable, time_range=time_range,
                # conform_units_to=conform_units_to,
                geom=bbox, spatial_wrapping='wrap',
            )
            LOGGER.info('Dataset time period extracted: %s ' % model_subset)
    except:
        LOGGER.exception('failed to make a data subset ')

    #######################
    # computing anomalies
    #######################
    try:
        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        model_anomal = wr.get_anomalies(model_subset, reference=reference, sseas='multi')

        #####################
        # extracting season
        #####################
        model_season = wr.get_season(model_anomal, season=season)
    except:
        LOGGER.exception('failed to compute anualcycle or seasons')

    #######################
    # call the R scripts
    #######################
    import shlex
    import subprocess
    from blackswan import config
    from os.path import curdir, exists, join

    try:
        rworkspace = curdir
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_projection.R'

        yr1 = start.year
        yr2 = end.year
        time = get_time(model_season)  # , format='%Y%m%d')

        # ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
        ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
        ip, output_frec = mkstemp(dir=curdir, suffix='.txt')

        args = [
            'Rscript', join(Rsrc, Rfile),
            '%s/' % curdir,
            '%s/' % Rsrc,
            '%s' % model_season,
            '%s' % variable,
            '%s' % str(time).strip("[]").replace("'", "").replace(" ", ""),
            # '%s' % output_graphics,
            '%s' % dat,
            '%s' % Rdat,
            '%s' % file_pca,
            '%s' % file_class,
            '%s' % output_frec,
            '%s' % season,
            '%s' % start.year,
            '%s' % end.year,
            '%s' % 'MODEL'
        ]
        LOGGER.info('R call built')
    except Exception as e:
        msg = 'failed to build the R command %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()  # , shell=True
        LOGGER.info('R outlog info:\n %s ' % output)
        LOGGER.debug('R outlog errors:\n %s ' % error)
        if len(output) > 0:
            response.update_status('**** weatherregime in R succeeded', 90)
        else:
            LOGGER.error('NO! output returned from R call')
    except Exception as e:
        msg = 'weatherregime in R %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    #################
    # set the outputs
    #################
    response.update_status('Set the process outputs ', 95)

    response.outputs['output_pca'].file = file_pca
    response.outputs['output_classification'].file = file_class
    response.outputs['output_netcdf'].file = model_season
    response.outputs['output_frequency'].file = output_frec

    response.update_status('done', 100)
    return response
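# ---------------------------------------------------------------------------
# Hedged sketch of a helper that factors out the Rscript call pattern used in
# the handlers above (Popen, log stdout/stderr, treat empty stdout as a
# failure). The helper name is hypothetical, and the 'PYWPS' logger name is
# an assumption about the module-level LOGGER.
def _run_rscript(args):
    """Run an Rscript command line; return stdout, raise if it is empty."""
    import logging
    import subprocess
    log = logging.getLogger("PYWPS")
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = proc.communicate()
    log.info('R outlog info:\n %s ', output)
    log.debug('R outlog errors:\n %s ', error)
    if len(output) == 0:
        raise RuntimeError('no output returned from R call')
    return output
# ---------------------------------------------------------------------------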
def _handler(self, request, response):
    # LOGGER.debug('CURDIR XXXX : %s ' % (abspath(curdir)))
    # LOGGER.debug('WORKDIR XXXX : %s ' % (self.workdir))

    os.chdir(self.workdir)
    # LOGGER.debug('CURDIR XXXX : %s ' % (abspath(curdir)))

    init_process_logger('log.txt')
    # init_process_logger(os.path.join(self.workdir, 'log.txt'))
    # response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)

    process_start_time = time.time()  # measure process execution time ...
    start_time = time.time()  # measure init ...

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status('read input parameter : %s ' % dt.now(), 6)

        refSt = request.inputs['refSt'][0].data
        refEn = request.inputs['refEn'][0].data
        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data

        seasonwin = request.inputs['seasonwin'][0].data
        nanalog = request.inputs['nanalog'][0].data
        timres = request.inputs['timeres'][0].data

        bboxDef = '-20,40,30,70'  # in general format
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
        bboxStr = bboxStr.split(',')

        # Check for wrong coordinates and apply the default if necessary
        # (a possible helper for this repeated block is sketched after this handler)
        if (abs(float(bboxStr[0])) > 180 or
                abs(float(bboxStr[1])) > 180 or
                abs(float(bboxStr[2])) > 90 or
                abs(float(bboxStr[3])) > 90):
            bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
            LOGGER.debug('BBOX is out of range, using default instead: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
        LOGGER.debug('BBOX original: %s ' % (bboxStr))

        normalize = request.inputs['normalize'][0].data
        detrend = request.inputs['detrend'][0].data
        plot = request.inputs['plot'][0].data
        distance = request.inputs['dist'][0].data
        outformat = request.inputs['outformat'][0].data
        timewin = request.inputs['timewin'][0].data

        model_var = request.inputs['reanalyses'][0].data
        model, var = model_var.split('_')

        LOGGER.info('input parameters set')
        response.update_status('Read in and convert the arguments', 7)
    except Exception as e:
        msg = 'failed to read input parameter %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        response.update_status('Preparing environment and converting arguments', 8)
        LOGGER.debug('date: %s %s %s %s ' % (type(refSt), refEn, dateSt, dateEn))

        start = min(refSt, dateSt)
        end = max(refEn, dateEn)

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            LOGGER.exception('output format not valid')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ###########################
    # set the environment
    ###########################
    response.update_status('fetching data from archive', 9)

    try:
        if model == 'NCEP':
            getlevel = False
            if 'z' in var:
                level = var.strip('z')
                # conform_units_to = None
            else:
                level = None
                if var == 'precip':
                    var = 'pr_wtr'
                # conform_units_to = 'hPa'
        elif '20CRV2' in model:
            getlevel = False
            if 'z' in var:
                level = var.strip('z')
                # conform_units_to = None
            else:
                level = None
                # conform_units_to = 'hPa'
        else:
            LOGGER.exception('Reanalyses dataset not known')
        LOGGER.info('environment set for model: %s' % model)
    except Exception:
        msg = 'failed to set environment'
        LOGGER.exception(msg)
        raise Exception(msg)

    ##########################################
    # fetch Data from original data archive
    ##########################################

    # NOTE: If ref is say 1950 - 1990, and sim is just 1 week in 2017:
    # - ALL the data will be downloaded, 1950 - 2017
    try:
        model_nc = rl(start=start.year, end=end.year,
                      dataset=model, variable=var,
                      timres=timres, getlevel=getlevel)
        LOGGER.info('reanalyses data fetched')
    except Exception:
        msg = 'failed to get reanalyses data'
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('subsetting region of interest', 10)

    LOGGER.debug("start and end time: %s - %s" % (start, end))
    time_range = [start, end]

    # Checking memory and dataset size
    model_size = get_files_size(model_nc)
    memory_avail = psutil.virtual_memory().available
    thrs = 0.3  # 30%
    if (model_size >= thrs * memory_avail):
        ser_r = True
    else:
        ser_r = False

    # ################################
    # For 20CRV2 geopotential height, the daily dataset for 100 years is about 50 Gb,
    # so it makes sense to process it step-by-step.
    # TODO: need to create a dictionary for such datasets (for models as well)
    # TODO: benchmark the method below for NCEP z500 for 60 years
    # if ('20CRV2' in model) and ('z' in var):
    if ('z' in var):
        tmp_total = []
        origvar = get_variable(model_nc)

        for z in model_nc:
            # tmp_n = 'tmp_%s' % (uuid.uuid1())
            b0 = call(resource=z, variable=origvar,
                      level_range=[int(level), int(level)], geom=bbox,
                      spatial_wrapping='wrap',
                      prefix='levdom_' + os.path.basename(z)[0:-3])
            tmp_total.append(b0)

        tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
        inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range)

        # Clean
        for i in tmp_total:
            tbr = 'rm -f %s' % (i)
            os.system(tbr)

        # Create new variable
        ds = Dataset(inter_subset_tmp, mode='a')
        z_var = ds.variables.pop(origvar)
        dims = z_var.dimensions
        new_var = ds.createVariable('z%s' % level, z_var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
        # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
        ds.close()

        model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level)
    else:
        if ser_r:
            LOGGER.debug('Process reanalysis step-by-step')
            tmp_total = []
            for z in model_nc:
                # tmp_n = 'tmp_%s' % (uuid.uuid1())
                b0 = call(resource=z, variable=var, geom=bbox,
                          spatial_wrapping='wrap',
                          prefix='Rdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)
            tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
            model_subset_tmp = call(resource=tmp_total, variable=var, time_range=time_range)
        else:
            LOGGER.debug('Using whole dataset at once')
            model_subset_tmp = call(resource=model_nc, variable=var,
                                    geom=bbox, spatial_wrapping='wrap',
                                    time_range=time_range,
                                    )

    # If the dataset is 20CRV2, the 6-hourly file should be converted to daily.
    # Option to use previously cached 6h data (if any) and not download daily files.
    # Disabled for now
    # if '20CRV2' in model:
    #     if timres == '6h':
    #         from cdo import Cdo
    #         cdo = Cdo(env=os.environ)
    #         model_subset = '%s.nc' % uuid.uuid1()
    #         tmp_f = '%s.nc' % uuid.uuid1()
    #         cdo_op = getattr(cdo, 'daymean')
    #         cdo_op(input=model_subset_tmp, output=tmp_f)
    #         sti = '00:00:00'
    #         cdo_op = getattr(cdo, 'settime')
    #         cdo_op(sti, input=tmp_f, output=model_subset)
    #         LOGGER.debug('File converted from: %s to daily' % (timres))
    #     else:
    #         model_subset = model_subset_tmp
    # else:
    #     model_subset = model_subset_tmp

    # Remove the line below if working with 6h data...
    model_subset = model_subset_tmp

    LOGGER.info('Dataset subset done: %s ', model_subset)
    response.update_status('dataset subsetted', 15)

    # BLOCK OF DETRENDING of model_subset !
    # The original model subset is kept for further visualisation if needed.
    # There is currently an issue with SLP:
    # TODO 1 Keep trend as separate file
    # TODO 2 Think how to add options to plot anomalies AND original data...
    #        Maybe run archive and simulation = call(...) over NOT detrended data and keep it as well
    # TODO 3 Check with a faster smoother and removing the trend of each grid cell

    if detrend == 'None':
        orig_model_subset = model_subset
    else:
        orig_model_subset = remove_mean_trend(model_subset, varname=var)

    # ======================================

    LOGGER.debug("get_input_subset_dataset took %s seconds.", time.time() - start_time)
    response.update_status('**** Input data fetched', 20)

    ########################
    # input data preparation
    ########################
    response.update_status('Start preparing input data', 30)
    start_time = time.time()  # measure data preparation ...

    try:
        # Construct descriptive filenames for the three files listed in the config file
        # refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
        # simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
        # Fix < 1900 issue...
        refDatesString = refSt.isoformat().strip().split("T")[0] + "_" + refEn.isoformat().strip().split("T")[0]
        simDatesString = dateSt.isoformat().strip().split("T")[0] + "_" + dateEn.isoformat().strip().split("T")[0]

        archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
            % (bbox[0], bbox[2], bbox[1], bbox[3])
        simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
            % (bbox[0], bbox[2], bbox[1], bbox[3])

        archive = call(resource=model_subset,
                       time_range=[refSt, refEn],
                       prefix=archiveNameString)
        simulation = call(resource=model_subset,
                          time_range=[dateSt, dateEn],
                          prefix=simNameString)
        LOGGER.info('archive and simulation files generated: %s, %s' % (archive, simulation))
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        if seacyc is True:
            LOGGER.info('normalization function with method: %s ' % normalize)
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to generate normalization files %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    output_file = 'output.txt'
    files = [os.path.abspath(archive), os.path.abspath(simulation), output_file]
    LOGGER.debug("Data preparation took %s seconds.", time.time() - start_time)

    ############################
    # generate the config file
    ############################
    config_file = analogs.get_configfile(
        files=files,
        seasoncyc_base=seasoncyc_base,
        seasoncyc_sim=seasoncyc_sim,
        base_id=model,
        sim_id=model,
        timewin=timewin,
        varname=var,
        seacyc=seacyc,
        cycsmooth=91,
        nanalog=nanalog,
        seasonwin=seasonwin,
        distfun=distance,
        outformat=outformat,
        calccor=True,
        silent=False,
        # period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
        period=[refSt.isoformat().strip().split("T")[0],
                refEn.isoformat().strip().split("T")[0]],
        bbox="{0[0]},{0[2]},{0[1]},{0[3]}".format(bbox))
    response.update_status('generated config file', 40)

    #######################
    # CASTf90 call
    #######################
    start_time = time.time()  # measure call castf90

    # -----------------------
    try:
        import ctypes
        # TODO: This lib is for linux
        mkl_rt = ctypes.CDLL('libmkl_rt.so')
        nth = mkl_rt.mkl_get_max_threads()
        LOGGER.debug('Current number of threads: %s' % (nth))
        mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
        nth = mkl_rt.mkl_get_max_threads()
        LOGGER.debug('NEW number of threads: %s' % (nth))
        # TODO: Does it work with the default shell=False in subprocess... (?)
        os.environ['MKL_NUM_THREADS'] = str(nth)
        os.environ['OMP_NUM_THREADS'] = str(nth)
    except Exception as e:
        msg = 'Failed to set THREADS %s ' % e
        LOGGER.debug(msg)
    # -----------------------

    # ##### TEMPORARY WORKAROUND! With installed hdf5-1.8.18 in anaconda ###############
    # ##### MUST be removed after castf90 is recompiled with the latest hdf version
    # ##### NOT safe
    os.environ['HDF5_DISABLE_VERSION_CHECK'] = '1'
    # hdflib = os.path.expanduser("~") + '/anaconda/lib'
    # hdflib = os.getenv("HOME") + '/anaconda/lib'
    import pwd
    hdflib = pwd.getpwuid(os.getuid()).pw_dir + '/anaconda/lib'
    os.environ['LD_LIBRARY_PATH'] = hdflib
    # ################################################################################

    response.update_status('Start CASTf90 call', 50)
    try:
        # response.update_status('execution of CASTf90', 50)
        cmd = ['analogue.out', config_file]
        LOGGER.debug("castf90 command: %s", cmd)
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        LOGGER.info('analogue output:\n %s', output)
        response.update_status('**** CASTf90 succeeded', 60)
    except CalledProcessError as e:
        msg = 'CASTf90 failed:\n{0}'.format(e.output)
        LOGGER.exception(msg)
        raise Exception(msg)
    LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

    # TODO: Add try - except for pdfs
    if plot == 'Yes':
        analogs_pdf = analogs.plot_analogs(configfile=config_file)
    else:
        analogs_pdf = 'dummy_plot.pdf'
        with open(analogs_pdf, 'a'):
            os.utime(analogs_pdf, None)

    response.update_status('preparing output', 70)

    response.outputs['analog_pdf'].file = analogs_pdf
    response.outputs['config'].file = config_file
    response.outputs['analogs'].file = output_file
    response.outputs['output_netcdf'].file = simulation
    response.outputs['target_netcdf'].file = archive

    if seacyc is True:
        response.outputs['base_netcdf'].file = seasoncyc_base
        response.outputs['sim_netcdf'].file = seasoncyc_sim
    else:
        # TODO: Still unclear how to pass an unknown number of outputs
        dummy_base = 'dummy_base.nc'
        dummy_sim = 'dummy_sim.nc'
        with open(dummy_base, 'a'):
            os.utime(dummy_base, None)
        with open(dummy_sim, 'a'):
            os.utime(dummy_sim, None)
        response.outputs['base_netcdf'].file = dummy_base
        response.outputs['sim_netcdf'].file = dummy_sim

    ########################
    # generate analog viewer
    ########################
    formated_analogs_file = analogs.reformat_analogs(output_file)
    # response.outputs['formated_analogs'].storage = FileStorage()
    response.outputs['formated_analogs'].file = formated_analogs_file
    LOGGER.info('analogs reformatted')
    response.update_status('reformatted analog file', 80)

    viewer_html = analogs.render_viewer(
        # configfile=response.outputs['config'].get_url(),
        configfile=config_file,
        # datafile=response.outputs['formated_analogs'].get_url())
        datafile=formated_analogs_file)
    response.outputs['output'].file = viewer_html
    response.update_status('Successfully generated analogs viewer', 90)
    LOGGER.info('rendered pages: %s ', viewer_html)

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.", time.time() - process_start_time)
    response.outputs['output_log'].file = 'log.txt'
    return response
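# ---------------------------------------------------------------------------
# Hedged sketch of a helper that could replace the repeated BBox parsing and
# validation blocks in the handlers above: parse a 'W,E,S,N' string, fall back
# to a default when a coordinate is out of range, and return the
# [minx, miny, maxx, maxy] order passed to ocgis. Name and default are
# illustrative only.
def _parse_bbox(bbox_str, default='-20,40,30,70'):
    """Parse 'W,E,S,N' into the [W, S, E, N] list used for ocgis geoms."""
    parts = [float(v) for v in bbox_str.split(',')]
    if (abs(parts[0]) > 180 or abs(parts[1]) > 180 or
            abs(parts[2]) > 90 or abs(parts[3]) > 90):
        # out of range: use the default, as the handlers do
        parts = [float(v) for v in default.split(',')]
    return [parts[0], parts[2], parts[1], parts[3]]
# ---------------------------------------------------------------------------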
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : %s ' % dt.now(), 10)

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status('execution started at : {}'.format(dt.now()), 20)

        LOGGER.debug('read in the arguments')
        LOGGER.debug('request.input[files] %s ' % (request.inputs['files']))

        # files = archiveextract(resource=[res.file for res in request.inputs['files']])
        files = [res.file for res in request.inputs['files']]
        # files = request.inputs['files'][0].data.split(",")
        LOGGER.debug('files %s ' % (files))
        # files = archiveextract(resource=files)
        LOGGER.debug('files %s ' % (files))

        xname = request.inputs['xname'][0].data
        LOGGER.debug('xname: %s ' % (xname))
        yname = request.inputs['yname'][0].data
        LOGGER.debug('yname: %s ' % (yname))

        rcp = request.inputs['rcp'][0].data.lower()
        LOGGER.debug('rcp: %s ' % (rcp))

        # group the files by variable and experiment
        # (a helper for this pattern is sketched after this handler)
        xfiles = [f for f in files if basename(f).split("_")[0] == xname]
        f_xhist = [f for f in xfiles if basename(f).split("_")[3] == "historical"]
        f_xrcp = [f for f in xfiles if basename(f).split("_")[3] in rcp]
        LOGGER.debug('xfiles %s ' % (xfiles))
        LOGGER.debug('f_xhist %s ' % (f_xhist))
        LOGGER.debug('f_xrcp %s ' % (f_xrcp))

        yfiles = [f for f in files if basename(f).split("_")[0] == yname]
        f_yhist = [f for f in yfiles if basename(f).split("_")[3] == "historical"]
        f_yrcp = [f for f in yfiles if basename(f).split("_")[3] in rcp]
        LOGGER.debug('yfiles %s ' % (yfiles))
        LOGGER.debug('f_yhist %s ' % (f_yhist))
        LOGGER.debug('f_yrcp %s ' % (f_yrcp))

        y_compute_ano = request.inputs['y_compute_ano'][0].data
        x_compute_ano = request.inputs['x_compute_ano'][0].data
        LOGGER.debug('Y compute_ano: %s ' % (y_compute_ano))
        LOGGER.debug('X compute_ano: %s ' % (x_compute_ano))

        y_start_ano = request.inputs['y_start_ano'][0].data
        y_end_ano = request.inputs['y_end_ano'][0].data
        x_start_ano = request.inputs['x_start_ano'][0].data
        x_end_ano = request.inputs['x_end_ano'][0].data
        LOGGER.debug('Y start_ano: %s ' % (y_start_ano))
        LOGGER.debug('Y end_ano: %s ' % (y_end_ano))
        LOGGER.debug('X start_ano: %s ' % (x_start_ano))
        LOGGER.debug('X end_ano: %s ' % (x_end_ano))

        y_bbox = request.inputs['y_bbox'][0].data
        x_bbox = request.inputs['x_bbox'][0].data
        LOGGER.debug('Y bbox: %s ' % (y_bbox))
        LOGGER.debug('X bbox: %s ' % (x_bbox))

        y_season = request.inputs['y_season'][0].data
        x_season = request.inputs['x_season'][0].data
        LOGGER.debug('Y season: %s ' % (y_season))
        LOGGER.debug('X season: %s ' % (x_season))

        y_spatial_aggregator = request.inputs['y_spatial_aggregator'][0].data
        x_spatial_aggregator = request.inputs['x_spatial_aggregator'][0].data
        LOGGER.debug('Y spatial_aggregator: %s ' % (y_spatial_aggregator))
        LOGGER.debug('X spatial_aggregator: %s ' % (x_spatial_aggregator))

        y_time_aggregator = request.inputs['y_time_aggregator'][0].data
        x_time_aggregator = request.inputs['x_time_aggregator'][0].data
        LOGGER.debug('Y time_aggregator: %s ' % (y_time_aggregator))
        LOGGER.debug('X time_aggregator: %s ' % (x_time_aggregator))

        y_first_spatial = request.inputs['y_first_spatial'][0].data
        x_first_spatial = request.inputs['x_first_spatial'][0].data
        LOGGER.debug('Y first_spatial: %s ' % (y_first_spatial))
        LOGGER.debug('X first_spatial: %s ' % (x_first_spatial))

        stat_model = request.inputs['stat_model'][0].data
        LOGGER.debug('stat_model: %s ' % (stat_model))

        qthreshold = request.inputs['qthreshold'][0].data
        LOGGER.debug('qthreshold: %s ' % (qthreshold))

        nbootstrap = request.inputs['nbootstrap'][0].data
        LOGGER.debug('nbootstrap: %s ' % (nbootstrap))
    except Exception as e:
        msg = 'failed to read in the arguments %s' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ####################
    # Run farallnat.py
    ####################
    response.update_status('Start FARallnat analysis', 5)
    try:
        # ip, Routput_graphics = mkstemp(dir=curdir, suffix='.pdf')
        # ip, Rdata = mkstemp(dir=curdir, suffix='.rds')
        Rdata = "FARallnat_data.rds"
        Routput_graphics = "FARallnat_plots.pdf"
        # add xname and yname as arguments
        compute_far(plot_pdf=Routput_graphics,
                    data_rds=Rdata,
                    yvarname=yname,
                    xvarname=xname,
                    f_yhist=f_yhist,
                    f_yrcp=f_yrcp,
                    f_xhist=f_xhist,
                    f_xrcp=f_xrcp,
                    y_compute_ano=y_compute_ano,
                    y_start_ano=y_start_ano,
                    y_end_ano=y_end_ano,
                    y_bbox=y_bbox,
                    y_season=y_season,
                    y_first_spatial=y_first_spatial,
                    y_spatial_aggregator=y_spatial_aggregator,
                    y_time_aggregator=y_time_aggregator,
                    x_compute_ano=x_compute_ano,
                    x_start_ano=x_start_ano,
                    x_end_ano=x_end_ano,
                    x_bbox=x_bbox,
                    x_season=x_season,
                    x_first_spatial=x_first_spatial,
                    x_spatial_aggregator=x_spatial_aggregator,
                    x_time_aggregator=x_time_aggregator,
                    stat_model=stat_model,
                    qthreshold=qthreshold,
                    nbootstrap=nbootstrap)
    except Exception as e:
        msg = 'fails in farallnat.py %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    response.update_status('FARallnat analysis done ', 90)

    ############################################
    # set the outputs
    ############################################
    # response.update_status('Set the process outputs ', 95)
    response.outputs['Routput_graphics'].file = Routput_graphics
    response.outputs['Rdata'].file = Rdata
    response.update_status('done', 100)
    return response
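# ---------------------------------------------------------------------------
# Hedged sketch of the filename grouping used in the handler above: CMIP-style
# names are split on '_', with field 0 the variable name and field 3 the
# experiment. The helper name is hypothetical.
def _split_hist_rcp(files, varname, rcp):
    """Return (historical, rcp) file lists for one variable name."""
    from os.path import basename
    selected = [f for f in files if basename(f).split("_")[0] == varname]
    f_hist = [f for f in selected if basename(f).split("_")[3] == "historical"]
    f_rcp = [f for f in selected if basename(f).split("_")[3] in rcp]
    return f_hist, f_rcp
# ---------------------------------------------------------------------------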
def _handler(self, request, response):
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)

    process_start_time = time.time()  # measure process execution time ...
    start_time = time.time()  # measure init ...

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status('read input parameter : %s ' % dt.now(), 6)

        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data

        # timres = request.inputs['timeres'][0].data
        timres = 'day'

        season = request.inputs['season'][0].data

        bboxDef = '-20,40,30,70'  # in general format
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
        bboxStr = bboxStr.split(',')

        # Check for wrong coordinates and apply the default if necessary
        if (abs(float(bboxStr[0])) > 180 or
                abs(float(bboxStr[1])) > 180 or
                abs(float(bboxStr[2])) > 90 or
                abs(float(bboxStr[3])) > 90):
            bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
            LOGGER.debug('BBOX is out of range, using default instead: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
        LOGGER.debug('BBOX original: %s ' % (bboxStr))

        distance = request.inputs['dist'][0].data
        method = request.inputs['method'][0].data

        model_var = request.inputs['reanalyses'][0].data
        model, var = model_var.split('_')

        LOGGER.info('input parameters set')
        response.update_status('Read in and convert the arguments', 7)
    except Exception as e:
        msg = 'failed to read input parameter %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    start = dateSt
    end = dateEn

    ###########################
    # set the environment
    ###########################
    response.update_status('fetching data from archive', 9)

    try:
        if model == 'NCEP':
            getlevel = False
            if 'z' in var:
                level = var.strip('z')
                # conform_units_to = None
            else:
                level = None
                if var == 'precip':
                    var = 'pr_wtr'
                # conform_units_to = 'hPa'
        elif '20CRV2' in model:
            getlevel = False
            if 'z' in var:
                level = var.strip('z')
                # conform_units_to = None
            else:
                level = None
                # conform_units_to = 'hPa'
        else:
            LOGGER.exception('Reanalyses dataset not known')
        LOGGER.info('environment set for model: %s' % model)
    except Exception:
        msg = 'failed to set environment'
        LOGGER.exception(msg)
        raise Exception(msg)

    ##########################################
    # fetch Data from original data archive
    ##########################################

    # NOTE: If ref is say 1950 - 1990, and sim is just 1 week in 2017:
    # - ALL the data will be downloaded, 1950 - 2017
    try:
        model_nc = rl(start=start.year, end=end.year,
                      dataset=model, variable=var,
                      timres=timres, getlevel=getlevel)
        LOGGER.info('reanalyses data fetched')
    except Exception:
        msg = 'failed to get reanalyses data'
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('subsetting region of interest', 10)
    # from flyingpigeon.weatherregimes import get_level

    LOGGER.debug("start and end time: %s - %s" % (start, end))
    time_range = [start, end]

    # Checking memory and dataset size
    model_size = get_files_size(model_nc)
    memory_avail = psutil.virtual_memory().available
    thrs = 0.5  # 50%
    if (model_size >= thrs * memory_avail):
        ser_r = True
    else:
        ser_r = False

    # ################################
    # For 20CRV2 geopotential height, the daily dataset for 100 years is about 50 Gb,
    # so it makes sense to process it step-by-step.
    # TODO: need to create a dictionary for such datasets (for models as well)
    # TODO: benchmark the method below for NCEP z500 for 60 years
    # if ('20CRV2' in model) and ('z' in var):
    if ('z' in var):
        tmp_total = []
        origvar = get_variable(model_nc)

        for z in model_nc:
            # tmp_n = 'tmp_%s' % (uuid.uuid1())
            b0 = call(resource=z, variable=origvar,
                      level_range=[int(level), int(level)], geom=bbox,
                      spatial_wrapping='wrap',
                      prefix='levdom_' + os.path.basename(z)[0:-3])
            tmp_total.append(b0)

        tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
        inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range)

        # Clean
        for i in tmp_total:
            tbr = 'rm -f %s' % (i)
            os.system(tbr)

        # Create new variable
        ds = Dataset(inter_subset_tmp, mode='a')
        z_var = ds.variables.pop(origvar)
        dims = z_var.dimensions
        new_var = ds.createVariable('z%s' % level, z_var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
        # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
        ds.close()

        model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level)
    else:
        if ser_r:
            LOGGER.debug('Process reanalysis step-by-step')
            tmp_total = []
            for z in model_nc:
                # tmp_n = 'tmp_%s' % (uuid.uuid1())
                b0 = call(resource=z, variable=var, geom=bbox,
                          spatial_wrapping='wrap',
                          prefix='Rdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)
            tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
            model_subset_tmp = call(resource=tmp_total, variable=var, time_range=time_range)
        else:
            LOGGER.debug('Using whole dataset at once')
            model_subset_tmp = call(resource=model_nc, variable=var,
                                    geom=bbox, spatial_wrapping='wrap',
                                    time_range=time_range,
                                    )

    # If the dataset is 20CRV2, the 6-hourly file should be converted to daily.
    # Option to use previously cached 6h data (if any) and not download daily files.
    if '20CRV2' in model:
        if timres == '6h':
            from cdo import Cdo
            cdo = Cdo(env=os.environ)

            model_subset = '%s.nc' % uuid.uuid1()
            tmp_f = '%s.nc' % uuid.uuid1()

            cdo_op = getattr(cdo, 'daymean')
            cdo_op(input=model_subset_tmp, output=tmp_f)
            sti = '00:00:00'
            cdo_op = getattr(cdo, 'settime')
            cdo_op(sti, input=tmp_f, output=model_subset)
            LOGGER.debug('File converted from: %s to daily' % (timres))
        else:
            model_subset = model_subset_tmp
    else:
        model_subset = model_subset_tmp

    LOGGER.info('Dataset subset done: %s ', model_subset)
    response.update_status('dataset subsetted', 15)

    # ======================================

    LOGGER.debug("get_input_subset_dataset took %s seconds.", time.time() - start_time)
    response.update_status('**** Input data fetched', 20)

    ########################
    # input data preparation
    ########################
    response.update_status('Start preparing input data', 30)
    start_time = time.time()  # measure data preparation ...

    # -----------------------
    # try:
    #     import ctypes
    #     # TODO: This lib is for linux
    #     mkl_rt = ctypes.CDLL('libmkl_rt.so')
    #     nth = mkl_rt.mkl_get_max_threads()
    #     LOGGER.debug('Current number of threads: %s' % (nth))
    #     mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
    #     nth = mkl_rt.mkl_get_max_threads()
    #     LOGGER.debug('NEW number of threads: %s' % (nth))
    #     # TODO: Does it work with the default shell=False in subprocess... (?)
    #     os.environ['MKL_NUM_THREADS'] = str(nth)
    #     os.environ['OMP_NUM_THREADS'] = str(nth)
    # except Exception as e:
    #     msg = 'Failed to set THREADS %s ' % e
    #     LOGGER.debug(msg)
    # -----------------------

    response.update_status('Start DIM calc', 50)

    # Calculation of Local Dimensions ==================
    LOGGER.debug('Calculation of the distances using: %s metric' % (distance))
    LOGGER.debug('Calculation of the dims with: %s' % (method))

    dim_filename = '%s.txt' % model
    tmp_dim_fn = '%s.txt' % uuid.uuid1()
    Rsrc = config.Rsrc_dir()

    if (method == 'Python'):
        try:
            l_dist, l_theta = localdims(resource=model_subset, variable=var, distance=str(distance))
            response.update_status('**** Dims with Python succeeded', 60)
        except:
            LOGGER.exception('NO! output returned from Python call')

    if (method == 'Python_wrap'):
        try:
            l_dist, l_theta = localdims_par(resource=model_subset, variable=var, distance=str(distance))
            response.update_status('**** Dims with Python succeeded', 60)
        except:
            LOGGER.exception('NO! output returned from Python call')

    if (method == 'R'):
        # from os.path import join
        Rfile = 'localdimension_persistence_fullD.R'
        args = ['Rscript', os.path.join(Rsrc, Rfile),
                '%s' % model_subset, '%s' % var, '%s' % tmp_dim_fn]
        LOGGER.info('R call built')
        LOGGER.debug('ARGS: %s' % (args))

        try:
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE).communicate()
            LOGGER.info('R outlog info:\n %s ' % output)
            LOGGER.exception('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                response.update_status('**** Dims with R succeeded', 60)
            else:
                LOGGER.exception('NO! output returned from R call')
            # HERE READ DATA FROM TEXT FILES
            R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',')
            l_theta = R_resdim[:, 0]
            l_dist = R_resdim[:, 1]
        except:
            msg = 'Dim with R'
            LOGGER.exception(msg)
            raise Exception(msg)

    if (method == 'R_wrap'):
        # from os.path import join
        Rfile = 'localdimension_persistence_serrD.R'
        args = ['Rscript', os.path.join(Rsrc, Rfile),
                '%s' % model_subset, '%s' % var, '%s' % tmp_dim_fn]
        LOGGER.info('R call built')
        LOGGER.debug('ARGS: %s' % (args))

        try:
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE,
                                             stderr=subprocess.PIPE).communicate()
            LOGGER.info('R outlog info:\n %s ' % output)
            LOGGER.exception('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                response.update_status('**** Dims with R_wrap succeeded', 60)
            else:
                LOGGER.exception('NO! output returned from R call')
            # HERE READ DATA FROM TEXT FILES
            R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',')
            l_theta = R_resdim[:, 0]
            l_dist = R_resdim[:, 1]
        except:
            msg = 'Dim with R_wrap'
            LOGGER.exception(msg)
            raise Exception(msg)

    try:
        res_times = get_time(model_subset)
    except:
        LOGGER.debug('Not standard calendar')
        res_times = analogs.get_time_nc(model_subset)

    # plot 1
    ld_pdf = analogs.pdf_from_ld(x=l_dist, y=l_theta)

    # res_times = [res_times[i].isoformat().strip().split("T")[0].replace('-', '') for i in range(len(res_times))]

    # concatenation of values
    concat_vals = column_stack([res_times, l_theta, l_dist])
    savetxt(dim_filename, concat_vals, fmt='%s', delimiter=',')

    # output season
    try:
        seas = _TIMEREGIONS_[season]['month']  # [12, 1, 2]
        LOGGER.info('Season to grep from TIMEREGIONS: %s ' % season)
        LOGGER.info('Season N to grep from TIMEREGIONS: %s ' % seas)
    except:
        LOGGER.info('No months in TIMEREGIONS, moving to months')
        try:
            seas = _MONTHS_[season]['month']  # [1] or [2] or ...
            LOGGER.info('Season to grep from MONTHS: %s ' % season)
            LOGGER.info('Season N to grep from MONTHS: %s ' % seas)
        except:
            seas = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

    ind = []

    # TODO: change concat_vals[i][0][4:6] to dt_obj.month !!!
    # (see the sketch after this handler)
    for i in range(len(res_times)):
        if (int(concat_vals[i][0][4:6]) in seas[:]):
            ind.append(i)
    sf = column_stack([concat_vals[i] for i in ind]).T
    seas_dim_filename = season + '_' + dim_filename
    savetxt(seas_dim_filename, sf, fmt='%s', delimiter=',')

    # -------------------------- plot with R ---------------
    R_plot_file = 'plot_csv.R'
    ld2_pdf = 'local_dims.pdf'
    ld2_seas_pdf = season + '_local_dims.pdf'

    args = ['Rscript', os.path.join(Rsrc, R_plot_file),
            '%s' % dim_filename, '%s' % ld2_pdf]
    try:
        output, error = subprocess.Popen(args, stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE).communicate()
        LOGGER.info('R outlog info:\n %s ' % output)
        LOGGER.exception('R outlog errors:\n %s ' % error)
    except:
        msg = 'Could not produce plot'
        LOGGER.exception(msg)
        # TODO: Here we need to produce empty pdf(s) to pass to the output

    args = ['Rscript', os.path.join(Rsrc, R_plot_file),
            '%s' % seas_dim_filename, '%s' % ld2_seas_pdf]
    try:
        output, error = subprocess.Popen(args, stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE).communicate()
        LOGGER.info('R outlog info:\n %s ' % output)
        LOGGER.exception('R outlog errors:\n %s ' % error)
    except:
        msg = 'Could not produce plot'
        LOGGER.exception(msg)
        # TODO: Here we need to produce empty pdf(s) to pass to the output

    # ====================================================

    response.update_status('preparing output', 80)

    response.outputs['ldist'].file = dim_filename
    response.outputs['ldist_seas'].file = seas_dim_filename
    response.outputs['ld_pdf'].file = ld_pdf
    response.outputs['ld2_pdf'].file = ld2_pdf
    response.outputs['ld2_seas_pdf'].file = ld2_seas_pdf

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.", time.time() - process_start_time)
    return response
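# ---------------------------------------------------------------------------
# Hedged sketch for the TODO above ("change concat_vals[i][0][4:6] to
# dt_obj.month"): selecting time steps by month directly from the datetime
# objects returned by get_time(), instead of slicing a date string. The
# function name is illustrative.
def _select_season_indices(times, months):
    """Indices of the time steps whose month is in 'months' (e.g. [12, 1, 2])."""
    return [i for i, t in enumerate(times) if t.month in months]
# ---------------------------------------------------------------------------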
def _handler(self, request, response): chdir(self.workdir) init_process_logger('log.txt') process_start_time = time.time() # measure process execution time ... response.update_status('execution started at : %s ' % dt.now(), 5) start_time = time.time() # measure init ... resource = archiveextract( resource=rename_complexinputs(request.inputs['resource'])) # Filter resource: if type(resource) == list: resource = sorted(resource, key=lambda i: path.splitext(path.basename(i))[0]) else: resource = [resource] refSt = request.inputs['refSt'][0].data refEn = request.inputs['refEn'][0].data dateSt = request.inputs['dateSt'][0].data dateEn = request.inputs['dateEn'][0].data regrset = request.inputs['regrset'][0].data direction = request.inputs['direction'][0].data # Check if model has 360_day calendar: try: modcal, calunits = get_calendar(resource[0]) LOGGER.debug('CALENDAR: %s' % (modcal)) if '360_day' in modcal: if direction == 're2mo': if refSt.day == 31: refSt = refSt.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (refSt)) if refEn.day == 31: refEn = refEn.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (refEn)) else: # mo2re if dateSt.day == 31: dateSt = dateSt.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (dateSt)) if dateEn.day == 31: dateEn = dateEn.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (dateEn)) except: LOGGER.debug('Could not detect calendar') seasonwin = request.inputs['seasonwin'][0].data nanalog = request.inputs['nanalog'][0].data bboxDef = '-20,40,30,70' # in general format bbox = [] bboxStr = request.inputs['BBox'][0].data LOGGER.debug('BBOX selected by user: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') # Checking for wrong cordinates and apply default if nesessary if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1]) > 180) or abs(float(bboxStr[2]) > 90) or abs(float(bboxStr[3])) > 90): bboxStr = bboxDef # request.inputs['BBox'].default # .default doesn't work anymore!!! 
LOGGER.debug( 'BBOX is out of the range, using default instead: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') bbox.append(float(bboxStr[0])) bbox.append(float(bboxStr[2])) bbox.append(float(bboxStr[1])) bbox.append(float(bboxStr[3])) normalize = request.inputs['normalize'][0].data plot = request.inputs['plot'][0].data distance = request.inputs['dist'][0].data outformat = request.inputs['outformat'][0].data timewin = request.inputs['timewin'][0].data model_var = request.inputs['reanalyses'][0].data model, var = model_var.split('_') try: if direction == 're2mo': anaSt = dt.combine(dateSt, dt_time( 0, 0)) # dt.strptime(dateSt[0], '%Y-%m-%d') anaEn = dt.combine(dateEn, dt_time( 0, 0)) # dt.strptime(dateEn[0], '%Y-%m-%d') refSt = dt.combine(refSt, dt_time( 12, 0)) # dt.strptime(refSt[0], '%Y-%m-%d') refEn = dt.combine(refEn, dt_time( 12, 0)) # dt.strptime(refEn[0], '%Y-%m-%d') r_time_range = [anaSt, anaEn] m_time_range = [refSt, refEn] elif direction == 'mo2re': anaSt = dt.combine(dateSt, dt_time( 12, 0)) # dt.strptime(refSt[0], '%Y-%m-%d') anaEn = dt.combine(dateEn, dt_time( 12, 0)) # dt.strptime(refEn[0], '%Y-%m-%d') refSt = dt.combine(refSt, dt_time( 0, 0)) # dt.strptime(dateSt[0], '%Y-%m-%d') refEn = dt.combine(refEn, dt_time( 0, 0)) # dt.strptime(dateEn[0], '%Y-%m-%d') r_time_range = [refSt, refEn] m_time_range = [anaSt, anaEn] else: LOGGER.exception( 'failed to find time periods for comparison direction') except: msg = 'failed to put simulation and reference time in order' LOGGER.exception(msg) raise Exception(msg) if normalize == 'None': seacyc = False else: seacyc = True if outformat == 'ascii': outformat = '.txt' elif outformat == 'netCDF': outformat = '.nc' else: LOGGER.exception('output format not valid') try: if model == 'NCEP': # getlevel = True getlevel = False if 'z' in var: level = var.strip('z') variable = 'hgt' # conform_units_to='hPa' else: variable = 'slp' level = None # conform_units_to='hPa' elif '20CRV2' in model: getlevel = False if 'z' in var: variable = 'hgt' level = var.strip('z') # conform_units_to=None else: variable = 'prmsl' level = None # conform_units_to='hPa' else: LOGGER.exception('Reanalyses model not known') LOGGER.info('environment set') except: msg = 'failed to set environment' LOGGER.exception(msg) raise Exception(msg) # LOGGER.exception("init took %s seconds.", time.time() - start_time) response.update_status('Read in the arguments', 10) ################# # get input data ################# # TODO: do not forget to select years start_time = time.time() # measure get_input_data ... response.update_status('fetching input data', 20) try: if direction == 're2mo': nc_reanalyses = reanalyses(start=anaSt.year, end=anaEn.year, variable=var, dataset=model, getlevel=getlevel) else: nc_reanalyses = reanalyses(start=refSt.year, end=refEn.year, variable=var, dataset=model, getlevel=getlevel) if type(nc_reanalyses) == list: nc_reanalyses = sorted( nc_reanalyses, key=lambda i: path.splitext(path.basename(i))[0]) else: nc_reanalyses = [nc_reanalyses] # For 20CRV2 geopotential height, daily dataset for 100 years is about 50 Gb # So it makes sense, to operate it step-by-step # TODO: need to create dictionary for such datasets (for models as well) # TODO: benchmark the method bellow for NCEP z500 for 60 years, may be use the same (!) 
# TODO Now everything regrid to the reanalysis # if ('20CRV2' in model) and ('z' in var): if ('z' in var): tmp_total = [] origvar = get_variable(nc_reanalyses[0]) for z in nc_reanalyses: # tmp_n = 'tmp_%s' % (uuid.uuid1()) b0 = call(resource=z, variable=origvar, level_range=[int(level), int(level)], geom=bbox, spatial_wrapping='wrap', prefix='levdom_' + path.basename(z)[0:-3]) tmp_total.append(b0) tmp_total = sorted( tmp_total, key=lambda i: path.splitext(path.basename(i))[0]) inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=r_time_range) # Create new variable ds = Dataset(inter_subset_tmp, mode='a') z_var = ds.variables.pop(origvar) dims = z_var.dimensions new_var = ds.createVariable('z%s' % level, z_var.dtype, dimensions=(dims[0], dims[2], dims[3])) new_var[:, :, :] = squeeze(z_var[:, 0, :, :]) # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()}) ds.close() nc_subset = call(inter_subset_tmp, variable='z%s' % level) # Clean for i in tmp_total: tbr = 'rm -f %s' % (i) system(tbr) # for i in inter_subset_tmp tbr = 'rm -f %s' % (inter_subset_tmp) system(tbr) else: # TODO: ADD HERE serial as well as in analogs reanalysis process!! nc_subset = call( resource=nc_reanalyses, variable=var, geom=bbox, spatial_wrapping='wrap', time_range=r_time_range, ) response.update_status('**** Input reanalyses data fetched', 30) except: msg = 'failed to fetch or subset input files' LOGGER.exception(msg) raise Exception(msg) ######################## # input data preperation ######################## response.update_status('Start preparing input data', 40) m_start = m_time_range[0] m_end = m_time_range[1] # =============================================================== # REMOVE resources from the list which are out of interest from the list # (years > and < than requested for calculation) tmp_resource = [] for re in resource: s, e = get_timerange(re) tmpSt = dt.strptime(s, '%Y%m%d') tmpEn = dt.strptime(e, '%Y%m%d') if ((tmpSt <= m_end) and (tmpEn >= m_start)): tmp_resource.append(re) LOGGER.debug('Selected file: %s ' % (re)) resource = tmp_resource start_time = time.time() # mesure data preperation ... # TODO: Check the callendars ! for model vs reanalyses. # TODO: Check the units! model vs reanalyses. try: m_total = [] modvar = get_variable(resource) # resource properties ds = Dataset(resource[0]) m_var = ds.variables[modvar] dims = list(m_var.dimensions) dimlen = len(dims) try: model_id = ds.getncattr('model_id') except AttributeError: model_id = 'Unknown model' LOGGER.debug('MODEL: %s ' % (model_id)) lev_units = 'hPa' if (dimlen > 3): lev = ds.variables[dims[1]] # TODO: actually index [1] need to be detected... assuming zg(time, plev, lat, lon) lev_units = lev.units if (lev_units == 'Pa'): m_level = str(int(level) * 100) else: m_level = level else: m_level = None if level == None: level_range = None else: level_range = [int(m_level), int(m_level)] ds.close() for z in resource: tmp_n = 'tmp_%s' % (uuid.uuid1()) # TODO: Important! if only 1 file - select time period from that first! 
# select level and regrid # \/\/ working version 19Feb2019 # b0 = call(resource=z, variable=modvar, level_range=level_range, # spatial_wrapping='wrap', cdover='system', # regrid_destination=nc_reanalyses[0], regrid_options='bil', prefix=tmp_n) try: b0 = call(resource=z, variable=modvar, level_range=level_range, spatial_wrapping='wrap', cdover='system', regrid_destination=nc_subset, regrid_options='bil', prefix=tmp_n) except: b0 = call(resource=z, variable=modvar, level_range=level_range, spatial_wrapping='wrap', cdover='system', regrid_destination=nc_reanalyses[0], regrid_options='bil', prefix=tmp_n) # select domain (already selected in fact, if regrided to 'nc_subset') b01 = call(resource=b0, geom=bbox, spatial_wrapping='wrap', prefix='levregr_' + path.basename(z)[0:-3]) # TODO: REPLACE rm -f by os.remove() ! tbr = 'rm -f %s' % (b0) system(tbr) tbr = 'rm -f %s.nc' % (tmp_n) system(tbr) # get full resource m_total.append(b01) model_subset = call(m_total, time_range=m_time_range) for i in m_total: tbr = 'rm -f %s' % (i) system(tbr) if m_level is not None: # Create new variable in model set ds = Dataset(model_subset, mode='a') mod_var = ds.variables.pop(modvar) dims = mod_var.dimensions new_modvar = ds.createVariable('z%s' % level, mod_var.dtype, dimensions=(dims[0], dims[2], dims[3])) new_modvar[:, :, :] = squeeze(mod_var[:, 0, :, :]) # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()}) ds.close() mod_subset = call(model_subset, variable='z%s' % level) else: mod_subset = model_subset except: msg = 'failed to subset simulation or reference data' LOGGER.exception(msg) raise Exception(msg) # -------------------------------------------- try: if direction == 'mo2re': simulation = mod_subset archive = nc_subset base_id = model sim_id = model_id elif direction == 're2mo': simulation = nc_subset archive = mod_subset base_id = model_id sim_id = model else: LOGGER.exception('direction not valid: %s ' % direction) except: msg = 'failed to find comparison direction' LOGGER.exception(msg) raise Exception(msg) try: if level is not None: out_var = 'z%s' % level else: var_archive = get_variable(archive) var_simulation = get_variable(simulation) if var_archive != var_simulation: rename_variable(archive, oldname=var_archive, newname=var_simulation) out_var = var_simulation LOGGER.info('varname %s in netCDF renamed to %s' % (var_archive, var_simulation)) except: msg = 'failed to rename variable in target files' LOGGER.exception(msg) raise Exception(msg) try: if seacyc is True: seasoncyc_base, seasoncyc_sim = analogs.seacyc( archive, simulation, method=normalize) else: seasoncyc_base = None seasoncyc_sim = None except: msg = 'failed to prepare seasonal cycle reference files' LOGGER.exception(msg) raise Exception(msg) # ip, output = mkstemp(dir='.', suffix='.txt') # output_file = path.abspath(output) output_file = 'output.txt' ################################ # Prepare names for config.txt # ################################ # refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d') # simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d') # Fix < 1900 issue... 
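# --- Illustrative sketch answering the "REPLACE rm -f by os.remove()" TODO
# above: a small cleanup helper that ignores missing files, matching the
# behaviour of 'rm -f' without spawning a shell.
import os

def remove_quiet(*paths):
    for p in paths:
        try:
            os.remove(p)
        except OSError:
            pass    # already removed or never created, like 'rm -f'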
refDatesString = refSt.isoformat().strip().split( "T")[0] + "_" + refEn.isoformat().strip().split("T")[0] simDatesString = dateSt.isoformat().strip().split( "T")[0] + "_" + dateEn.isoformat().strip().split("T")[0] archiveNameString = "base_" + out_var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \ % (bbox[0], bbox[2], bbox[1], bbox[3]) + '.nc' simNameString = "sim_" + out_var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \ % (bbox[0], bbox[2], bbox[1], bbox[3]) + '.nc' move(archive, archiveNameString) move(simulation, simNameString) archive = archiveNameString simulation = simNameString files = [path.abspath(archive), path.abspath(simulation), output_file] ############################ # generating the config file ############################ response.update_status('writing config file', 50) start_time = time.time() # measure write config ... try: config_file = analogs.get_configfile( files=files, seasoncyc_base=seasoncyc_base, seasoncyc_sim=seasoncyc_sim, base_id=base_id, sim_id=sim_id, timewin=timewin, # varname=var, varname=out_var, seacyc=seacyc, cycsmooth=91, nanalog=nanalog, seasonwin=seasonwin, distfun=distance, outformat=outformat, calccor=True, silent=False, # period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')], period=[ refSt.isoformat().strip().split("T")[0], refEn.isoformat().strip().split("T")[0] ], bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3])) except: msg = 'failed to generate config file' LOGGER.exception(msg) raise Exception(msg) ####################### # CASTf90 call ####################### import subprocess import shlex start_time = time.time() # measure call castf90 response.update_status('Start CASTf90 call', 60) # ----------------------- try: import ctypes # TODO: This lib is for linux mkl_rt = ctypes.CDLL('libmkl_rt.so') nth = mkl_rt.mkl_get_max_threads() LOGGER.debug('Current number of threads: %s' % (nth)) mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64))) nth = mkl_rt.mkl_get_max_threads() LOGGER.debug('NEW number of threads: %s' % (nth)) # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?) environ['MKL_NUM_THREADS'] = str(nth) environ['OMP_NUM_THREADS'] = str(nth) except Exception as e: msg = 'Failed to set THREADS %s ' % e LOGGER.debug(msg) # ----------------------- # ##### TEMPORAL WORKAROUND! 
With installed hdf5-1.8.18 in anaconda ############### # ##### MUST be removed after castf90 recompiled with the latest hdf version # ##### NOT safe environ['HDF5_DISABLE_VERSION_CHECK'] = '1' # hdflib = os.path.expanduser("~") + '/anaconda/lib' # hdflib = os.getenv("HOME") + '/anaconda/lib' import pwd hdflib = pwd.getpwuid(getuid()).pw_dir + '/anaconda/lib' environ['LD_LIBRARY_PATH'] = hdflib # ################################################################################ try: response.update_status('execution of CASTf90', 70) cmd = 'analogue.out %s' % path.relpath(config_file) # system(cmd) args = shlex.split(cmd) output, error = subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() LOGGER.info('analogue.out info:\n %s ' % output) LOGGER.debug('analogue.out errors:\n %s ' % error) response.update_status('**** CASTf90 succeeded', 80) except: msg = 'CASTf90 failed' LOGGER.exception(msg) raise Exception(msg) LOGGER.debug("castf90 took %s seconds.", time.time() - start_time) # TODO: Add try - except for pdfs if plot == 'Yes': analogs_pdf = analogs.plot_analogs(configfile=config_file) else: analogs_pdf = 'dummy_plot.pdf' with open(analogs_pdf, 'a'): utime(analogs_pdf, None) response.update_status('preparing output', 90) # Stopper to keep twitcher results, for debug # dummy=dummy response.outputs['analog_pdf'].file = analogs_pdf response.outputs['config'].file = config_file response.outputs['analogs'].file = output_file response.outputs['output_netcdf'].file = simulation response.outputs['target_netcdf'].file = archive ######################## # generate analog viewer ######################## formated_analogs_file = analogs.reformat_analogs(output_file) response.outputs['formated_analogs'].file = formated_analogs_file LOGGER.info('analogs reformatted') # response.update_status('reformatted analog file', 95) viewer_html = analogs.render_viewer( # configfile=response.outputs['config'].get_url(), configfile=config_file, # datafile=response.outputs['formated_analogs'].get_url()) datafile=formated_analogs_file) response.outputs['output'].file = viewer_html response.update_status('Successfully generated analogs viewer', 95) LOGGER.info('rendered pages: %s ', viewer_html) response.update_status('execution ended', 100) LOGGER.debug("total execution took %s seconds.", time.time() - process_start_time) response.outputs['output_log'].file = 'log.txt' return response
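# --- Illustrative sketch (assuming Python 3): the CASTf90 invocation above,
# expressed with subprocess.run instead of Popen/communicate. The binary name
# 'analogue.out' is taken from the handler; the timeout is an added assumption.
import shlex
import subprocess

def run_castf90(config_file, timeout=None):
    cmd = shlex.split('analogue.out %s' % config_file)
    result = subprocess.run(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, timeout=timeout)
    return result.returncode, result.stdout, result.stderr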
def _handler(self, request, response): init_process_logger('log.txt') response.outputs['output_log'].file = 'log.txt' LOGGER.info('Start process') from datetime import datetime as dt from blackswan import weatherregimes as wr from tempfile import mkstemp response.update_status('execution started at : {}'.format(dt.now()), 5) ################################ # reading in the input arguments ################################ LOGGER.info('read in the arguments') # resources = self.getInputValues(identifier='resources') season = request.inputs['season'][0].data LOGGER.info('season %s', season) bboxDef = '-80,50,20,70' # in general format bbox = [] bboxStr = request.inputs['BBox'][0].data LOGGER.debug('BBOX selected by user: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') # Checking for wrong cordinates and apply default if nesessary if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1]) > 180) or abs(float(bboxStr[2]) > 90) or abs(float(bboxStr[3])) > 90): bboxStr = bboxDef # request.inputs['BBox'].default # .default doesn't work anymore!!! LOGGER.debug( 'BBOX is out of the range, using default instead: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') bbox.append(float(bboxStr[0])) bbox.append(float(bboxStr[2])) bbox.append(float(bboxStr[1])) bbox.append(float(bboxStr[3])) LOGGER.debug('BBOX for ocgis: %s ' % (bbox)) LOGGER.debug('BBOX original: %s ' % (bboxStr)) model_var = request.inputs['reanalyses'][0].data model, variable = model_var.split('_') period = request.inputs['period'][0].data LOGGER.info('period %s', period) anualcycle = request.inputs['anualcycle'][0].data kappa = request.inputs['kappa'][0].data LOGGER.info('kappa %s', kappa) method = request.inputs['method'][0].data LOGGER.info('Calc annual cycle with %s', method) sseas = request.inputs['sseas'][0].data LOGGER.info('Annual cycle calc with %s', sseas) start = dt.strptime(period.split('-')[0], '%Y%m%d') end = dt.strptime(period.split('-')[1], '%Y%m%d') LOGGER.debug('start: %s , end: %s ' % (start, end)) ########################### # set the environment ########################### response.update_status('fetching data from archive', 10) try: if model == 'NCEP': getlevel = False if 'z' in variable: level = variable.strip('z') # conform_units_to = None else: level = None # conform_units_to = 'hPa' elif '20CRV2' in model: getlevel = False if 'z' in variable: level = variable.strip('z') # conform_units_to = None else: level = None # conform_units_to = 'hPa' else: LOGGER.exception('Reanalyses dataset not known') LOGGER.info('environment set for model: %s' % model) except: msg = 'failed to set environment' LOGGER.exception(msg) raise Exception(msg) ########################################## # fetch Data from original data archive ########################################## from blackswan.datafetch import reanalyses as rl from blackswan.utils import get_variable # from os.path import basename, splitext from os import system from netCDF4 import Dataset from numpy import squeeze try: model_nc = rl(start=start.year, end=end.year, dataset=model, variable=variable, getlevel=getlevel) LOGGER.info('reanalyses data fetched') except: msg = 'failed to get reanalyses data' LOGGER.exception(msg) raise Exception(msg) response.update_status('fetching data done', 20) ############################################################ # get the required bbox and time region from resource data ############################################################ response.update_status('subsetting region of interest', 30) time_range = [start, end] 
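# --- Illustrative sketch: the bounding-box handling above, factored into one
# helper. Input is the 'lonmin,lonmax,latmin,latmax' string used by these
# processes; output is the [lonmin, latmin, lonmax, latmax] order passed to
# ocgis. Each coordinate is range-checked individually (the inline version
# wraps abs() around the comparison for two of the four values).
def parse_bbox(bbox_str, default='-80,50,20,70'):
    lonmin, lonmax, latmin, latmax = [float(v) for v in bbox_str.split(',')]
    if abs(lonmin) > 180 or abs(lonmax) > 180 or abs(latmin) > 90 or abs(latmax) > 90:
        lonmin, lonmax, latmin, latmax = [float(v) for v in default.split(',')]
    return [lonmin, latmin, lonmax, latmax]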
############################################################ # Block of level and domain selection for geop huge dataset ############################################################ LevMulti = False # =========================================================================================== # Temporary add step-by-step also for pressure... for slow VM machine... if ('z' in variable) or ('p' in variable): tmp_total = [] origvar = get_variable(model_nc) if ('z' in variable): level_range = [int(level), int(level)] else: level_range = None if (LevMulti == False): for z in model_nc: b0 = call(resource=z, variable=origvar, level_range=level_range, geom=bbox, spatial_wrapping='wrap', prefix='levdom_' + basename(z)[0:-3]) tmp_total.append(b0) else: # multiproc - no inprovements yet, need to check in hi perf machine... # ----------------------- try: import ctypes import os # TODO: This lib is for linux mkl_rt = ctypes.CDLL('libmkl_rt.so') nth = mkl_rt.mkl_get_max_threads() LOGGER.debug('Current number of threads: %s' % (nth)) mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64))) nth = mkl_rt.mkl_get_max_threads() LOGGER.debug('NEW number of threads: %s' % (nth)) # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?) os.environ['MKL_NUM_THREADS'] = str(nth) os.environ['OMP_NUM_THREADS'] = str(nth) except Exception as e: msg = 'Failed to set THREADS %s ' % e LOGGER.debug(msg) # ----------------------- from multiprocessing import Pool pool = Pool() # from multiprocessing.dummy import Pool as ThreadPool # pool = ThreadPool() tup_var = [origvar] * len(model_nc) tup_lev = [level] * len(model_nc) tup_bbox = [bbox] * len(model_nc) tup_args = zip(model_nc, tup_var, tup_lev, tup_bbox) tmp_total = pool.map(ocgis_call_wrap, tup_args) pool.close() pool.join() LOGGER.debug('Temporal subset files: %s' % (tmp_total)) tmp_total = sorted(tmp_total, key=lambda i: splitext(basename(i))[0]) inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range) # Clean for i in tmp_total: tbr = 'rm -f %s' % (i) system(tbr) if ('z' in variable): # Create new variable for Z geop ds = Dataset(inter_subset_tmp, mode='a') z_var = ds.variables.pop(origvar) dims = z_var.dimensions new_var = ds.createVariable('z%s' % level, z_var.dtype, dimensions=(dims[0], dims[2], dims[3])) new_var[:, :, :] = squeeze(z_var[:, 0, :, :]) # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()}) ds.close() model_subset = call(inter_subset_tmp, variable='z%s' % level) else: model_subset = inter_subset_tmp else: model_subset = call( resource=model_nc, variable=variable, geom=bbox, spatial_wrapping='wrap', time_range=time_range, # conform_units_to=conform_units_to ) # ============================================================================================= LOGGER.info('Dataset subset done: %s ', model_subset) response.update_status('dataset subsetted', 40) ############################################## # computing anomalies ############################################## response.update_status('computing anomalies ', 50) cycst = anualcycle.split('-')[0] cycen = anualcycle.split('-')[1] reference = [ dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d') ] LOGGER.info('reference time: %s', reference) model_anomal = wr.get_anomalies(model_subset, reference=reference, method=method, sseas=sseas) # , variable=variable) ##################### # extracting season ##################### response.update_status('normalizing data', 60) model_season = wr.get_season(model_anomal, season=season) 
response.update_status('anomalies computed and normalized', 70) ####################### # call the R scripts ####################### response.update_status('Start weather regime clustering ', 80) import shlex import subprocess from blackswan import config from os.path import curdir, exists, join try: # rworkspace = curdir Rsrc = config.Rsrc_dir() Rfile = 'weatherregimes_model.R' infile = model_season # model_subset #model_ponderate # modelname = model # yr1 = start.year # yr2 = end.year ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf') ip, file_pca = mkstemp(dir=curdir, suffix='.txt') ip, file_class = mkstemp(dir=curdir, suffix='.Rdat') args = [ 'Rscript', join(Rsrc, Rfile), '%s/' % curdir, '%s/' % Rsrc, '%s' % infile, '%s' % variable, '%s' % output_graphics, '%s' % file_pca, '%s' % file_class, '%s' % season, '%s' % start.year, '%s' % end.year, '%s' % model_var, '%s' % kappa ] LOGGER.info('Rcall builded') LOGGER.debug('ARGS: %s' % (args)) except: msg = 'failed to build the R command' LOGGER.exception(msg) raise Exception(msg) try: output, error = subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() LOGGER.info('R outlog info:\n %s ' % output) LOGGER.exception('R outlog errors:\n %s ' % error) if len(output) > 0: response.update_status('**** weatherregime in R suceeded', 90) else: LOGGER.exception('NO! output returned from R call') except: msg = 'weatherregime in R' LOGGER.exception(msg) raise Exception(msg) response.update_status('Weather regime clustering done ', 95) ############################################ # set the outputs ############################################ # response.update_status('Set the process outputs ', 96) response.outputs['Routput_graphic'].file = output_graphics response.outputs['output_pca'].file = file_pca response.outputs['output_classification'].file = file_class response.outputs['output_netcdf'].file = model_subset response.update_status('done', 100) return response
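# --- Illustrative sketch: creating the temporary output files the way the
# handler does with mkstemp, but closing the low-level file descriptor that
# mkstemp returns (the handler keeps reassigning it to 'ip' without closing it).
import os
from tempfile import mkstemp

def new_tmpfile(suffix, directory='.'):
    fd, filename = mkstemp(dir=directory, suffix=suffix)
    os.close(fd)            # only the path is needed afterwards
    return filename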
def _handler(self, request, response): init_process_logger('log.txt') response.outputs['output_log'].file = 'log.txt' LOGGER.info('Start process') response.update_status('execution started at : {}'.format(dt.now()), 5) process_start_time = time.time() # measure process execution time ... start_time = time.time() # measure init ... ################################ # reading in the input arguments ################################ # response.update_status('execution started at : %s ' % dt.now(), 5) # start_time = time.time() # measure init ... ################################ # reading in the input arguments ################################ try: response.update_status('read input parameter : %s ' % dt.now(), 10) resource = archiveextract(resource=rename_complexinputs(request.inputs['resource'])) refSt = request.inputs['refSt'][0].data refEn = request.inputs['refEn'][0].data dateSt = request.inputs['dateSt'][0].data dateEn = request.inputs['dateEn'][0].data seasonwin = request.inputs['seasonwin'][0].data nanalog = request.inputs['nanalog'][0].data bboxDef = '-20,40,30,70' # in general format # level = 500 level = request.inputs['level'][0].data if (level == 500): dummylevel = 1000 # dummy workaround for cdo sellevel else: dummylevel = 500 LOGGER.debug('LEVEL selected: %s hPa' % (level)) bbox = [] bboxStr = request.inputs['BBox'][0].data LOGGER.debug('BBOX selected by user: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') # Checking for wrong cordinates and apply default if nesessary if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1]) > 180) or abs(float(bboxStr[2]) > 90) or abs(float(bboxStr[3])) > 90): bboxStr = bboxDef # request.inputs['BBox'].default # .default doesn't work anymore!!! LOGGER.debug('BBOX is out of the range, using default instead: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') # for i in bboxStr: bbox.append(int(i)) bbox.append(float(bboxStr[0])) bbox.append(float(bboxStr[2])) bbox.append(float(bboxStr[1])) bbox.append(float(bboxStr[3])) LOGGER.debug('BBOX for ocgis: %s ' % (bbox)) LOGGER.debug('BBOX original: %s ' % (bboxStr)) normalize = request.inputs['normalize'][0].data plot = request.inputs['plot'][0].data distance = request.inputs['dist'][0].data outformat = request.inputs['outformat'][0].data timewin = request.inputs['timewin'][0].data detrend = request.inputs['detrend'][0].data LOGGER.info('input parameters set') response.update_status('Read in and convert the arguments', 20) except Exception as e: msg = 'failed to read input prameter %s ' % e LOGGER.error(msg) raise Exception(msg) ###################################### # convert types and set environment ###################################### try: # not nesessary if fix ocgis_module.py refSt = dt.combine(refSt, dt_time(12, 0)) refEn = dt.combine(refEn, dt_time(12, 0)) dateSt = dt.combine(dateSt, dt_time(12, 0)) dateEn = dt.combine(dateEn, dt_time(12, 0)) # Check if 360_day calendar: try: if type(resource) is not list: resource = [resource] modcal, calunits = get_calendar(resource[0]) if '360_day' in modcal: if refSt.day == 31: refSt = refSt.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (refSt)) if refEn.day == 31: refEn = refEn.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (refEn)) if dateSt.day == 31: dateSt = dateSt.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (dateSt)) if dateEn.day == 31: dateEn = dateEn.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (dateEn)) except: LOGGER.debug('Could not detect calendar') if normalize == 'None': seacyc 
= False else: seacyc = True if outformat == 'ascii': outformat = '.txt' elif outformat == 'netCDF': outformat = '.nc' else: LOGGER.error('output format not valid') start = min(refSt, dateSt) end = max(refEn, dateEn) LOGGER.info('environment set') except Exception as e: msg = 'failed to set environment %s ' % e LOGGER.error(msg) raise Exception(msg) LOGGER.debug("init took %s seconds.", time.time() - start_time) response.update_status('Read in and convert the arguments', 30) ######################## # input data preperation ######################## # TODO: Check if files containing more than one dataset response.update_status('Start preparing input data', 40) start_time = time.time() # mesure data preperation ... try: # TODO: Add selection of the level. maybe bellow in call(..., level_range=[...,...]) if type(resource) == list: # resource.sort() resource = sorted(resource, key=lambda i: path.splitext(path.basename(i))[0]) else: resource = [resource] # =============================================================== # REMOVE resources which are out of interest from the list # (years > and < than requested for calculation) tmp_resource = [] for re in resource: s,e = get_timerange(re) tmpSt = dt.strptime(s, '%Y%m%d') tmpEn = dt.strptime(e, '%Y%m%d') if ((tmpSt <= end) and (tmpEn >= start)): tmp_resource.append(re) LOGGER.debug('Selected file: %s ' % (re)) resource = tmp_resource # Try to fix memory issue... (ocgis call for files like 20-30 gb... ) # IF 4D - select pressure level before domain cut # # resource properties ds = Dataset(resource[0]) variable = get_variable(resource[0]) var = ds.variables[variable] dims = list(var.dimensions) dimlen = len(dims) try: model_id = ds.getncattr('model_id') except AttributeError: model_id = 'Unknown model' LOGGER.debug('MODEL: %s ' % (model_id)) lev_units = 'hPa' if (dimlen > 3): lev = ds.variables[dims[1]] # actually index [1] need to be detected... assuming zg(time, plev, lat, lon) lev_units = lev.units if (lev_units == 'Pa'): level = level * 100 dummylevel = dummylevel * 100 # TODO: OR check the NAME and units of vertical level and find 200 , 300, or 500 mbar in it # Not just level = level * 100. # Get Levels from cdo import Cdo cdo = Cdo(env=environ) lev_res = [] if(dimlen > 3): for res_fn in resource: tmp_f = 'lev_' + path.basename(res_fn) try: tmp_f = call(resource=res_fn, variable=variable, spatial_wrapping='wrap', level_range=[int(level), int(level)], prefix=tmp_f[0:-3]) except: comcdo = '%s,%s' % (level, dummylevel) cdo.sellevel(comcdo, input=res_fn, output=tmp_f) lev_res.append(tmp_f) else: lev_res = resource # =============================================================== # TODO: Before domain, Regrid to selected grid! (???) if no rean. # ================================================================ # Get domain regr_res = [] for res_fn in lev_res: tmp_f = 'dom_' + path.basename(res_fn) comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3]) try: tmp_f = call(resource=res_fn, geom=bbox, spatial_wrapping='wrap', prefix=tmp_f[0:-3]) except: cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f) regr_res.append(tmp_f) # ============================ # Block to Detrend data # TODO 1 Keep trend as separate file # TODO 2 Think how to add options to plot abomalies AND original data... # May be do archive and simulation = call.. 
over NOT detrended data and keep it as well if (dimlen > 3): res_tmp = get_level(regr_res, level=level) variable = 'z%s' % level else: res_tmp = call(resource=regr_res, spatial_wrapping='wrap') if detrend == 'None': orig_model_subset = res_tmp else: orig_model_subset = remove_mean_trend(res_tmp, varname=variable) # ============================ # archive_tmp = call(resource=regr_res, time_range=[refSt, refEn], spatial_wrapping='wrap') # simulation_tmp = call(resource=regr_res, time_range=[dateSt, dateEn], spatial_wrapping='wrap') ################################ # Prepare names for config.txt # ################################ # refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d') # simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d') # Fix < 1900 issue... refDatesString = refSt.isoformat().strip().split("T")[0] + "_" + refEn.isoformat().strip().split("T")[0] simDatesString = dateSt.isoformat().strip().split("T")[0] + "_" + dateEn.isoformat().strip().split("T")[0] archiveNameString = "base_" + variable + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \ % (bbox[0], bbox[2], bbox[1], bbox[3]) simNameString = "sim_" + variable + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \ % (bbox[0], bbox[2], bbox[1], bbox[3]) archive = call(resource=res_tmp, time_range=[refSt, refEn], spatial_wrapping='wrap', prefix=archiveNameString) simulation = call(resource=res_tmp, time_range=[dateSt, dateEn], spatial_wrapping='wrap', prefix=simNameString) ####################################################################################### if seacyc is True: seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize) else: seasoncyc_base = None seasoncyc_sim = None except Exception as e: msg = 'failed to prepare archive and simulation files %s ' % e LOGGER.debug(msg) raise Exception(msg) ip, output = mkstemp(dir='.', suffix='.txt') output_file = path.abspath(output) files = [path.abspath(archive), path.abspath(simulation), output_file] LOGGER.debug("data preperation took %s seconds.", time.time() - start_time) ############################ # generating the config file ############################ # TODO: add MODEL name as argument response.update_status('writing config file', 50) start_time = time.time() # measure write config ... 
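# --- Illustrative sketch, on a plain numpy array rather than a netCDF file:
# removing a least-squares linear trend from one time series, which is the
# per-grid-point idea behind the remove_mean_trend() call in the detrending
# block above. The real helper operates on the whole netCDF field.
import numpy as np

def detrend_linear(series):
    t = np.arange(series.size)
    slope, intercept = np.polyfit(t, series, 1)
    return series - (slope * t + intercept)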
try: config_file = analogs.get_configfile( files=files, seasoncyc_base=seasoncyc_base, seasoncyc_sim=seasoncyc_sim, base_id=model_id, sim_id=model_id, timewin=timewin, varname=variable, seacyc=seacyc, cycsmooth=91, nanalog=nanalog, seasonwin=seasonwin, distfun=distance, outformat=outformat, calccor=True, silent=False, # period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')], period=[refSt.isoformat().strip().split("T")[0], refEn.isoformat().strip().split("T")[0]], bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3])) except Exception as e: msg = 'failed to generate config file %s ' % e LOGGER.debug(msg) raise Exception(msg) LOGGER.debug("write_config took %s seconds.", time.time() - start_time) ############## # CASTf90 call ############## import subprocess import shlex start_time = time.time() # measure call castf90 response.update_status('Start CASTf90 call', 60) # ----------------------- try: import ctypes # TODO: This lib is for linux mkl_rt = ctypes.CDLL('libmkl_rt.so') nth = mkl_rt.mkl_get_max_threads() LOGGER.debug('Current number of threads: %s' % (nth)) mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64))) nth = mkl_rt.mkl_get_max_threads() LOGGER.debug('NEW number of threads: %s' % (nth)) # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?) environ['MKL_NUM_THREADS'] = str(nth) environ['OMP_NUM_THREADS'] = str(nth) except Exception as e: msg = 'Failed to set THREADS %s ' % e LOGGER.debug(msg) # ----------------------- # ##### TEMPORAL WORKAROUND! With instaled hdf5-1.8.18 in anaconda ############### # ##### MUST be removed after castf90 recompiled with the latest hdf version # ##### NOT safe environ['HDF5_DISABLE_VERSION_CHECK'] = '1' # hdflib = os.path.expanduser("~") + '/anaconda/lib' # hdflib = os.getenv("HOME") + '/anaconda/lib' import pwd hdflib = pwd.getpwuid(getuid()).pw_dir + '/anaconda/lib' environ['LD_LIBRARY_PATH'] = hdflib # ################################################################################ try: # response.update_status('execution of CASTf90', 50) cmd = 'analogue.out %s' % path.relpath(config_file) # system(cmd) args = shlex.split(cmd) output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() LOGGER.info('analogue.out info:\n %s ' % output) LOGGER.debug('analogue.out errors:\n %s ' % error) response.update_status('**** CASTf90 suceeded', 70) except Exception as e: msg = 'CASTf90 failed %s ' % e LOGGER.error(msg) raise Exception(msg) LOGGER.debug("castf90 took %s seconds.", time.time() - start_time) # TODO: Add try - except for pdfs if plot == 'Yes': analogs_pdf = analogs.plot_analogs(configfile=config_file) else: analogs_pdf = 'dummy_plot.pdf' with open(analogs_pdf, 'a'): utime(analogs_pdf, None) response.update_status('preparing output', 80) response.outputs['analog_pdf'].file = analogs_pdf response.outputs['config'].file = config_file response.outputs['analogs'].file = output_file response.outputs['output_netcdf'].file = simulation response.outputs['target_netcdf'].file = archive if seacyc is True: response.outputs['base_netcdf'].file = seasoncyc_base response.outputs['sim_netcdf'].file = seasoncyc_sim else: # TODO: Still unclear how to overpass unknown number of outputs dummy_base = 'dummy_base.nc' dummy_sim = 'dummy_sim.nc' with open(dummy_base, 'a'): utime(dummy_base, None) with open(dummy_sim, 'a'): utime(dummy_sim, None) response.outputs['base_netcdf'].file = dummy_base response.outputs['sim_netcdf'].file = dummy_sim ######################## # generate analog 
viewer ######################## formated_analogs_file = analogs.reformat_analogs(output_file) # response.outputs['formated_analogs'].storage = FileStorage() response.outputs['formated_analogs'].file = formated_analogs_file LOGGER.info('analogs reformatted') response.update_status('reformatted analog file', 90) viewer_html = analogs.render_viewer( # configfile=response.outputs['config'].get_url(), configfile=config_file, # datafile=response.outputs['formated_analogs'].get_url()) datafile=formated_analogs_file) response.outputs['output'].file = viewer_html response.update_status('Successfully generated analogs viewer', 95) LOGGER.info('rendered pages: %s ', viewer_html) response.update_status('execution ended', 100) LOGGER.debug("total execution took %s seconds.", time.time() - process_start_time) return response
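# --- Illustrative sketch: the ctypes-based MKL thread setup used before the
# CASTf90 call, wrapped as a helper. It only takes effect where libmkl_rt.so
# can be loaded (Linux with MKL, as the TODO notes); otherwise it returns None
# and leaves the defaults untouched.
import ctypes
import os

def set_mkl_threads(n=64):
    try:
        mkl_rt = ctypes.CDLL('libmkl_rt.so')
    except OSError:
        return None
    mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(n)))
    threads = mkl_rt.mkl_get_max_threads()
    os.environ['MKL_NUM_THREADS'] = str(threads)
    os.environ['OMP_NUM_THREADS'] = str(threads)
    return threads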
def _handler(self, request, response): init_process_logger('log.txt') response.outputs['output_log'].file = 'log.txt' LOGGER.info('Start process') response.update_status('execution started at : {}'.format(dt.now()), 5) process_start_time = time.time() # measure process execution time ... start_time = time.time() # measure init ... ################################ # reading in the input arguments ################################ try: response.update_status('read input parameter : %s ' % dt.now(), 7) refSt = request.inputs['refSt'][0].data #refEn = request.inputs['refEn'][0].data refEn = dt.strptime( '%s%s%s' % (dt.now().year, dt.now().month, dt.now().day), '%Y%m%d') refEn = refEn - timedelta(days=3) dateSt = request.inputs['dateSt'][0].data #dateEn = request.inputs['dateEn'][0].data dateEn = dt.strptime( '%s%s%s' % (dt.now().year, dt.now().month, dt.now().day), '%Y%m%d') dateEn = dateEn - timedelta(days=3) seasonwin = request.inputs['seasonwin'][0].data nanalog = request.inputs['nanalog'][0].data bboxDef = '-80,50,20,70' # in general format bbox = [] bboxStr = request.inputs['BBox'][0].data LOGGER.debug('BBOX selected by user: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') # Checking for wrong cordinates and apply default if nesessary if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1]) > 180) or abs(float(bboxStr[2]) > 90) or abs(float(bboxStr[3])) > 90): bboxStr = bboxDef # request.inputs['BBox'].default # .default doesn't work anymore!!! LOGGER.debug( 'BBOX is out of the range, using default instead: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') bbox.append(float(bboxStr[0])) bbox.append(float(bboxStr[2])) bbox.append(float(bboxStr[1])) bbox.append(float(bboxStr[3])) LOGGER.debug('BBOX for ocgis: %s ' % (bbox)) LOGGER.debug('BBOX original: %s ' % (bboxStr)) normalize = request.inputs['normalize'][0].data detrend = request.inputs['detrend'][0].data distance = request.inputs['dist'][0].data outformat = request.inputs['outformat'][0].data timewin = request.inputs['timewin'][0].data model_var = request.inputs['reanalyses'][0].data model, var = model_var.split('_') LOGGER.info('input parameters set') response.update_status('Read in and convert the arguments', 8) except Exception as e: msg = 'failed to read input prameter %s ' % e LOGGER.exception(msg) raise Exception(msg) ###################################### # convert types and set environment ###################################### try: response.update_status('Preparing enviroment converting arguments', 9) LOGGER.debug('date: %s %s %s %s ' % (type(refSt), refEn, dateSt, dateSt)) start = min(refSt, dateSt) end = max(refEn, dateEn) if normalize == 'None': seacyc = False else: seacyc = True if outformat == 'ascii': outformat = '.txt' elif outformat == 'netCDF': outformat = '.nc' else: LOGGER.exception('output format not valid') except Exception as e: msg = 'failed to set environment %s ' % e LOGGER.exception(msg) raise Exception(msg) ########################### # set the environment ########################### response.update_status('fetching data from archive', 10) # We work only with NCEP getlevel = False if 'z' in var: level = var.strip('z') else: level = None ########################################## # fetch Data from original data archive ########################################## try: model_nc = rl(start=start.year, end=end.year, dataset=model, variable=var, getlevel=getlevel) LOGGER.info('reanalyses data fetched') except Exception: msg = 'failed to get reanalyses data' LOGGER.exception(msg) raise Exception(msg) 
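# --- Illustrative sketch: the "today minus three days" end date used for refEn
# and dateEn above, computed directly with datetime arithmetic instead of
# formatting year/month/day into a string and re-parsing it with strptime.
from datetime import datetime, timedelta

def latest_available_date(lag_days=3):
    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    return today - timedelta(days=lag_days)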
response.update_status('subsetting region of interest', 17) LOGGER.debug("start and end time: %s - %s" % (start, end)) time_range = [start, end] # Checking memory and dataset size m_size = get_files_size(model_nc) memory_avail = psutil.virtual_memory().available thrs = 0.2 # 20% if (m_size >= thrs * memory_avail): ser_r = True else: ser_r = False LOGGER.debug('Available Memory: %s ' % (memory_avail)) LOGGER.debug('Dataset size: %s ' % (m_size)) LOGGER.debug('Threshold: %s ' % (thrs * memory_avail)) LOGGER.debug('Serial or at once: %s ' % (ser_r)) # if ('20CRV2' in model) and ('z' in var): if ('z' in var): tmp_total = [] origvar = get_variable(model_nc) for z in model_nc: tmp_n = 'tmp_%s' % (uuid.uuid1()) b0 = call(resource=z, variable=origvar, level_range=[int(level), int(level)], geom=bbox, spatial_wrapping='wrap', prefix='levdom_' + os.path.basename(z)[0:-3]) tmp_total.append(b0) tmp_total = sorted( tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0]) inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range) # Clean for i in tmp_total: tbr = 'rm -f %s' % (i) os.system(tbr) # Create new variable ds = Dataset(inter_subset_tmp, mode='a') z_var = ds.variables.pop(origvar) dims = z_var.dimensions new_var = ds.createVariable('z%s' % level, z_var.dtype, dimensions=(dims[0], dims[2], dims[3])) new_var[:, :, :] = squeeze(z_var[:, 0, :, :]) # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()}) ds.close() model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level) else: if ser_r: LOGGER.debug('Process reanalysis step-by-step') tmp_total = [] for z in model_nc: tmp_n = 'tmp_%s' % (uuid.uuid1()) b0 = call(resource=z, variable=var, geom=bbox, spatial_wrapping='wrap', prefix='Rdom_' + os.path.basename(z)[0:-3]) tmp_total.append(b0) tmp_total = sorted( tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0]) model_subset_tmp = call(resource=tmp_total, variable=var, time_range=time_range) # Clean for i in tmp_total: tbr = 'rm -f %s' % (i) os.system(tbr) else: LOGGER.debug('Using whole dataset at once') model_subset_tmp = call( resource=model_nc, variable=var, geom=bbox, spatial_wrapping='wrap', time_range=time_range, ) # Rest from 20CRV... model_subset = model_subset_tmp LOGGER.info('Dataset subset done: %s ', model_subset) response.update_status('dataset subsetted', 19) # BLOCK OF DETRENDING of model_subset ! # Original model subset kept to further visualisaion if needed # Now is issue with SLP: # TODO 1 Keep trend as separate file # TODO 2 Think how to add options to plot abomalies AND original data... # May be do archive and simulation = call.. over NOT detrended data and keep it as well # TODO 3 Check with faster smoother add removing trend of each grid if detrend == 'None': orig_model_subset = model_subset else: orig_model_subset = remove_mean_trend(model_subset, varname=var) # ====================================== LOGGER.debug("get_input_subset_dataset took %s seconds.", time.time() - start_time) response.update_status('**** Input data fetched', 20) ######################## # input data preperation ######################## response.update_status('Start preparing input data', 22) start_time = time.time() # measure data preperation ... try: # Construct descriptive filenames for the three files # listed in config file # TODO check strftime for years <1900 (!) 
refDatesString = dt.strftime( refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d') simDatesString = dt.strftime( dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d') archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \ % (bbox[0], bbox[2], bbox[1], bbox[3]) simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \ % (bbox[0], bbox[2], bbox[1], bbox[3]) archive = call(resource=model_subset, time_range=[refSt, refEn], prefix=archiveNameString) simulation = call(resource=model_subset, time_range=[dateSt, dateEn], prefix=simNameString) LOGGER.info('archive and simulation files generated: %s, %s' % (archive, simulation)) except Exception as e: msg = 'failed to prepare archive and simulation files %s ' % e LOGGER.exception(msg) raise Exception(msg) try: if seacyc is True: LOGGER.info('normalization function with method: %s ' % normalize) seasoncyc_base, seasoncyc_sim = analogs.seacyc( archive, simulation, method=normalize) else: seasoncyc_base = seasoncyc_sim = None except Exception as e: msg = 'failed to generate normalization files %s ' % e LOGGER.exception(msg) raise Exception(msg) output_file = 'output.txt' files = [ os.path.abspath(archive), os.path.abspath(simulation), output_file ] LOGGER.debug("Data preperation took %s seconds.", time.time() - start_time) ############################ # generate the config file ############################ config_file = analogs.get_configfile( files=files, seasoncyc_base=seasoncyc_base, seasoncyc_sim=seasoncyc_sim, base_id=model, sim_id=model, timewin=timewin, varname=var, seacyc=seacyc, cycsmooth=91, nanalog=nanalog, seasonwin=seasonwin, distfun=distance, outformat=outformat, calccor=True, silent=False, period=[ dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d') ], bbox="{0[0]},{0[2]},{0[1]},{0[3]}".format(bbox)) response.update_status('generated config file', 25) ####################### # CASTf90 call ####################### start_time = time.time() # measure call castf90 #----------------------- try: import ctypes # TODO: This lib is for linux mkl_rt = ctypes.CDLL('libmkl_rt.so') nth = mkl_rt.mkl_get_max_threads() LOGGER.debug('Current number of threads: %s' % (nth)) mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64))) nth = mkl_rt.mkl_get_max_threads() LOGGER.debug('NEW number of threads: %s' % (nth)) # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?) os.environ['MKL_NUM_THREADS'] = str(nth) os.environ['OMP_NUM_THREADS'] = str(nth) except Exception as e: msg = 'Failed to set THREADS %s ' % e LOGGER.debug(msg) #----------------------- # ##### TEMPORAL WORKAROUND! 
With instaled hdf5-1.8.18 in anaconda ############### # ##### MUST be removed after castf90 recompiled with the latest hdf version # ##### NOT safe os.environ['HDF5_DISABLE_VERSION_CHECK'] = '1' #hdflib = os.path.expanduser("~") + '/anaconda/lib' #hdflib = os.getenv("HOME") + '/anaconda/lib' import pwd hdflib = pwd.getpwuid(os.getuid()).pw_dir + '/anaconda/lib' os.environ['LD_LIBRARY_PATH'] = hdflib # ################################################################################ response.update_status('Start CASTf90 call', 30) try: # response.update_status('execution of CASTf90', 50) cmd = ['analogue.out', config_file] LOGGER.debug("castf90 command: %s", cmd) output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) LOGGER.info('analogue output:\n %s', output) response.update_status('**** CASTf90 suceeded', 40) except CalledProcessError as e: msg = 'CASTf90 failed:\n{0}'.format(e.output) LOGGER.exception(msg) raise Exception(msg) LOGGER.debug("castf90 took %s seconds.", time.time() - start_time) # --------------- R cont analogs calcs ----------------------------------- ####################### # call the R scripts ####################### response.update_status( 'Start calculation of the stats and Return Periods ', 50) import shlex # import subprocess from blackswan import config from blackswan.visualisation import pdfmerge from os.path import curdir, exists, join try: #rworkspace = curdir Rsrc = config.Rsrc_dir() Rfile = 'analogs_diags-prox.R' Rdatfile = 'analogs_RT.Rdat' probs_c = 0.7 probs_n = 0.3 args = [ 'Rscript', join(Rsrc, Rfile), '%s' % output_file, '%s' % probs_c, '%s' % probs_n, '%s' % Rdatfile ] LOGGER.info('Rcall builded') LOGGER.debug('ARGS: %s' % (args)) except: msg = 'failed to build the R command' LOGGER.exception(msg) raise Exception(msg) try: output, error = subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() LOGGER.info('R outlog info:\n %s ' % output) LOGGER.exception('R outlog errors:\n %s ' % error) if len(output) > 0: response.update_status('**** Return Periods with R suceeded', 60) else: LOGGER.exception('NO! 
output returned from R call') analogs_pdf = pdfmerge( ['analogs_score-diags_new.pdf', 'analogs_RP-diags_new.pdf']) except: msg = 'ReturnPeriods in R' LOGGER.exception(msg) raise Exception(msg) response.update_status('Calculation of Return Periods done ', 70) # --------------- END of R cont analogs calcs ---------------------------- response.update_status('preparing output', 75) response.outputs['analog_pdf'].file = analogs_pdf response.outputs['config'].file = config_file response.outputs['analogs'].file = output_file response.outputs['output_netcdf'].file = simulation response.outputs['target_netcdf'].file = archive if seacyc is True: response.outputs['base_netcdf'].file = seasoncyc_base response.outputs['sim_netcdf'].file = seasoncyc_sim else: # TODO: Still unclear how to overpass unknown number of outputs dummy_base = 'dummy_base.nc' dummy_sim = 'dummy_sim.nc' with open(dummy_base, 'a'): os.utime(dummy_base, None) with open(dummy_sim, 'a'): os.utime(dummy_sim, None) response.outputs['base_netcdf'].file = dummy_base response.outputs['sim_netcdf'].file = dummy_sim ######################## # generate analog viewer ######################## formated_analogs_file = analogs.reformat_analogs(output_file) # response.outputs['formated_analogs'].storage = FileStorage() response.outputs['formated_analogs'].file = formated_analogs_file LOGGER.info('analogs reformated') response.update_status('reformatted analog file', 80) viewer_html = analogs.render_viewer( # configfile=response.outputs['config'].get_url(), configfile=config_file, # datafile=response.outputs['formated_analogs'].get_url()) datafile=formated_analogs_file) response.outputs['output'].file = viewer_html response.update_status('Successfully generated analogs viewer', 90) LOGGER.info('rendered pages: %s ', viewer_html) response.update_status('execution ended', 100) LOGGER.debug("total execution took %s seconds.", time.time() - process_start_time) return response
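# --- Illustrative sketch: the 'touch' idiom used above to create the empty
# placeholder outputs (dummy_base.nc / dummy_sim.nc) when no seasonal-cycle
# files are produced, wrapped as a helper.
import os

def touch(path):
    with open(path, 'a'):
        os.utime(path, None)
    return path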
def _handler(self, request, response): init_process_logger('log.txt') response.outputs['output_log'].file = 'log.txt' LOGGER.info('Start process') response.update_status('execution started at : {}'.format(dt.now()), 5) process_start_time = time.time() # measure process execution time ... start_time = time.time() # measure init ... ################################ # reading in the input arguments ################################ # response.update_status('execution started at : %s ' % dt.now(), 5) # start_time = time.time() # measure init ... ################################ # reading in the input arguments ################################ try: response.update_status('read input parameter : %s ' % dt.now(), 10) resource = archiveextract( resource=rename_complexinputs(request.inputs['resource'])) dateSt = request.inputs['dateSt'][0].data dateEn = request.inputs['dateEn'][0].data bboxDef = '-20,40,30,70' # in general format # level = 500 season = request.inputs['season'][0].data level = request.inputs['level'][0].data if (level == 500): dummylevel = 1000 # dummy workaround for cdo sellevel else: dummylevel = 500 LOGGER.debug('LEVEL selected: %s hPa' % (level)) bbox = [] bboxStr = request.inputs['BBox'][0].data LOGGER.debug('BBOX selected by user: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') # Checking for wrong cordinates and apply default if nesessary if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1]) > 180) or abs(float(bboxStr[2]) > 90) or abs(float(bboxStr[3])) > 90): bboxStr = bboxDef # request.inputs['BBox'].default # .default doesn't work anymore!!! LOGGER.debug( 'BBOX is out of the range, using default instead: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') # for i in bboxStr: bbox.append(int(i)) bbox.append(float(bboxStr[0])) bbox.append(float(bboxStr[2])) bbox.append(float(bboxStr[1])) bbox.append(float(bboxStr[3])) LOGGER.debug('BBOX for ocgis: %s ' % (bbox)) LOGGER.debug('BBOX original: %s ' % (bboxStr)) distance = request.inputs['dist'][0].data method = request.inputs['method'][0].data LOGGER.info('input parameters set') response.update_status('Read in and convert the arguments', 20) except Exception as e: msg = 'failed to read input prameter %s ' % e LOGGER.error(msg) raise Exception(msg) ###################################### # convert types and set environment ###################################### try: # not nesessary if fix ocgis_module.py dateSt = dt.combine(dateSt, dt_time(12, 0)) dateEn = dt.combine(dateEn, dt_time(12, 0)) # Check if 360_day calendar: try: if type(resource) is not list: resource = [resource] modcal, calunits = get_calendar(resource[0]) if '360_day' in modcal: if dateSt.day == 31: dateSt = dateSt.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (dateSt)) if dateEn.day == 31: dateEn = dateEn.replace(day=30) LOGGER.debug('Date has been changed for: %s' % (dateEn)) except: LOGGER.debug('Could not detect calendar') start = dateSt end = dateEn time_range = [start, end] LOGGER.info('environment set') except Exception as e: msg = 'failed to set environment %s ' % e LOGGER.error(msg) raise Exception(msg) LOGGER.debug("init took %s seconds.", time.time() - start_time) response.update_status('Read in and convert the arguments', 30) ######################## # input data preperation ######################## # TODO: Check if files containing more than one dataset response.update_status('Start preparing input data', 40) start_time = time.time() # mesure data preperation ... try: # TODO: Add selection of the level. 
maybe bellow in call(..., level_range=[...,...]) if type(resource) == list: # resource.sort() resource = sorted( resource, key=lambda i: path.splitext(path.basename(i))[0]) else: resource = [resource] # =============================================================== # REMOVE resources which are out of interest from the list # (years > and < than requested for calculation) tmp_resource = [] for re in resource: s, e = get_timerange(re) tmpSt = dt.strptime(s, '%Y%m%d') tmpEn = dt.strptime(e, '%Y%m%d') if ((tmpSt <= end) and (tmpEn >= start)): tmp_resource.append(re) LOGGER.debug('Selected file: %s ' % (re)) resource = tmp_resource # Try to fix memory issue... (ocgis call for files like 20-30 gb... ) # IF 4D - select pressure level before domain cut # # resource properties ds = Dataset(resource[0]) variable = get_variable(resource[0]) var = ds.variables[variable] dims = list(var.dimensions) dimlen = len(dims) try: model_id = ds.getncattr('model_id') except AttributeError: model_id = 'Unknown_model' LOGGER.debug('MODEL: %s ' % (model_id)) lev_units = 'hPa' if (dimlen > 3): lev = ds.variables[dims[1]] # actually index [1] need to be detected... assuming zg(time, plev, lat, lon) lev_units = lev.units if (lev_units == 'Pa'): level = level * 100 dummylevel = dummylevel * 100 # TODO: OR check the NAME and units of vertical level and find 200 , 300, or 500 mbar in it # Not just level = level * 100. # Get Levels from cdo import Cdo cdo = Cdo(env=environ) lev_res = [] if (dimlen > 3): for res_fn in resource: tmp_f = 'lev_' + path.basename(res_fn) try: tmp_f = call(resource=res_fn, variable=variable, spatial_wrapping='wrap', level_range=[int(level), int(level)], prefix=tmp_f[0:-3]) except: comcdo = '%s,%s' % (level, dummylevel) cdo.sellevel(comcdo, input=res_fn, output=tmp_f) lev_res.append(tmp_f) else: lev_res = resource # =============================================================== # TODO: Before domain, Regrid to selected grid! (???) if no rean. # ================================================================ # Get domain regr_res = [] for res_fn in lev_res: tmp_f = 'dom_' + path.basename(res_fn) comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3]) try: tmp_f = call(resource=res_fn, geom=bbox, spatial_wrapping='wrap', prefix=tmp_f[0:-3]) except: cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f) regr_res.append(tmp_f) # ============================ # Block to collect final data if (dimlen > 3): res_tmp_tmp = get_level(regr_res, level=level) variable = 'z%s' % level res_tmp = call(resource=res_tmp_tmp, variable=variable, time_range=time_range) else: res_tmp = call(resource=regr_res, time_range=time_range, spatial_wrapping='wrap') ####################################################################################### except Exception as e: msg = 'failed to prepare archive and simulation files %s ' % e LOGGER.debug(msg) raise Exception(msg) LOGGER.debug("data preperation took %s seconds.", time.time() - start_time) # ----------------------- # try: # import ctypes # # TODO: This lib is for linux # mkl_rt = ctypes.CDLL('libmkl_rt.so') # nth = mkl_rt.mkl_get_max_threads() # LOGGER.debug('Current number of threads: %s' % (nth)) # mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64))) # nth = mkl_rt.mkl_get_max_threads() # LOGGER.debug('NEW number of threads: %s' % (nth)) # # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?) 
# environ['MKL_NUM_THREADS'] = str(nth) # environ['OMP_NUM_THREADS'] = str(nth) # except Exception as e: # msg = 'Failed to set THREADS %s ' % e # LOGGER.debug(msg) # ----------------------- response.update_status('Start DIM calc', 50) # Calculation of Local Dimentsions ================== LOGGER.debug('Calculation of the distances using: %s metric' % (distance)) LOGGER.debug('Calculation of the dims with: %s' % (method)) dim_filename = '%s.txt' % model_id tmp_dim_fn = '%s.txt' % uuid.uuid1() Rsrc = config.Rsrc_dir() if (method == 'Python'): try: l_dist, l_theta = localdims(resource=res_tmp, variable=variable, distance=str(distance)) response.update_status('**** Dims with Python suceeded', 60) except: LOGGER.exception('NO! output returned from Python call') if (method == 'Python_wrap'): try: l_dist, l_theta = localdims_par(resource=res_tmp, variable=variable, distance=str(distance)) response.update_status('**** Dims with Python suceeded', 60) except: LOGGER.exception('NO! output returned from Python call') if (method == 'R'): # from os.path import join Rfile = 'localdimension_persistence_fullD.R' args = [ 'Rscript', path.join(Rsrc, Rfile), '%s' % res_tmp, '%s' % variable, '%s' % tmp_dim_fn ] LOGGER.info('Rcall builded') LOGGER.debug('ARGS: %s' % (args)) try: output, error = subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() LOGGER.info('R outlog info:\n %s ' % output) LOGGER.exception('R outlog errors:\n %s ' % error) if len(output) > 0: response.update_status('**** Dims with R suceeded', 60) else: LOGGER.exception('NO! output returned from R call') # HERE READ DATA FROM TEXT FILES R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',') l_theta = R_resdim[:, 0] l_dist = R_resdim[:, 1] except: msg = 'Dim with R' LOGGER.exception(msg) raise Exception(msg) if (method == 'R_wrap'): # from os.path import join Rfile = 'localdimension_persistence_serrD.R' args = [ 'Rscript', path.join(Rsrc, Rfile), '%s' % res_tmp, '%s' % variable, '%s' % tmp_dim_fn ] LOGGER.info('Rcall builded') LOGGER.debug('ARGS: %s' % (args)) try: output, error = subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() LOGGER.info('R outlog info:\n %s ' % output) LOGGER.exception('R outlog errors:\n %s ' % error) if len(output) > 0: response.update_status('**** Dims with R_wrap suceeded', 60) else: LOGGER.exception('NO! output returned from R call') # HERE READ DATA FROM TEXT FILES R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',') l_theta = R_resdim[:, 0] l_dist = R_resdim[:, 1] except: msg = 'Dim with R_wrap' LOGGER.exception(msg) raise Exception(msg) try: res_times = get_time(res_tmp) except: LOGGER.debug('Not standard calendar') res_times = analogs.get_time_nc(res_tmp) # plot 1 ld_pdf = analogs.pdf_from_ld(x=l_dist, y=l_theta) # res_times = [ res_times[i].isoformat().strip().split("T")[0].replace('-', '') for i in range(len(res_times)) ] # concatenation of values concat_vals = column_stack([res_times, l_theta, l_dist]) savetxt(dim_filename, concat_vals, fmt='%s', delimiter=',') # output season try: seas = _TIMEREGIONS_[season]['month'] # [12, 1, 2] LOGGER.info('Season to grep from TIMEREGIONS: %s ' % season) LOGGER.info('Season N to grep from TIMEREGIONS: %s ' % seas) except: LOGGER.info('No months in TIMEREGIONS, moving to months') try: seas = _MONTHS_[season]['month'] # [1] or [2] or ... 
LOGGER.info('Season to grep from MONTHS: %s ' % season) LOGGER.info('Season N to grep from MONTHS: %s ' % seas) except: seas = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] ind = [] # TODO: change concat_vals[i][0][4:6] to dt_obj.month !!! for i in range(len(res_times)): if (int(concat_vals[i][0][4:6]) in seas[:]): ind.append(i) sf = column_stack([concat_vals[i] for i in ind]).T seas_dim_filename = season + '_' + dim_filename savetxt(seas_dim_filename, sf, fmt='%s', delimiter=',') # -------------------------- plot with R --------------- R_plot_file = 'plot_csv.R' ld2_pdf = 'local_dims.pdf' ld2_seas_pdf = season + '_local_dims.pdf' args = [ 'Rscript', path.join(Rsrc, R_plot_file), '%s' % dim_filename, '%s' % ld2_pdf ] try: output, error = subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() LOGGER.info('R outlog info:\n %s ' % output) LOGGER.exception('R outlog errors:\n %s ' % error) except: msg = 'Could not produce plot' LOGGER.exception(msg) # TODO: Here need produce empty pdf to pass to output args = [ 'Rscript', path.join(Rsrc, R_plot_file), '%s' % seas_dim_filename, '%s' % ld2_seas_pdf ] try: output, error = subprocess.Popen( args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() LOGGER.info('R outlog info:\n %s ' % output) LOGGER.exception('R outlog errors:\n %s ' % error) except: msg = 'Could not produce plot' LOGGER.exception(msg) # TODO: Here need produce empty pdf(s) to pass to output # # ==================================================== response.update_status('preparing output', 80) response.outputs['ldist'].file = dim_filename response.outputs['ldist_seas'].file = seas_dim_filename response.outputs['ld_pdf'].file = ld_pdf response.outputs['ld2_pdf'].file = ld2_pdf response.outputs['ld2_seas_pdf'].file = ld2_seas_pdf response.update_status('execution ended', 100) LOGGER.debug("total execution took %s seconds.", time.time() - process_start_time) return response
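# --- Illustrative sketch for the TODO above about using dt_obj.month instead
# of slicing characters [4:6]: selecting the records whose YYYYMMDD stamp falls
# in the requested season. 'rows' stands for the (date, theta, dist) records
# built with column_stack.
from datetime import datetime

def rows_in_season(rows, months):
    keep = []
    for row in rows:
        stamp = datetime.strptime(str(row[0]), '%Y%m%d')
        if stamp.month in months:
            keep.append(row)
    return keep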
def _handler(self, request, response): init_process_logger('log.txt') response.outputs['output_log'].file = 'log.txt' LOGGER.info('Start process') response.update_status('execution started at : {}'.format(dt.now()), 5) process_start_time = time.time() # measure process execution time ... start_time = time.time() # measure init ... ################################ # reading in the input arguments ################################ try: response.update_status('read input parameter : %s ' % dt.now(), 6) refSt = request.inputs['refSt'][0].data refEn = request.inputs['refEn'][0].data dateSt = request.inputs['dateSt'][0].data dateEn = request.inputs['dateEn'][0].data seasonwin = request.inputs['seasonwin'][0].data nanalog = request.inputs['nanalog'][0].data bboxDef = '-20,40,30,70' # in general format bbox = [] bboxStr = request.inputs['BBox'][0].data LOGGER.debug('BBOX selected by user: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') # Checking for wrong cordinates and apply default if nesessary if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1]) > 180) or abs(float(bboxStr[2]) > 90) or abs(float(bboxStr[3])) > 90): bboxStr = bboxDef # request.inputs['BBox'].default # .default doesn't work anymore!!! LOGGER.debug('BBOX is out of the range, using default instead: %s ' % (bboxStr)) bboxStr = bboxStr.split(',') bbox.append(float(bboxStr[0])) bbox.append(float(bboxStr[2])) bbox.append(float(bboxStr[1])) bbox.append(float(bboxStr[3])) LOGGER.debug('BBOX for ocgis: %s ' % (bbox)) LOGGER.debug('BBOX original: %s ' % (bboxStr)) plot = request.inputs['plot'][0].data distance = request.inputs['dist'][0].data outformat = request.inputs['outformat'][0].data timewin = request.inputs['timewin'][0].data model_var = request.inputs['reanalyses'][0].data model, var = model_var.split('_') ref_model_var = request.inputs['Refreanalyses'][0].data ref_model, ref_var = ref_model_var.split('_') LOGGER.info('input parameters set') response.update_status('Read in and convert the arguments', 7) except Exception as e: msg = 'failed to read input prameter %s ' % e LOGGER.exception(msg) raise Exception(msg) ###################################### # convert types and set environment ###################################### try: response.update_status('Preparing enviroment converting arguments', 8) LOGGER.debug('date: %s %s %s %s ' % (type(refSt), refEn, dateSt, dateSt)) # normalize == 'None': seacyc = False if outformat == 'ascii': outformat = '.txt' elif outformat == 'netCDF': outformat = '.nc' else: LOGGER.exception('output format not valid') except Exception as e: msg = 'failed to set environment %s ' % e LOGGER.exception(msg) raise Exception(msg) ########################### # set the environment ########################### response.update_status('fetching data from archive', 9) getlevel = False if 'z' in var: level = var.strip('z') else: level = None ########################################## # fetch Data from original data archive ########################################## try: model_nc = rl(start=dateSt.year, end=dateEn.year, dataset=model, variable=var, getlevel=getlevel) ref_model_nc = rl(start=refSt.year, end=refEn.year, dataset=ref_model, variable=ref_var, getlevel=getlevel) LOGGER.info('reanalyses data fetched') except Exception: msg = 'failed to get reanalyses data' LOGGER.exception(msg) raise Exception(msg) response.update_status('subsetting region of interest', 10) # Checking memory and dataset size model_size = get_files_size(model_nc) ref_model_size = get_files_size(ref_model_nc) m_size = max(model_size, 
    memory_avail = psutil.virtual_memory().available
    thrs = 0.2  # use at most 20% of the available memory
    ser_r = m_size >= thrs * memory_avail

    LOGGER.debug('Available Memory: %s ' % (memory_avail))
    LOGGER.debug('Dataset size: %s ' % (m_size))
    LOGGER.debug('Threshold: %s ' % (thrs * memory_avail))
    LOGGER.debug('Serial or at once: %s ' % (ser_r))

    # ####################################################
    # Construct descriptive filenames for the three files
    # listed in the config file.
    # TODO: check strftime for years < 1900 (!)
    # ####################################################

    # refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
    # simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
    # Fix the < 1900 issue by using isoformat() instead of strftime:
    refDatesString = refSt.isoformat().strip().split("T")[0] + "_" + \
        refEn.isoformat().strip().split("T")[0]
    simDatesString = dateSt.isoformat().strip().split("T")[0] + "_" + \
        dateEn.isoformat().strip().split("T")[0]

    archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
        % (bbox[0], bbox[2], bbox[1], bbox[3])
    simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
        % (bbox[0], bbox[2], bbox[1], bbox[3])

    if 'z' in var:
        # ------------------ NCEP -------------------
        tmp_total = []
        origvar = get_variable(model_nc)

        for z in model_nc:
            # select level and subset the domain
            b0 = call(resource=z, variable=origvar,
                      level_range=[int(level), int(level)], geom=bbox,
                      spatial_wrapping='wrap',
                      prefix='levdom_' + os.path.basename(z)[0:-3])
            tmp_total.append(b0)

        time_range = [dateSt, dateEn]
        tmp_total = sorted(tmp_total,
                           key=lambda i: os.path.splitext(os.path.basename(i))[0])
        inter_subset_tmp = call(resource=tmp_total, variable=origvar,
                                time_range=time_range)

        # Clean up the temporary per-file subsets
        for i in tmp_total:
            tbr = 'rm -f %s' % (i)
            os.system(tbr)

        # Create a new variable holding only the selected level
        ds = Dataset(inter_subset_tmp, mode='a')
        z_var = ds.variables.pop(origvar)
        dims = z_var.dimensions
        new_var = ds.createVariable('z%s' % level, z_var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
        ds.close()

        simulation = call(inter_subset_tmp, variable='z%s' % level,
                          prefix=simNameString)

        # ------------------ 20CRV2c -------------------
        tmp_total = []
        origvar = get_variable(ref_model_nc)

        for z in ref_model_nc:
            tmp_n = 'tmp_%s' % (uuid.uuid1())
            # select level and regrid
            b0 = call(resource=z, variable=origvar,
                      level_range=[int(level), int(level)],
                      spatial_wrapping='wrap', cdover='system',
                      regrid_destination=model_nc[0], regrid_options='bil',
                      prefix=tmp_n)
            # select domain
            b01 = call(resource=b0, variable=origvar, geom=bbox,
                       spatial_wrapping='wrap',
                       prefix='levregr_' + os.path.basename(z)[0:-3])
            tbr = 'rm -f %s' % (b0)
            os.system(tbr)
            tbr = 'rm -f %s.nc' % (tmp_n)
            os.system(tbr)
            tmp_total.append(b01)

        time_range = [refSt, refEn]
        tmp_total = sorted(tmp_total,
                           key=lambda i: os.path.splitext(os.path.basename(i))[0])
        ref_inter_subset_tmp = call(resource=tmp_total, variable=origvar,
                                    time_range=time_range)

        # Clean up the temporary per-file subsets
        for i in tmp_total:
            tbr = 'rm -f %s' % (i)
            os.system(tbr)

        # Create a new variable holding only the selected level
        ds = Dataset(ref_inter_subset_tmp, mode='a')
        z_var = ds.variables.pop(origvar)
        dims = z_var.dimensions
        new_var = ds.createVariable('z%s' % level, z_var.dtype,
                                    dimensions=(dims[0], dims[2], dims[3]))
        new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
        ds.close()

        archive = call(ref_inter_subset_tmp, variable='z%s' % level,
                       prefix=archiveNameString)
    else:
        if ser_r:
            LOGGER.debug('Process reanalysis step-by-step')

            # ----- NCEP ------
            tmp_total = []
            for z in model_nc:
                b0 = call(resource=z, variable=var, geom=bbox,
                          spatial_wrapping='wrap',
                          prefix='Rdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)

            tmp_total = sorted(tmp_total,
                               key=lambda i: os.path.splitext(os.path.basename(i))[0])
            simulation = call(resource=tmp_total, variable=var,
                              time_range=[dateSt, dateEn], prefix=simNameString)

            # Clean up the temporary per-file subsets
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                os.system(tbr)

            # ----- 20CRV2c ------
            tmp_n = 'tmp_%s' % (uuid.uuid1())
            tmp_total = []
            for z in ref_model_nc:
                # regrid
                b0 = call(resource=z, variable=ref_var,
                          spatial_wrapping='wrap', cdover='system',
                          regrid_destination=model_nc[0], regrid_options='bil',
                          prefix=tmp_n)
                # select domain
                b01 = call(resource=b0, variable=ref_var, geom=bbox,
                           spatial_wrapping='wrap',
                           prefix='ref_Rdom_' + os.path.basename(z)[0:-3])
                tbr = 'rm -f %s' % (b0)
                os.system(tbr)
                tbr = 'rm -f %s.nc' % (tmp_n)
                os.system(tbr)
                tmp_total.append(b01)

            tmp_total = sorted(tmp_total,
                               key=lambda i: os.path.splitext(os.path.basename(i))[0])
            archive = call(resource=tmp_total, variable=ref_var,
                           time_range=[refSt, refEn], prefix=archiveNameString)

            # Clean up the temporary per-file subsets
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                os.system(tbr)
        else:
            LOGGER.debug('Using whole dataset at once')
            simulation = call(resource=model_nc, variable=var,
                              geom=bbox, spatial_wrapping='wrap',
                              time_range=[dateSt, dateEn],
                              prefix=simNameString)

            ref_inter_subset_tmp = call(resource=ref_model_nc, variable=ref_var,
                                        spatial_wrapping='wrap', cdover='system',
                                        regrid_destination=model_nc[0],
                                        regrid_options='bil')
            archive = call(resource=ref_inter_subset_tmp, geom=bbox,
                           spatial_wrapping='wrap',
                           time_range=[refSt, refEn],
                           prefix=archiveNameString)

    response.update_status('datasets subsetted', 15)

    LOGGER.debug("get_input_subset_dataset took %s seconds.",
                 time.time() - start_time)
    response.update_status('**** Input data fetched', 20)

    ########################
    # input data preparation
    ########################
    response.update_status('Start preparing input data', 30)
    start_time = time.time()  # measure data preparation ...

    LOGGER.info('archive and simulation files generated: %s, %s'
                % (archive, simulation))

    # Rename variable
    # (TODO: for this specific process the names are known: slp and prmsl ...)
    try:
        if level is not None:
            out_var = 'z%s' % level
        else:
            var_archive = get_variable(archive)
            var_simulation = get_variable(simulation)
            if var_archive != var_simulation:
                rename_variable(archive, oldname=var_archive,
                                newname=var_simulation)
                LOGGER.info('varname %s in netCDF renamed to %s'
                            % (var_archive, var_simulation))
            # set out_var even if no renaming was needed
            out_var = var_simulation
    except Exception:
        msg = 'failed to rename variable in target files'
        LOGGER.exception(msg)
        raise Exception(msg)

    # seacyc is False: no seasonal cycle files are needed
    seasoncyc_base = seasoncyc_sim = None

    output_file = 'output.txt'
    files = [os.path.abspath(archive), os.path.abspath(simulation), output_file]

    LOGGER.debug("Data preparation took %s seconds.",
                 time.time() - start_time)

    ############################
    # generate the config file
    ############################
    config_file = analogs.get_configfile(
        files=files,
        seasoncyc_base=seasoncyc_base,
        seasoncyc_sim=seasoncyc_sim,
        base_id=ref_model,
        sim_id=model,
        timewin=timewin,
        varname=out_var,
        seacyc=seacyc,
        cycsmooth=91,
        nanalog=nanalog,
        seasonwin=seasonwin,
        distfun=distance,
        outformat=outformat,
        calccor=True,
        silent=False,
        # period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
        period=[refSt.isoformat().strip().split("T")[0],
                refEn.isoformat().strip().split("T")[0]],
        bbox="{0[0]},{0[2]},{0[1]},{0[3]}".format(bbox))
    response.update_status('generated config file', 40)

    #######################
    # CASTf90 call
    #######################
    start_time = time.time()  # measure call of castf90

    # -----------------------
    try:
        import ctypes
        # TODO: this library path is Linux-specific
        mkl_rt = ctypes.CDLL('libmkl_rt.so')
        nth = mkl_rt.mkl_get_max_threads()
        LOGGER.debug('Current number of threads: %s' % (nth))
        mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
        nth = mkl_rt.mkl_get_max_threads()
        LOGGER.debug('NEW number of threads: %s' % (nth))
        # TODO: check whether the lines below work with the default
        # shell=False in subprocess
        os.environ['MKL_NUM_THREADS'] = str(nth)
        os.environ['OMP_NUM_THREADS'] = str(nth)
    except Exception as e:
        msg = 'Failed to set THREADS %s ' % e
        LOGGER.debug(msg)
    # -----------------------

    # ##### TEMPORARY WORKAROUND! With installed hdf5-1.8.18 in anaconda ###############
    # ##### MUST be removed after castf90 is recompiled with the latest hdf5 version
    # ##### NOT safe
    os.environ['HDF5_DISABLE_VERSION_CHECK'] = '1'
    # hdflib = os.path.expanduser("~") + '/anaconda/lib'
    # hdflib = os.getenv("HOME") + '/anaconda/lib'
    import pwd
    hdflib = pwd.getpwuid(os.getuid()).pw_dir + '/anaconda/lib'
    os.environ['LD_LIBRARY_PATH'] = hdflib
    # ################################################################################

    response.update_status('Start CASTf90 call', 50)
    try:
        # response.update_status('execution of CASTf90', 50)
        cmd = ['analogue.out', config_file]
        LOGGER.debug("castf90 command: %s", cmd)
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        LOGGER.info('analogue output:\n %s', output)
        response.update_status('**** CASTf90 succeeded', 60)
    except CalledProcessError as e:
        msg = 'CASTf90 failed:\n{0}'.format(e.output)
        LOGGER.exception(msg)
        raise Exception(msg)

    LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

    # TODO: add try/except around the pdf generation
    if plot == 'Yes':
        analogs_pdf = analogs.plot_analogs(configfile=config_file)
    else:
        # touch a dummy pdf so that the output can always be served
        analogs_pdf = 'dummy_plot.pdf'
        with open(analogs_pdf, 'a'):
            os.utime(analogs_pdf, None)

    response.update_status('preparing output', 70)

    response.outputs['analog_pdf'].file = analogs_pdf
    response.outputs['config'].file = config_file
    response.outputs['analogs'].file = output_file

    ########################
    # generate analog viewer
    ########################
    formated_analogs_file = analogs.reformat_analogs(output_file)
    # response.outputs['formated_analogs'].storage = FileStorage()
    response.outputs['formated_analogs'].file = formated_analogs_file
    LOGGER.info('analogs reformatted')
    response.update_status('reformatted analog file', 80)

    viewer_html = analogs.render_viewer(
        # configfile=response.outputs['config'].get_url(),
        configfile=config_file,
        # datafile=response.outputs['formated_analogs'].get_url())
        datafile=formated_analogs_file)
    response.outputs['output'].file = viewer_html
    response.update_status('Successfully generated analogs viewer', 90)
    LOGGER.info('rendered pages: %s ', viewer_html)

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response
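# ---------------------------------------------------------------------------
# Note on the bounding-box check: the same out-of-range test and reordering is
# repeated in several handlers above. A minimal sketch of a shared helper is
# given below. The name `_parse_bbox` is hypothetical (it is not part of the
# original processes); it assumes the 'minlon,maxlon,minlat,maxlat' input
# convention and the [minlon, minlat, maxlon, maxlat] ordering expected by
# ocgis, as used in the handlers above.
# ---------------------------------------------------------------------------
def _parse_bbox(bbox_str, default='-20,40,30,70'):
    """Parse 'minlon,maxlon,minlat,maxlat' into an ocgis-ordered bbox list."""
    parts = [float(v) for v in bbox_str.split(',')]
    if (abs(parts[0]) > 180 or abs(parts[1]) > 180 or
            abs(parts[2]) > 90 or abs(parts[3]) > 90):
        # coordinates are out of range: fall back to the default region
        parts = [float(v) for v in default.split(',')]
    # reorder to [minlon, minlat, maxlon, maxlat]
    return [parts[0], parts[2], parts[1], parts[3]]
# Possible usage: bbox = _parse_bbox(request.inputs['BBox'][0].data)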