def spaghetti(resouces, variable=None, title=None, file_extension='png'):
    """
    Create a spaghetti plot: one line per input file, showing its field mean over time.

    :param resouces: file or list of files containing the same variable
    :param variable: variable to be visualised. If None (default), the variable
                     is detected from the first file with ``utils.get_variable``
    :param title: string to be used as title. If None, a default title naming
                  the variable is used
    :param file_extension: extension passed on to ``fig2plot`` (default: 'png')

    :returns str: path to the graphic file as returned by ``fig2plot``
    :raises Exception: if the preparation fails or the figure cannot be written.
                       A single file that cannot be plotted is logged and skipped.
    """
    from blackswan.calculation import fieldmean

    LOGGER.debug('Start visualisation spaghetti plot')

    # === prepare environment
    try:
        if not isinstance(resouces, list):
            resouces = [resouces]
        if variable is None:
            variable = utils.get_variable(resouces[0])
        if title is None:
            title = "Field mean of %s " % variable
        fig = plt.figure(figsize=(20, 10), dpi=600, facecolor='w', edgecolor='k')
        LOGGER.info('plot values preparation done')
    except Exception:
        msg = "plot values preparation failed"
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        plotted = 0  # number of lines actually drawn (failed files are skipped)
        for nc in resouces:
            try:
                timestamps = utils.get_time(nc)
                values = fieldmean(nc)
                plt.plot(timestamps, values)
                plotted += 1
            except Exception:
                LOGGER.exception("spaghetti plot failed for %s " % nc)

        plt.title(title, fontsize=20)
        plt.grid()

        output_png = fig2plot(fig=fig, file_extension=file_extension)
        LOGGER.info('timeseries spaghetti plot done for %s with %s lines.' % (variable, plotted))
    except Exception:
        # Raise a meaningful error instead of running into an unbound
        # ``output_png`` at the return statement.
        msg = 'matplotlib spaghetti plot failed'
        LOGGER.exception(msg)
        raise Exception(msg)
    finally:
        # release the figure on success and on failure
        plt.close(fig)
    return output_png
def uncertainty(resouces, variable=None, ylim=None, title=None, file_extension='png', window=None):
    """
    Create an uncertainty plot: median and quantile bands of the field means of all input datasets.

    The field-mean time series of every dataset (grouped by ``sort_by_filename``)
    are collected in one DataFrame, optionally smoothed with a centred rolling
    mean, and the 0.10/0.90 and 0.33/0.66 quantile ranges are shaded around the
    median line.

    :param resouces: file or list of files containing the same variable
    :param variable: variable to be visualised. If None (default), variable will be detected
    :param ylim: value passed to ``plt.ylim`` (None leaves the limits untouched)
    :param title: string to be used as title
    :param file_extension: extension passed on to ``fig2plot`` (default: 'png')
    :param window: window size (number of time steps) of the rolling mean. If None,
                   it is chosen from the frequency reported by ``get_frequency``

    :returns str: path/to/file.png. If plotting fails, the error is logged and the
                  path of an empty temporary '.png' file is returned instead.
    """
    LOGGER.debug('Start visualisation uncertainty plot')

    import os
    import pandas as pd
    import numpy as np
    from blackswan.utils import get_time, sort_by_filename
    from blackswan.calculation import fieldmean
    from blackswan.metadata import get_frequency

    # === prepare environment
    if isinstance(resouces, str):
        resouces = [resouces]
    if variable is None:
        variable = utils.get_variable(resouces[0])
    if title is None:
        title = "Field mean of %s " % variable

    # Default rolling window in time steps per frequency. 'man' is the
    # historical spelling used here and is kept next to the intended 'mon'.
    default_windows = {'day': 10951, 'mon': 359, 'man': 359, 'sem': 119, 'yr': 30}

    fig = None
    try:
        fig = plt.figure(figsize=(20, 10), facecolor='w', edgecolor='k')  # dpi=600,
        df = pd.DataFrame()

        LOGGER.info('variable %s found in resources.' % variable)
        datasets = sort_by_filename(resouces, historical_concatination=True)

        for key in datasets:
            try:
                data = fieldmean(datasets[key])
                ts = get_time(datasets[key])
                df[key] = pd.Series(data=data, index=ts, name=key)
            except Exception:
                LOGGER.exception('failed to calculate timeseries for %s ' % (key))

        frq = get_frequency(resouces[0])
        LOGGER.debug('frequency of input data: %s' % frq)

        if window is None:
            if frq in default_windows:
                window = default_windows[frq]
            else:
                LOGGER.debug('frequency %s is not included' % frq)
                window = 30

        if len(df.index.values) >= window * 2:
            # TODO: calculate windowsize according to timestamps (day, mon, yr ... with get_frequency)
            df_smooth = df.rolling(window=window, center=True).mean()
            LOGGER.info('rolling mean calculated for all input data')
        else:
            df_smooth = df
            LOGGER.debug('timeseries too short for moving mean')
            fig.text(0.95, 0.05, '!!! timeseries too short for moving mean over 30years !!!',
                     fontsize=20, color='red', ha='right', va='bottom', alpha=0.5)

        try:
            rmean = df_smooth.quantile([0.5], axis=1)
            q05 = df_smooth.quantile([0.10], axis=1)
            q33 = df_smooth.quantile([0.33], axis=1)
            q66 = df_smooth.quantile([0.66], axis=1)
            q95 = df_smooth.quantile([0.90], axis=1)
            LOGGER.info('quantile calculated for all input data')
        except Exception:
            # nothing can be drawn without the quantiles: hand over to the fallback
            LOGGER.exception('failed to calculate quantiles')
            raise

        try:
            x_values = df_smooth.index.values
            plt.fill_between(x_values, np.squeeze(q05.values), np.squeeze(q95.values),
                             alpha=0.5, color='grey')
            plt.fill_between(x_values, np.squeeze(q33.values), np.squeeze(q66.values),
                             alpha=0.5, color='grey')
            plt.plot(x_values, np.squeeze(rmean.values), c='r', lw=3)

            plt.xlim(min(df.index.values), max(df.index.values))
            plt.ylim(ylim)
            plt.title(title, fontsize=20)
            plt.grid()

            output_png = fig2plot(fig=fig, file_extension=file_extension)
            plt.close()
            LOGGER.debug('timeseries uncertainty plot done for %s' % variable)
        except Exception as err:
            # ``err.message`` does not exist on Python 3; formatting the
            # exception itself works on both Python 2 and 3.
            raise Exception('failed to calculate quantiles. %s' % err)
    except Exception:
        LOGGER.exception('uncertainty plot failed for %s.' % variable)
        if fig is not None:
            plt.close(fig)
        # Best-effort fallback: hand back an empty file. mkstemp returns an
        # open descriptor that has to be closed explicitly.
        handle, output_png = mkstemp(dir='.', suffix='.png')
        os.close(handle)
    return output_png
def set_dynamic_md(resource):
    """
    Set dynamic meta data like time frequency, spatial extent, start/end time, etc.

    A fixed list of global attributes is removed from the file and written back
    with an ``in_var_`` prefix (missing attributes become empty strings).
    ``CORDEX_domain`` is read but left in place. Frequency, time coverage,
    number of time steps, geospatial extent and increment are added.

    :param resource: netCDF file where basic meta data should be set

    :returns: ``resource`` (the same path that was passed in). A failure while
              writing the attributes back is logged, not raised.
    """
    from blackswan.utils import get_timerange, get_time

    frequency = get_frequency(resource)
    time_coverage_start, time_coverage_end = get_timerange(resource)
    time_number_steps = len(get_time(resource))

    # Global attributes that are moved to '<in_var_>name'. Every name is listed
    # exactly once: reading an attribute a second time after it was deleted
    # would always fail and reset its value to ''.
    moved_attributes = (
        'driving_experiment',
        'driving_experiment_name',
        'driving_model_ensemble_member',
        'experiment',
        'tracking_id',
        'experiment_id',
        'project_id',
        'institution_id',
        'model_version_id',
        'driving_model_id',
        'driving_ensemble_member',
        'model_id',
        'contact',
        'driving_experiment_id',
    )

    md_dynamic = {}
    ds = Dataset(resource, mode='a')
    try:
        for name in moved_attributes:
            try:
                value = getattr(ds, name)
                ds.delncattr(name)
            except Exception as e:
                LOGGER.error(e)
                value = ''
            md_dynamic['in_var_%s' % name] = value

        try:
            domain = ds.CORDEX_domain  # read only, the attribute stays in the file
        except Exception as e:
            LOGGER.error(e)
            domain = ''
    finally:
        ds.close()

    min_lat, max_lat, min_lon, max_lon = get_extent(resource)
    geospatial_increment = get_geospatial_increment(resource)

    md_dynamic.update({
        'in_var_domain': domain,
        'frequency': frequency,
        'time_coverage_start': time_coverage_start,
        'time_coverage_end': time_coverage_end,
        'time_number_steps': time_number_steps,
        'domain': '%s_subset' % domain,
        'geospatial_increment': geospatial_increment,
        'geospatial_lat_min': min_lat,
        'geospatial_lat_max': max_lat,
        'geospatial_lon_min': min_lon,
        'geospatial_lon_max': max_lon,
    })

    try:
        ds = Dataset(resource, mode='a')
        try:
            ds.setncatts(md_dynamic)
        finally:
            ds.close()
    except Exception as e:
        LOGGER.error(e)
    return (resource)
def _handler(self, request, response):
    """
    Project model data onto previously trained weather regimes with an R script.

    Steps: read the inputs, download the training data (``dat``/``Rdat``),
    subset the resource to the requested period (regridding to the optional
    reference file), compute anomalies and extract the season, then call
    ``weatherregimes_projection.R`` and attach the produced files to the
    response outputs.

    :param request: request object; inputs read are 'season', 'period',
                    'anualcycle', 'resource', 'Rdat', 'dat' and 'netCDF'
    :param response: response object; its outputs and status are updated

    :returns: the updated ``response``
    :raises Exception: as soon as one of the processing stages fails. Each stage
                       reports its own message instead of surfacing later as an
                       undefined-name error.
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    response.update_status('execution started at : {}'.format(dt.now()), 5)

    ################################
    # reading in the input arguments
    ################################
    try:
        LOGGER.info('read in the arguments')
        season = request.inputs['season'][0].data
        period = request.inputs['period'][0].data
        anualcycle = request.inputs['anualcycle'][0].data

        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        LOGGER.debug('start: %s , end: %s ' % (start, end))

        resource = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        url_Rdat = request.inputs['Rdat'][0].data
        url_dat = request.inputs['dat'][0].data
        url_ref_file = request.inputs['netCDF'][0].data  # can be None

        LOGGER.info('period %s' % str(period))
        LOGGER.info('season %s' % str(season))
        LOGGER.info('url_ref_file: %s' % url_ref_file)
        LOGGER.info('url_Rdat: %s' % url_Rdat)
        LOGGER.info('url_dat: %s' % url_dat)
    except Exception as e:
        msg = 'failed to convert arguments %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ############################
    # fetching training data
    ############################
    try:
        dat = abspath(download(url_dat))
        Rdat = abspath(download(url_Rdat))
        LOGGER.info('training data fetched')
    except Exception as e:
        msg = 'failed to fetch training data %s' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ##########################################################
    # get the required time region from resource data
    ##########################################################
    try:
        from blackswan.ocgis_module import call
        from blackswan.utils import get_variable
        time_range = [start, end]

        variable = get_variable(resource)

        if url_ref_file:
            # truthiness covers both None and an empty string
            ref_file = download(url_ref_file)
            model_subset = call(
                resource=resource, variable=variable,
                time_range=time_range,
                spatial_wrapping='wrap',
                regrid_destination=ref_file,
                regrid_options='bil')
            LOGGER.info('Dataset subset with regridding done: %s ' % model_subset)
        else:
            model_subset = call(
                resource=resource, variable=variable,
                time_range=time_range,
                spatial_wrapping='wrap',
            )
            LOGGER.info('Dataset time period extracted: %s ' % model_subset)
    except Exception:
        msg = 'failed to make a data subset '
        LOGGER.exception(msg)
        raise Exception(msg)

    #######################
    # computing anomalies
    #######################
    try:
        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
        model_anomal = wr.get_anomalies(model_subset, reference=reference, sseas='multi')

        #####################
        # extracting season
        #####################
        model_season = wr.get_season(model_anomal, season=season)
    except Exception:
        msg = 'failed to compute anualcycle or seasons'
        LOGGER.exception(msg)
        raise Exception(msg)

    #######################
    # call the R scripts
    #######################
    import os
    import subprocess
    from blackswan import config
    from os.path import curdir, join

    def _reserve_tempfile(suffix):
        """Create an empty temporary file in the working dir and return its path."""
        handle, path = mkstemp(dir=curdir, suffix=suffix)
        os.close(handle)  # only the name is needed; do not leak the descriptor
        return path

    try:
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_projection.R'

        timestamps = get_time(model_season)

        file_pca = _reserve_tempfile('.txt')
        file_class = _reserve_tempfile('.Rdat')
        output_frec = _reserve_tempfile('.txt')

        args = ['Rscript', join(Rsrc, Rfile), '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % model_season,
                '%s' % variable,
                # time stamps as one comma separated argument without brackets/quotes/blanks
                '%s' % str(timestamps).strip("[]").replace("'", "").replace(" ", ""),
                '%s' % dat,
                '%s' % Rdat,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % output_frec,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % 'MODEL']
        LOGGER.info('Rcall builded')
    except Exception as e:
        msg = 'failed to build the R command %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        LOGGER.info('R outlog info:\n %s ' % output)
        LOGGER.debug('R outlog errors:\n %s ' % error)
        if len(output) > 0:
            response.update_status('**** weatherregime in R suceeded', 90)
        else:
            LOGGER.error('NO! output returned from R call')
    except Exception as e:
        msg = 'weatherregime in R %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    #################
    # set the outputs
    #################
    response.update_status('Set the process outputs ', 95)

    response.outputs['output_pca'].file = file_pca
    response.outputs['output_classification'].file = file_class
    response.outputs['output_netcdf'].file = model_season
    response.outputs['output_frequency'].file = output_frec

    response.update_status('done', 100)
    return response
def plot_analogs(configfile='config.txt', soutpdf='Analogs.pdf', simday='all', **kwargs):
    """
    Plot the simulated field and its analogs for every simulated day into one merged PDF.

    The configuration file is scanned line by line for the keys ``archivefile``,
    ``simulationfile``, ``outputfile``, ``nanalog``, ``varname`` and
    ``predictordom``. File names are resolved relative to the directory of the
    configuration file. For each data row of the analog output file six pages
    are drawn with ``pdf_from_analog`` (simulation, mean of the analogs, first
    and last analog, analog with maximum and with minimum correlation). The
    pages are merged with ``pdfmerge`` and the single-page files are removed.

    :param configfile: path to the analogs configuration file
    :param soutpdf: file name of the merged PDF
    :param simday: currently not used
    :param kwargs: currently not used

    :returns: the value returned by ``pdfmerge``; ``soutpdf`` unchanged if
              ``configfile`` does not exist
    :raises ValueError: if an analog date is not found in the archive time axis
    """
    from datetime import datetime as dt
    from netCDF4 import Dataset
    from blackswan.visualisation import pdfmerge

    if not os.path.isfile(configfile):
        # TODO: call this func with analogfile = '..',
        # arguments came from kwargs
        # need to prescribe all input info - to use with external analogs results.
        # check kwargs: ncfiles, N analogs (?), periods, etc
        return soutpdf

    confdir = os.path.dirname(os.path.abspath(configfile))

    # --- read the configuration (substring match on the key names)
    with open(configfile) as conf:
        for raw_line in conf:
            line = raw_line.rstrip('\n')
            if 'archivefile' in line:
                arcfile = line.split('"')[1]
            if 'simulationfile' in line:
                simfile = line.split('"')[1]
            if 'outputfile' in line:
                analogfile = line.split('"')[1]
            if 'nanalog' in line:
                nanalog = int(line.split(' =')[1])
            if 'varname' in line:
                varname = line.split('"')[1]
            if 'predictordom' in line:
                domain = line.split('"')[1]

    analogfile = confdir + '/' + analogfile
    arcfile = confdir + '/' + arcfile
    simfile = confdir + '/' + simfile

    try:
        arc_times = get_time(arcfile)
        get_time(simfile)  # result unused; a failure here also selects the fallback reader
    except Exception:
        LOGGER.debug('PLOT_ANALOGS: Not standard calendar')
        arc_times = get_time_nc(arcfile)
        get_time_nc(simfile)

    sim_dataset = Dataset(simfile)
    try:
        simvar = sim_dataset.variables[varname][:]
        # TODO: check other names for lat/lon
        lon = sim_dataset.variables['lon'][:]
        lat = sim_dataset.variables['lat'][:]
    finally:
        sim_dataset.close()

    try:
        domain = [float(i) for i in domain.split(",")]
    except ValueError:
        # no usable domain in the config: fall back to the extent of the data
        domain = [lon[0], lon[-1], lat[-1], lat[0]]

    # Position of every archive date on the archive time axis, built once.
    # setdefault keeps the first occurrence, like list.index() would.
    arc_position = {}
    for pos, stamp in enumerate(arc_times):
        arc_position.setdefault((stamp.year, stamp.month, stamp.day), pos)

    with open(analogfile) as analog_in:
        cont = [line.rstrip('\n') for line in analog_in]

    Nlin = 30
    outlist = []
    arcvar = None  # archive field, read once when the first row is processed

    def _page(prefix, day, data, vmin, vmax, title):
        """Draw one page for ``day`` and return the path of the written file."""
        return str(pdf_from_analog(lon=lon, lat=lat, data=data,
                                   vmin=vmin, vmax=vmax, Nlin=Nlin, domain=domain,
                                   output=prefix + day + '.pdf', title=title))

    for idx, item in enumerate(cont[1:]):  # first line is the header
        ana = item.split()
        if not ana:
            continue  # tolerate blank lines
        day = ana[0]
        dt.strptime(day, '%Y%m%d')  # validates the simulation date format

        an_dates = [dt.strptime(dat, '%Y%m%d') for dat in ana[1:1 + nanalog]]

        c_dists = ana[1 + nanalog:1 + 2 * nanalog]
        c_cors = ana[1 + 2 * nanalog:]

        # dists is not used for plotting yet (reserved for weighted means),
        # the conversion still validates the column.
        dists = np.zeros((nanalog), dtype=float)
        cors = np.zeros((nanalog), dtype=float)
        for i in range(0, nanalog):
            dists[i] = float(c_dists[i])
            cors[i] = float(c_cors[i])

        sim_index = idx  # day by day

        arc_index = []
        for arc in an_dates:
            key = (arc.year, arc.month, arc.day)
            if key not in arc_position:
                raise ValueError('analog date %s-%s-%s not found in archive' % key)
            arc_index.append(arc_position[key])

        simmin = np.min(simvar[sim_index, :, :])
        simmax = np.max(simvar[sim_index, :, :])

        # PLOT SIM ====================================
        outlist.append(_page('sim_', day, simvar[sim_index, :, :], simmin, simmax,
                             'Simulation Day: ' + day))

        # PLOT Mean analogs ====================================
        if arcvar is None:
            arc_dataset = Dataset(arcfile)
            try:
                arcvar = arc_dataset.variables[varname][:]
            finally:
                arc_dataset.close()

        mean_ana = np.zeros((arcvar.shape[1], arcvar.shape[2]), dtype=float)
        for art in arc_index:
            mean_ana = mean_ana + arcvar[art, :, :]
        mean_ana = mean_ana / nanalog

        outlist.append(_page('ana_', day, mean_ana, simmin, simmax,
                             'Mean analogs for sim Day: ' + day))

        # PLOT BEST (first) analog
        outlist.append(_page('bana_', day, arcvar[arc_index[0]], simmin, simmax,
                             'BEST analog for sim Day ' + day + ' is: ' + ana[1]))

        # PLOT WORST (last) analog
        outlist.append(_page('wana_', day, arcvar[arc_index[-1]], simmin, simmax,
                             'LAST analog for sim Day ' + day + ' is: ' + ana[nanalog]))

        # PLOT Max and Min correl analog
        min_c_index = np.argmin(cors)
        max_c_index = np.argmax(cors)

        outlist.append(_page('bcana_', day, arcvar[arc_index[max_c_index]], simmin, simmax,
                             'Analog with max corr for sim Day ' + day +
                             ' is: ' + ana[1 + max_c_index]))

        outlist.append(_page('wcana_', day, arcvar[arc_index[min_c_index]], simmin, simmax,
                             'Analog with min corr for sim Day ' + day +
                             ' is: ' + ana[1 + min_c_index]))

        # Not implemented yet: analog means weighted by distance
        # (w_dist = min(dists) / dists) and by correlation (w_corr = cors / max(cors)).

    simoutpdf = pdfmerge(outlist, outpdf=soutpdf)

    # clean up the single pages
    for i_pdf in outlist:
        os.remove(i_pdf)

    return simoutpdf
def _handler(self, request, response):
    """
    Calculate local dimension and persistence for a reanalyses dataset.

    Steps: read the inputs, fetch the reanalyses files for the requested years,
    subset them to the bounding box and period (level extraction for 'z'
    variables), compute the local dimensions with the selected method
    ('Python', 'Python_wrap', 'R' or 'R_wrap'), write the values for the whole
    period and for the selected season to text files and plot them.

    :param request: request object; inputs read are 'dateSt', 'dateEn', 'season',
                    'BBox', 'dist', 'method' and 'reanalyses'
    :param response: response object; its outputs and status are updated

    :returns: the updated ``response``
    :raises Exception: if reading the inputs, setting the environment, fetching
                       the data or calculating the dimensions fails, or if the
                       dataset or method is not known
    """
    def _run_rscript(rscript_args):
        """Run an R script, log its stdout/stderr and return the captured stdout."""
        output, error = subprocess.Popen(rscript_args, stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE).communicate()
        LOGGER.info('R outlog info:\n %s ' % output)
        # stderr of R is diagnostic output, not a Python exception
        LOGGER.debug('R outlog errors:\n %s ' % error)
        return output

    def _month_of(stamp):
        """Return the month of a time stamp (date-like object or 'YYYYMMDD...' string)."""
        month = getattr(stamp, 'month', None)
        if month is None:
            month = int(str(stamp)[4:6])
        return month

    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)

    process_start_time = time.time()  # measure process execution time ...
    start_time = time.time()  # measure init ...

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status('read input parameter : %s ' % dt.now(), 6)

        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data

        # timres = request.inputs['timeres'][0].data
        timres = 'day'
        season = request.inputs['season'][0].data

        bboxDef = '-20,40,30,70'  # in general format

        bboxStr = request.inputs['BBox'][0].data
        LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
        bboxStr = bboxStr.split(',')

        # Checking for wrong coordinates and apply default if necessary.
        # abs() has to wrap the number, not the comparison, otherwise values
        # below -180 / -90 slip through.
        if (abs(float(bboxStr[0])) > 180 or
                abs(float(bboxStr[1])) > 180 or
                abs(float(bboxStr[2])) > 90 or
                abs(float(bboxStr[3])) > 90):
            bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
            LOGGER.debug('BBOX is out of the range, using default instead: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

        # reorder the four values from positions 0,1,2,3 to 0,2,1,3 for ocgis
        bbox = [float(bboxStr[0]), float(bboxStr[2]), float(bboxStr[1]), float(bboxStr[3])]
        LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
        LOGGER.debug('BBOX original: %s ' % (bboxStr))

        distance = request.inputs['dist'][0].data
        method = request.inputs['method'][0].data

        model_var = request.inputs['reanalyses'][0].data
        model, var = model_var.split('_')

        LOGGER.info('input parameters set')
        response.update_status('Read in and convert the arguments', 7)
    except Exception as e:
        msg = 'failed to read input prameter %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    start = dateSt
    end = dateEn

    ###########################
    # set the environment
    ###########################
    response.update_status('fetching data from archive', 9)

    try:
        if model == 'NCEP' or '20CRV2' in model:
            getlevel = False
            if 'z' in var:
                level = var.strip('z')
            else:
                level = None
                if model == 'NCEP' and var == 'precip':
                    var = 'pr_wtr'
        else:
            # stop here: getlevel/level would be undefined further down
            raise Exception('Reanalyses dataset not known')
        LOGGER.info('environment set for model: %s' % model)
    except Exception:
        msg = 'failed to set environment'
        LOGGER.exception(msg)
        raise Exception(msg)

    ##########################################
    # fetch Data from original data archive
    ##########################################
    # NOTE: If ref is say 1950 - 1990, and sim is just 1 week in 2017:
    # - ALL the data will be downloaded, 1950 - 2017
    try:
        model_nc = rl(start=start.year, end=end.year, dataset=model,
                      variable=var, timres=timres, getlevel=getlevel)
        LOGGER.info('reanalyses data fetched')
    except Exception:
        msg = 'failed to get reanalyses data'
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('subsetting region of interest', 10)

    LOGGER.debug("start and end time: %s - %s" % (start, end))
    time_range = [start, end]

    # Checking memory and dataset size
    model_size = get_files_size(model_nc)
    memory_avail = psutil.virtual_memory().available
    thrs = 0.5  # 50%
    ser_r = model_size >= thrs * memory_avail

    # ################################
    # For 20CRV2 geopotential height, daily dataset for 100 years is about 50 Gb
    # So it makes sense, to operate it step-by-step
    # TODO: need to create dictionary for such datasets (for models as well)
    # TODO: benchmark the method bellow for NCEP z500 for 60 years
    if 'z' in var:
        tmp_total = []
        origvar = get_variable(model_nc)

        for z in model_nc:
            b0 = call(resource=z, variable=origvar, level_range=[int(level), int(level)], geom=bbox,
                      spatial_wrapping='wrap', prefix='levdom_' + os.path.basename(z)[0:-3])
            tmp_total.append(b0)

        tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
        inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range)

        # Clean the per-file subsets (best effort, like 'rm -f')
        for tmp_file in tmp_total:
            try:
                os.remove(tmp_file)
            except OSError:
                LOGGER.debug('could not remove temporary file %s' % tmp_file)

        # Create new variable without the level dimension
        ds = Dataset(inter_subset_tmp, mode='a')
        try:
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z%s' % level, z_var.dtype, dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
        finally:
            ds.close()
        model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level)
    else:
        if ser_r:
            LOGGER.debug('Process reanalysis step-by-step')
            tmp_total = []
            for z in model_nc:
                b0 = call(resource=z, variable=var, geom=bbox, spatial_wrapping='wrap',
                          prefix='Rdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)
            tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
            model_subset_tmp = call(resource=tmp_total, variable=var, time_range=time_range)
        else:
            LOGGER.debug('Using whole dataset at once')
            model_subset_tmp = call(resource=model_nc, variable=var,
                                    geom=bbox, spatial_wrapping='wrap', time_range=time_range,
                                    )

    # If dataset is 20CRV2 the 6 hourly file should be converted to daily.
    # Option to use previously 6h data from cache (if any) and not download daily files.
    if '20CRV2' in model and timres == '6h':
        from cdo import Cdo

        cdo = Cdo(env=os.environ)
        model_subset = '%s.nc' % uuid.uuid1()
        tmp_f = '%s.nc' % uuid.uuid1()

        cdo_op = getattr(cdo, 'daymean')
        cdo_op(input=model_subset_tmp, output=tmp_f)
        sti = '00:00:00'
        cdo_op = getattr(cdo, 'settime')
        cdo_op(sti, input=tmp_f, output=model_subset)
        LOGGER.debug('File Converted from: %s to daily' % (timres))
    else:
        model_subset = model_subset_tmp

    LOGGER.info('Dataset subset done: %s ', model_subset)
    response.update_status('dataset subsetted', 15)

    # ======================================

    LOGGER.debug("get_input_subset_dataset took %s seconds.",
                 time.time() - start_time)
    response.update_status('**** Input data fetched', 20)

    ########################
    # input data preperation
    ########################
    response.update_status('Start preparing input data', 30)
    start_time = time.time()  # measure data preperation ...

    response.update_status('Start DIM calc', 50)

    # Calculation of Local Dimensions ==================

    LOGGER.debug('Calculation of the distances using: %s metric' % (distance))
    LOGGER.debug('Calculation of the dims with: %s' % (method))

    dim_filename = '%s.txt' % model
    tmp_dim_fn = '%s.txt' % uuid.uuid1()
    Rsrc = config.Rsrc_dir()

    r_scripts = {'R': 'localdimension_persistence_fullD.R',
                 'R_wrap': 'localdimension_persistence_serrD.R'}

    if method in ('Python', 'Python_wrap'):
        try:
            if method == 'Python':
                l_dist, l_theta = localdims(resource=model_subset, variable=var, distance=str(distance))
            else:
                l_dist, l_theta = localdims_par(resource=model_subset, variable=var, distance=str(distance))
            response.update_status('**** Dims with Python suceeded', 60)
        except Exception:
            # without l_dist / l_theta nothing below can work
            msg = 'NO! output returned from Python call'
            LOGGER.exception(msg)
            raise Exception(msg)
    elif method in r_scripts:
        args = ['Rscript', os.path.join(Rsrc, r_scripts[method]),
                '%s' % model_subset, '%s' % var,
                '%s' % tmp_dim_fn]
        LOGGER.info('Rcall builded')
        LOGGER.debug('ARGS: %s' % (args))
        try:
            output = _run_rscript(args)
            if len(output) > 0:
                response.update_status('**** Dims with %s suceeded' % method, 60)
            else:
                LOGGER.error('NO! output returned from R call')
            # HERE READ DATA FROM TEXT FILES
            R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',')
            l_theta = R_resdim[:, 0]
            l_dist = R_resdim[:, 1]
        except Exception:
            msg = 'Dim with %s' % method
            LOGGER.exception(msg)
            raise Exception(msg)
    else:
        msg = 'method %s is not known' % method
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        res_times = get_time(model_subset)
    except Exception:
        LOGGER.debug('Not standard calendar')
        res_times = analogs.get_time_nc(model_subset)

    # plot 1
    ld_pdf = analogs.pdf_from_ld(x=l_dist, y=l_theta)

    # concatenation of values
    concat_vals = column_stack([res_times, l_theta, l_dist])
    savetxt(dim_filename, concat_vals, fmt='%s', delimiter=',')

    # output season
    try:
        seas = _TIMEREGIONS_[season]['month']  # [12, 1, 2]
        LOGGER.info('Season to grep from TIMEREGIONS: %s ' % season)
        LOGGER.info('Season N to grep from TIMEREGIONS: %s ' % seas)
    except Exception:
        LOGGER.info('No months in TIMEREGIONS, moving to months')
        try:
            seas = _MONTHS_[season]['month']  # [1] or [2] or ...
            LOGGER.info('Season to grep from MONTHS: %s ' % season)
            LOGGER.info('Season N to grep from MONTHS: %s ' % seas)
        except Exception:
            seas = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

    # rows whose time stamp lies in one of the season's months
    ind = [i for i, stamp in enumerate(res_times) if _month_of(stamp) in seas]
    if not ind:
        LOGGER.warning('no time steps found for season %s' % season)
    sf = concat_vals[ind]

    seas_dim_filename = season + '_' + dim_filename
    savetxt(seas_dim_filename, sf, fmt='%s', delimiter=',')

    # -------------------------- plot with R ---------------
    R_plot_file = 'plot_csv.R'
    ld2_pdf = 'local_dims.pdf'
    ld2_seas_pdf = season + '_local_dims.pdf'

    for csv_file, pdf_file in ((dim_filename, ld2_pdf), (seas_dim_filename, ld2_seas_pdf)):
        try:
            _run_rscript(['Rscript', os.path.join(Rsrc, R_plot_file),
                          '%s' % csv_file, '%s' % pdf_file])
        except Exception:
            LOGGER.exception('Could not produce plot')
            # TODO: Here need produce empty pdf(s) to pass to output

    # ====================================================
    response.update_status('preparing output', 80)

    response.outputs['ldist'].file = dim_filename
    response.outputs['ldist_seas'].file = seas_dim_filename
    response.outputs['ld_pdf'].file = ld_pdf
    response.outputs['ld2_pdf'].file = ld2_pdf
    response.outputs['ld2_seas_pdf'].file = ld2_seas_pdf

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response
def _handler(self, request, response):
    """Run the local-dimension process for one request.

    Steps performed, in order:

    1. Initialise the process logger with ``log.txt`` and attach that file
       to ``response.outputs['output_log']``.
    2. Read the inputs ``resource``, ``dateSt``, ``dateEn``, ``season``,
       ``level``, ``BBox``, ``dist`` and ``method`` from ``request.inputs``.
    3. Keep only the files overlapping the requested period, select the
       level (only when the variable has more than three dimensions), cut
       the bounding box and subset the time range.
    4. Compute ``l_dist`` / ``l_theta`` with the backend named by ``method``
       (``Python``, ``Python_wrap``, ``R`` or ``R_wrap``).
    5. Write the full and the seasonal text tables, call ``plot_csv.R`` for
       both, and set the outputs ``ldist``, ``ldist_seas``, ``ld_pdf``,
       ``ld2_pdf`` and ``ld2_seas_pdf``.

    :param request: request object; inputs are read as
                    ``request.inputs[name][0].data``
    :param response: response object; its status is updated along the way
                     and its outputs are filled
    :returns: the ``response`` object that was passed in
    :raises Exception: if reading the inputs, setting up the environment,
                       preparing the data or computing the dimensions fails,
                       if ``method`` is not one of the four known values, or
                       if no time step falls into the requested season
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)

    process_start_time = time.time()  # measure process execution time ...
    start_time = time.time()  # measure init ...

    ################################
    # reading in the input arguments
    ################################
    try:
        response.update_status('read input parameter : %s ' % dt.now(), 10)
        resource = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data

        bboxDef = '-20,40,30,70'  # in general format

        season = request.inputs['season'][0].data
        level = request.inputs['level'][0].data
        if level == 500:
            dummylevel = 1000  # dummy workaround for cdo sellevel
        else:
            dummylevel = 500
        LOGGER.debug('LEVEL selected: %s hPa' % (level))

        bboxStr = request.inputs['BBox'][0].data
        LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
        bboxStr = bboxStr.split(',')

        # Check for wrong coordinates and apply the default if necessary.
        # abs() has to wrap the number, not the comparison: the previous
        # form ``abs(float(x) > 180)`` let out-of-range negative values pass.
        if (abs(float(bboxStr[0])) > 180 or
                abs(float(bboxStr[1])) > 180 or
                abs(float(bboxStr[2])) > 90 or
                abs(float(bboxStr[3])) > 90):
            bboxStr = bboxDef  # request.inputs['BBox'].default does not work anymore
            LOGGER.debug(
                'BBOX is out of the range, using default instead: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

        # Reorder from (v0, v1, v2, v3) to (v0, v2, v1, v3) for ocgis.
        bbox = [float(bboxStr[0]),
                float(bboxStr[2]),
                float(bboxStr[1]),
                float(bboxStr[3])]
        LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
        LOGGER.debug('BBOX original: %s ' % (bboxStr))

        distance = request.inputs['dist'][0].data
        method = request.inputs['method'][0].data

        LOGGER.info('input parameters set')
        response.update_status('Read in and convert the arguments', 20)
    except Exception as e:
        msg = 'failed to read input prameter %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        # not necessary if ocgis_module.py gets fixed
        dateSt = dt.combine(dateSt, dt_time(12, 0))
        dateEn = dt.combine(dateEn, dt_time(12, 0))

        # A 360_day calendar has no 31st: move such dates to the 30th.
        try:
            if not isinstance(resource, list):
                resource = [resource]

            modcal, _ = get_calendar(resource[0])
            if '360_day' in modcal:
                if dateSt.day == 31:
                    dateSt = dateSt.replace(day=30)
                    LOGGER.debug('Date has been changed for: %s' % (dateSt))
                if dateEn.day == 31:
                    dateEn = dateEn.replace(day=30)
                    LOGGER.debug('Date has been changed for: %s' % (dateEn))
        except Exception:
            # best effort: keep the dates unchanged
            LOGGER.debug('Could not detect calendar')

        start = dateSt
        end = dateEn
        time_range = [start, end]

        LOGGER.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)

    LOGGER.debug("init took %s seconds.", time.time() - start_time)
    response.update_status('Read in and convert the arguments', 30)

    ########################
    # input data preparation
    ########################

    # TODO: Check if files containing more than one dataset

    response.update_status('Start preparing input data', 40)
    start_time = time.time()  # measure data preparation ...
    try:
        # TODO: Add selection of the level. maybe below in call(..., level_range=[...,...])
        if isinstance(resource, list):
            resource = sorted(
                resource, key=lambda i: path.splitext(path.basename(i))[0])
        else:
            resource = [resource]

        # Drop the files that do not overlap the requested period.
        selected = []
        for res_fn in resource:
            first_day, last_day = get_timerange(res_fn)
            file_start = dt.strptime(first_day, '%Y%m%d')
            file_end = dt.strptime(last_day, '%Y%m%d')
            if file_start <= end and file_end >= start:
                selected.append(res_fn)
                LOGGER.debug('Selected file: %s ' % (res_fn))
        resource = selected
        if not resource:
            raise Exception('no input file overlaps the requested period')

        # Try to fix memory issue... (ocgis call for files like 20-30 gb... )
        # IF 4D - select pressure level before domain cut
        #
        # resource properties
        variable = get_variable(resource[0])
        ds = Dataset(resource[0])
        try:
            dims = list(ds.variables[variable].dimensions)
            dimlen = len(dims)

            try:
                model_id = ds.getncattr('model_id')
            except AttributeError:
                model_id = 'Unknown_model'
            LOGGER.debug('MODEL: %s ' % (model_id))

            if dimlen > 3:
                # Index [1] should really be detected;
                # this assumes a layout like zg(time, plev, lat, lon).
                lev_units = ds.variables[dims[1]].units
                if lev_units == 'Pa':
                    level = level * 100
                    dummylevel = dummylevel * 100
                    # TODO: OR check the NAME and units of vertical level and
                    # find 200, 300, or 500 mbar in it.
                    # Not just level = level * 100.
        finally:
            # the dataset handle is only needed for the metadata above
            ds.close()

        # Get Levels
        from cdo import Cdo
        cdo = Cdo(env=environ)

        if dimlen > 3:
            lev_res = []
            for res_fn in resource:
                tmp_f = 'lev_' + path.basename(res_fn)
                try:
                    tmp_f = call(resource=res_fn, variable=variable,
                                 spatial_wrapping='wrap',
                                 level_range=[int(level), int(level)],
                                 prefix=tmp_f[0:-3])
                except Exception:
                    # fall back to cdo when the ocgis call fails
                    comcdo = '%s,%s' % (level, dummylevel)
                    cdo.sellevel(comcdo, input=res_fn, output=tmp_f)
                lev_res.append(tmp_f)
        else:
            lev_res = resource

        # ===============================================================
        # TODO: Before domain, Regrid to selected grid! (???) if no rean.
        # ================================================================

        # Get domain
        # cdo wants the box in the original input order (v0, v1, v2, v3).
        comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3])
        regr_res = []
        for res_fn in lev_res:
            tmp_f = 'dom_' + path.basename(res_fn)
            try:
                tmp_f = call(resource=res_fn, geom=bbox,
                             spatial_wrapping='wrap', prefix=tmp_f[0:-3])
            except Exception:
                # fall back to cdo when the ocgis call fails
                cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
            regr_res.append(tmp_f)

        # ============================
        # Block to collect final data
        if dimlen > 3:
            res_tmp_tmp = get_level(regr_res, level=level)
            variable = 'z%s' % level
            res_tmp = call(resource=res_tmp_tmp, variable=variable,
                           time_range=time_range)
        else:
            res_tmp = call(resource=regr_res, time_range=time_range,
                           spatial_wrapping='wrap')
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        LOGGER.exception(msg)
        raise Exception(msg)
    LOGGER.debug("data preperation took %s seconds.", time.time() - start_time)

    # NOTE: an earlier experiment that set the MKL/OMP thread count through
    # ctypes (libmkl_rt.so) used to live here; it was disabled.

    response.update_status('Start DIM calc', 50)

    # Calculation of Local Dimensions ==================
    LOGGER.debug('Calculation of the distances using: %s metric' % (distance))
    LOGGER.debug('Calculation of the dims with: %s' % (method))

    dim_filename = '%s.txt' % model_id
    tmp_dim_fn = '%s.txt' % uuid.uuid1()
    Rsrc = config.Rsrc_dir()

    def _run_rscript(cmd):
        """Run ``cmd``, log its stdout and stderr, and return the stdout."""
        output, error = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        LOGGER.info('R outlog info:\n %s ' % output)
        # LOGGER.error, not LOGGER.exception: no exception is being handled
        LOGGER.error('R outlog errors:\n %s ' % error)
        return output

    # R script used by each R based method
    r_scripts = {
        'R': 'localdimension_persistence_fullD.R',
        'R_wrap': 'localdimension_persistence_serrD.R',
    }

    if method == 'Python':
        try:
            l_dist, l_theta = localdims(resource=res_tmp, variable=variable,
                                        distance=str(distance))
            response.update_status('**** Dims with Python suceeded', 60)
        except Exception:
            # Without the results nothing below can work, so stop here.
            msg = 'NO! output returned from Python call'
            LOGGER.exception(msg)
            raise Exception(msg)
    elif method == 'Python_wrap':
        try:
            l_dist, l_theta = localdims_par(resource=res_tmp, variable=variable,
                                            distance=str(distance))
            response.update_status('**** Dims with Python suceeded', 60)
        except Exception:
            msg = 'NO! output returned from Python call'
            LOGGER.exception(msg)
            raise Exception(msg)
    elif method in r_scripts:
        args = [
            'Rscript',
            path.join(Rsrc, r_scripts[method]),
            '%s' % res_tmp,
            '%s' % variable,
            '%s' % tmp_dim_fn,
        ]
        LOGGER.info('Rcall builded')
        LOGGER.debug('ARGS: %s' % (args))
        try:
            output = _run_rscript(args)
            if len(output) > 0:
                response.update_status('**** Dims with %s suceeded' % method, 60)
            else:
                LOGGER.error('NO! output returned from R call')
            # The R script writes its result to tmp_dim_fn: read it back.
            R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',')
            l_theta = R_resdim[:, 0]
            l_dist = R_resdim[:, 1]
        except Exception:
            msg = 'Dim with %s' % method
            LOGGER.exception(msg)
            raise Exception(msg)
    else:
        msg = 'unknown method for the dims calculation: %s' % method
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        res_times = get_time(res_tmp)
    except Exception:
        LOGGER.debug('Not standard calendar')
        res_times = analogs.get_time_nc(res_tmp)

    # plot 1
    ld_pdf = analogs.pdf_from_ld(x=l_dist, y=l_theta)

    # Reduce every time stamp to its date part without dashes (YYYYMMDD).
    res_times = [
        stamp.isoformat().strip().split("T")[0].replace('-', '')
        for stamp in res_times
    ]

    # concatenation of values
    concat_vals = column_stack([res_times, l_theta, l_dist])
    savetxt(dim_filename, concat_vals, fmt='%s', delimiter=',')

    # output season
    try:
        seas = _TIMEREGIONS_[season]['month']  # [12, 1, 2]
        LOGGER.info('Season to grep from TIMEREGIONS: %s ' % season)
        LOGGER.info('Season N to grep from TIMEREGIONS: %s ' % seas)
    except Exception:
        LOGGER.info('No months in TIMEREGIONS, moving to months')
        try:
            seas = _MONTHS_[season]['month']  # [1] or [2] or ...
            LOGGER.info('Season to grep from MONTHS: %s ' % season)
            LOGGER.info('Season N to grep from MONTHS: %s ' % seas)
        except Exception:
            # neither a known season nor a known month: take the whole year
            seas = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

    # Characters 4:6 of the date string hold the month.
    # TODO: use dt_obj.month instead of slicing the string
    ind = [i for i, stamp in enumerate(res_times) if int(stamp[4:6]) in seas]
    if not ind:
        msg = 'no time steps found for season %s' % season
        LOGGER.error(msg)
        raise Exception(msg)
    sf = column_stack([concat_vals[i] for i in ind]).T

    seas_dim_filename = season + '_' + dim_filename
    savetxt(seas_dim_filename, sf, fmt='%s', delimiter=',')

    # -------------------------- plot with R ---------------
    R_plot_file = 'plot_csv.R'
    ld2_pdf = 'local_dims.pdf'
    ld2_seas_pdf = season + '_local_dims.pdf'

    plot_jobs = (
        (dim_filename, ld2_pdf),
        (seas_dim_filename, ld2_seas_pdf),
    )
    for table_fn, pdf_fn in plot_jobs:
        args = [
            'Rscript',
            path.join(Rsrc, R_plot_file),
            '%s' % table_fn,
            '%s' % pdf_fn,
        ]
        try:
            _run_rscript(args)
        except Exception:
            # plotting is best effort: log and carry on
            msg = 'Could not produce plot'
            LOGGER.exception(msg)
            # TODO: Here need produce empty pdf(s) to pass to output

    # ====================================================
    response.update_status('preparing output', 80)

    response.outputs['ldist'].file = dim_filename
    response.outputs['ldist_seas'].file = seas_dim_filename
    response.outputs['ld_pdf'].file = ld_pdf
    response.outputs['ld2_pdf'].file = ld2_pdf
    response.outputs['ld2_seas_pdf'].file = ld2_seas_pdf

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response