def test_get_timerange():
    """get_timerange yields (start, end) as YYYYMMDD strings, for a single
    file as well as for a list of files spanning several years."""
    single_file_cases = [
        ('cmip5_tasmax_2006_nc', '20060116', '20061216'),
        ('cordex_tasmax_2007_nc', '20070116', '20071216'),
    ]
    for key, expected_start, expected_end in single_file_cases:
        start, end = utils.get_timerange(local_path(TESTDATA[key]))
        assert start == expected_start
        assert end == expected_end

    # A list of files: the range spans from the first file's start
    # to the last file's end.
    start, end = utils.get_timerange([
        local_path(TESTDATA['cordex_tasmax_2006_nc']),
        local_path(TESTDATA['cordex_tasmax_2007_nc']),
    ])
    assert start == '20060215'
    assert end == '20071216'
def test_get_timerange():
    """get_timerange returns (start, end) as YYYYMMDD strings.

    Covers a single CMIP5 file, a single CORDEX file, and a two-file
    CORDEX list whose combined range spans both years.
    """
    start, end = utils.get_timerange(local_path(TESTDATA["cmip5_tasmax_2006_nc"]))
    assert "20060116" == start
    assert "20061216" == end

    start, end = utils.get_timerange(local_path(TESTDATA["cordex_tasmax_2007_nc"]))
    assert "20070116" == start
    assert "20071216" == end

    # list input: range is first file's start to last file's end
    start, end = utils.get_timerange(
        [local_path(TESTDATA["cordex_tasmax_2006_nc"]),
         local_path(TESTDATA["cordex_tasmax_2007_nc"])]
    )
    assert "20060215" == start
    assert "20071216" == end
def set_dynamic_md(resource):
    """
    Set dynamic meta data like time frequency, spatial extent,
    start/end time, etc. on a netCDF file.

    The relevant global attributes are read from the file, removed, and
    written back with an ``in_var_`` prefix together with freshly computed
    temporal/spatial coverage attributes.

    :param resource: netCDF file where dynamic meta data should be set
    :return: path of the updated netCDF file
    """
    from flyingpigeon.utils import get_timerange, get_time

    frequency = get_frequency(resource)
    time_coverage_start, time_coverage_end = get_timerange(resource)
    time_number_steps = len(get_time(resource))

    ds = Dataset(resource, mode='a')

    def _pop_attr(name, delete=True):
        # Read a global attribute and (optionally) remove it from the file.
        # Returns '' when the attribute is missing, logging the failure,
        # mirroring the per-attribute try/except blocks this replaces.
        try:
            value = ds.getncattr(name)
            if delete:
                ds.delncattr(name)
        except Exception as e:
            logger.error(e)
            value = ''
        return value

    driving_experiment = _pop_attr('driving_experiment')
    driving_experiment_name = _pop_attr('driving_experiment_name')
    driving_model_ensemble_member = _pop_attr('driving_model_ensemble_member')
    experiment = _pop_attr('experiment')
    tracking_id = _pop_attr('tracking_id')
    experiment_id = _pop_attr('experiment_id')
    project_id = _pop_attr('project_id')
    institution_id = _pop_attr('institution_id')
    model_version_id = _pop_attr('model_version_id')
    driving_model_id = _pop_attr('driving_model_id')
    driving_ensemble_member = _pop_attr('driving_ensemble_member')
    # BUGFIX: the original re-read 'driving_model_id' a second time (always
    # failing after the first delete, clobbering it to ''), and on a missing
    # 'model_id' assigned driving_model_id = '' instead of model_id = '',
    # which left model_id undefined (NameError below).
    model_id = _pop_attr('model_id')
    contact = _pop_attr('contact')
    driving_experiment_id = _pop_attr('driving_experiment_id')
    # CORDEX_domain is read but intentionally kept in the file.
    domain = _pop_attr('CORDEX_domain', delete=False)
    ds.close()

    min_lat, max_lat, min_lon, max_lon = get_extent(resource)
    geospatial_increment = get_geospatial_increment(resource)

    md_dynamic = {
        'in_var_driving_experiment': driving_experiment,
        'in_var_driving_experiment_name': driving_experiment_name,
        'in_var_driving_model_ensemble_member': driving_model_ensemble_member,
        'in_var_experiment': experiment,
        'in_var_experiment_id': experiment_id,
        'in_var_project_id': project_id,
        'in_var_contact': contact,
        'in_var_institution_id': institution_id,
        'in_var_model_version_id': model_version_id,
        'in_var_driving_model_id': driving_model_id,
        'in_var_model_id': model_id,
        'in_var_driving_ensemble_member': driving_ensemble_member,
        'in_var_driving_experiment_id': driving_experiment_id,
        'in_var_domain': domain,
        'in_var_tracking_id': tracking_id,
        'frequency': frequency,
        'time_coverage_start': time_coverage_start,
        'time_coverage_end': time_coverage_end,
        'time_number_steps': time_number_steps,
        # 'time_number_gaps': '',
        # 'cdm_datatype': '',
        'domain': '%s_subset' % domain,
        'geospatial_increment': geospatial_increment,
        'geospatial_lat_min': min_lat,
        'geospatial_lat_max': max_lat,
        'geospatial_lon_min': min_lon,
        'geospatial_lon_max': max_lon,
    }
    # Attributes deliberately not touched (kept as in the original file):
    # :product, :rcm_version_id, :references

    try:
        ds = Dataset(resource, mode='a')
        ds.setncatts(md_dynamic)
        ds.close()
    except Exception as e:
        logger.error(e)
    return resource
def set_dynamic_md(resource):
    """
    Set dynamic meta data like time frequency, spatial extent,
    start/end time, etc. on a netCDF file.

    The relevant global attributes are read from the file, removed, and
    written back with an ``in_var_`` prefix together with freshly computed
    temporal/spatial coverage attributes.

    :param resource: netCDF file where dynamic meta data should be set
    :return: path of the updated netCDF file
    """
    from flyingpigeon.utils import get_timerange, get_time

    frequency = get_frequency(resource)
    time_coverage_start, time_coverage_end = get_timerange(resource)
    time_number_steps = len(get_time(resource))

    ds = Dataset(resource, mode='a')

    def _pop_attr(name, delete=True):
        # Read a global attribute and (optionally) remove it from the file.
        # Returns '' when the attribute is missing, logging the failure,
        # mirroring the per-attribute try/except blocks this replaces.
        try:
            value = ds.getncattr(name)
            if delete:
                ds.delncattr(name)
        except Exception as e:
            LOGGER.error(e)
            value = ''
        return value

    driving_experiment = _pop_attr('driving_experiment')
    driving_experiment_name = _pop_attr('driving_experiment_name')
    driving_model_ensemble_member = _pop_attr('driving_model_ensemble_member')
    experiment = _pop_attr('experiment')
    tracking_id = _pop_attr('tracking_id')
    experiment_id = _pop_attr('experiment_id')
    project_id = _pop_attr('project_id')
    institution_id = _pop_attr('institution_id')
    model_version_id = _pop_attr('model_version_id')
    driving_model_id = _pop_attr('driving_model_id')
    driving_ensemble_member = _pop_attr('driving_ensemble_member')
    # BUGFIX: the original re-read 'driving_model_id' a second time (always
    # failing after the first delete, clobbering it to ''), and on a missing
    # 'model_id' assigned driving_model_id = '' instead of model_id = '',
    # which left model_id undefined (NameError below).
    model_id = _pop_attr('model_id')
    contact = _pop_attr('contact')
    driving_experiment_id = _pop_attr('driving_experiment_id')
    # CORDEX_domain is read but intentionally kept in the file.
    domain = _pop_attr('CORDEX_domain', delete=False)
    ds.close()

    min_lat, max_lat, min_lon, max_lon = get_extent(resource)
    geospatial_increment = get_geospatial_increment(resource)

    md_dynamic = {
        'in_var_driving_experiment': driving_experiment,
        'in_var_driving_experiment_name': driving_experiment_name,
        'in_var_driving_model_ensemble_member': driving_model_ensemble_member,
        'in_var_experiment': experiment,
        'in_var_experiment_id': experiment_id,
        'in_var_project_id': project_id,
        'in_var_contact': contact,
        'in_var_institution_id': institution_id,
        'in_var_model_version_id': model_version_id,
        'in_var_driving_model_id': driving_model_id,
        'in_var_model_id': model_id,
        'in_var_driving_ensemble_member': driving_ensemble_member,
        'in_var_driving_experiment_id': driving_experiment_id,
        'in_var_domain': domain,
        'in_var_tracking_id': tracking_id,
        'frequency': frequency,
        'time_coverage_start': time_coverage_start,
        'time_coverage_end': time_coverage_end,
        'time_number_steps': time_number_steps,
        # 'time_number_gaps': '',
        # 'cdm_datatype': '',
        'domain': '%s_subset' % domain,
        'geospatial_increment': geospatial_increment,
        'geospatial_lat_min': min_lat,
        'geospatial_lat_max': max_lat,
        'geospatial_lon_min': min_lon,
        'geospatial_lon_max': max_lon,
    }
    # Attributes deliberately not touched (kept as in the original file):
    # :product, :rcm_version_id, :references

    try:
        ds = Dataset(resource, mode='a')
        ds.setncatts(md_dynamic)
        ds.close()
    except Exception as e:
        LOGGER.error(e)
    return resource
def _handler(self, request, response):
    """WPS handler: compare a climate-model dataset against a reanalyses
    dataset (direction 're2mo' or 'mo2re') and search analog days with the
    external CASTf90 tool; renders an analogs viewer page as output.

    :param request: PyWPS request carrying the literal/complex inputs
    :param response: PyWPS response; output files are attached to it
    :return: the populated ``response``
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    process_start_time = time.time()  # measure process execution time ...
    response.update_status('execution started at : %s ' % dt.now(), 5)

    ################################
    # reading in the input arguments
    ################################
    start_time = time.time()  # measure init ...
    resource = archiveextract(
        resource=rename_complexinputs(request.inputs['resource']))
    refSt = request.inputs['refSt'][0].data
    refEn = request.inputs['refEn'][0].data
    dateSt = request.inputs['dateSt'][0].data
    dateEn = request.inputs['dateEn'][0].data
    regrset = request.inputs['regrset'][0].data
    seasonwin = request.inputs['seasonwin'][0].data
    nanalog = request.inputs['nanalog'][0].data

    # TODO: Add checking for wrong cordinates and apply default if nesessary
    # NOTE(review): the BBox string is reordered here from
    # (minlon, maxlon, minlat, maxlat) to (minlon, minlat, maxlon, maxlat)
    # — presumably the ocgis `geom` ordering; confirm against callers.
    bbox = []
    bboxStr = request.inputs['BBox'][0].data
    bboxStr = bboxStr.split(',')
    bbox.append(float(bboxStr[0]))
    bbox.append(float(bboxStr[2]))
    bbox.append(float(bboxStr[1]))
    bbox.append(float(bboxStr[3]))

    direction = request.inputs['direction'][0].data
    normalize = request.inputs['normalize'][0].data
    distance = request.inputs['dist'][0].data
    outformat = request.inputs['outformat'][0].data
    timewin = request.inputs['timewin'][0].data
    model_var = request.inputs['reanalyses'][0].data
    model, var = model_var.split('_')

    # Decide which dataset is the "reanalysis" period (r_time_range) and
    # which is the "model" period (m_time_range), depending on direction.
    # NOTE(review): hours 0:00 vs 12:00 look like a workaround for the
    # 31-December/time-axis issue mentioned in the original comments.
    try:
        if direction == 're2mo':
            anaSt = dt.combine(dateSt, dt_time(0, 0))
            anaEn = dt.combine(dateEn, dt_time(0, 0))
            refSt = dt.combine(refSt, dt_time(12, 0))
            refEn = dt.combine(refEn, dt_time(12, 0))
            r_time_range = [anaSt, anaEn]
            m_time_range = [refSt, refEn]
        elif direction == 'mo2re':
            anaSt = dt.combine(dateSt, dt_time(12, 0))
            anaEn = dt.combine(dateEn, dt_time(12, 0))
            refSt = dt.combine(refSt, dt_time(0, 0))
            refEn = dt.combine(refEn, dt_time(0, 0))
            r_time_range = [refSt, refEn]
            m_time_range = [anaSt, anaEn]
        else:
            LOGGER.exception(
                'failed to find time periods for comparison direction')
    except:
        msg = 'failed to put simulation and reference time in order'
        LOGGER.exception(msg)
        raise Exception(msg)

    # seasonal-cycle normalisation is off only for the literal 'None'
    if normalize == 'None':
        seacyc = False
    else:
        seacyc = True

    if outformat == 'ascii':
        outformat = '.txt'
    elif outformat == 'netCDF':
        outformat = '.nc'
    else:
        LOGGER.exception('output format not valid')

    # Map the reanalyses choice to dataset variable names; 'z500'-style
    # names carry the pressure level after the 'z'.
    try:
        if model == 'NCEP':
            getlevel = True
            if 'z' in var:
                level = var.strip('z')
                variable = 'hgt'
            else:
                variable = 'slp'
                level = None
        elif '20CRV2' in model:
            getlevel = False
            if 'z' in var:
                variable = 'hgt'
                level = var.strip('z')
            else:
                variable = 'prmsl'
                level = None
        else:
            LOGGER.exception('Reanalyses model not known')
        LOGGER.info('environment set')
    except:
        msg = 'failed to set environment'
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('Read in the arguments', 6)

    #################
    # get input data
    #################
    # TODO: do not forget to select years
    start_time = time.time()  # measure get_input_data ...
    response.update_status('fetching input data', 7)
    try:
        if direction == 're2mo':
            nc_reanalyses = reanalyses(start=anaSt.year, end=anaEn.year,
                                       variable=var, dataset=model,
                                       getlevel=getlevel)
        else:
            nc_reanalyses = reanalyses(start=refSt.year, end=refEn.year,
                                       variable=var, dataset=model,
                                       getlevel=getlevel)

        if type(nc_reanalyses) == list:
            nc_reanalyses = sorted(
                nc_reanalyses,
                key=lambda i: path.splitext(path.basename(i))[0])
        else:
            nc_reanalyses = [nc_reanalyses]

        # For 20CRV2 geopotential height, daily dataset for 100 years is
        # about 50 Gb, so it is processed step-by-step: select the level
        # per file first, then merge and subset in time.
        # TODO: need to create dictionary for such datasets (for models as well)
        # TODO Now everything regrid to the reanalysis
        if ('20CRV2' in model) and ('z' in var):
            tmp_total = []
            origvar = get_variable(nc_reanalyses)
            for z in nc_reanalyses:
                tmp_n = 'tmp_%s' % (uuid.uuid1())
                b0 = call(resource=z, variable=origvar,
                          level_range=[int(level), int(level)], geom=bbox,
                          spatial_wrapping='wrap',
                          prefix='levdom_' + path.basename(z)[0:-3])
                tmp_total.append(b0)
            tmp_total = sorted(
                tmp_total, key=lambda i: path.splitext(path.basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total, variable=origvar,
                                    time_range=r_time_range)
            # Clean (removal of the temp files is currently disabled)
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                # system(tbr)
            # Create new variable: drop the (length-1) level dimension,
            # i.e. hgt(time, plev, lat, lon) -> z<level>(time, lat, lon)
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z%s' % level, z_var.dtype,
                                        dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            ds.close()
            nc_subset = call(inter_subset_tmp, variable='z%s' % level)
        else:
            nc_subset = call(
                resource=nc_reanalyses, variable=var, geom=bbox,
                spatial_wrapping='wrap', time_range=r_time_range)
        response.update_status('**** Input reanalyses data fetched', 10)
    except:
        msg = 'failed to fetch or subset input files'
        LOGGER.exception(msg)
        raise Exception(msg)

    ########################
    # input data preperation
    ########################
    response.update_status('Start preparing input data', 12)

    # Filter resource: keep only the model files overlapping m_time_range
    if type(resource) == list:
        resource = sorted(resource,
                          key=lambda i: path.splitext(path.basename(i))[0])
    else:
        resource = [resource]

    tmp_resource = []
    m_start = m_time_range[0]
    m_end = m_time_range[1]
    for re in resource:
        s, e = get_timerange(re)
        tmpSt = dt.strptime(s, '%Y%m%d')
        tmpEn = dt.strptime(e, '%Y%m%d')
        if ((tmpSt <= m_end) and (tmpEn >= m_start)):
            tmp_resource.append(re)
            LOGGER.debug('Selected file: %s ' % (re))
    resource = tmp_resource

    start_time = time.time()  # mesure data preperation ...
    # TODO: Check the callendars ! for model vs reanalyses.
    # TODO: Check the units! model vs reanalyses.
    try:
        m_total = []
        modvar = get_variable(resource)
        # resource properties: probe the first file for dimensionality
        ds = Dataset(resource[0])
        m_var = ds.variables[modvar]
        dims = list(m_var.dimensions)
        dimlen = len(dims)
        try:
            model_id = ds.getncattr('model_id')
        except AttributeError:
            model_id = 'Unknown model'
        LOGGER.debug('MODEL: %s ' % (model_id))

        lev_units = 'hPa'
        if (dimlen > 3):
            # actually index [1] need to be detected...
            # assuming zg(time, plev, lat, lon)
            lev = ds.variables[dims[1]]
            lev_units = lev.units
            if (lev_units == 'Pa'):
                # hPa -> Pa for the file's level axis
                m_level = str(int(level) * 100)
            else:
                m_level = level
        else:
            m_level = None

        if level == None:
            level_range = None
        else:
            level_range = [int(m_level), int(m_level)]

        for z in resource:
            tmp_n = 'tmp_%s' % (uuid.uuid1())
            # select level and regrid onto the reanalysis grid
            b0 = call(
                resource=z, variable=modvar, level_range=level_range,
                spatial_wrapping='wrap',
                regrid_destination=nc_reanalyses[0], regrid_options='bil',
                prefix=tmp_n)
            # select domain
            b01 = call(resource=b0, geom=bbox, spatial_wrapping='wrap',
                       prefix='levregr_' + path.basename(z)[0:-3])
            # temp-file removal currently disabled
            tbr = 'rm -f %s' % (b0)
            tbr = 'rm -f %s' % (tmp_n)
            # get full resource
            m_total.append(b01)
        ds.close()

        model_subset = call(m_total, time_range=m_time_range)
        for i in m_total:
            tbr = 'rm -f %s' % (i)

        if m_level is not None:
            # Create new variable in model set: drop the level dimension,
            # mirroring the reanalyses treatment above
            ds = Dataset(model_subset, mode='a')
            mod_var = ds.variables.pop(modvar)
            dims = mod_var.dimensions
            new_modvar = ds.createVariable('z%s' % level, mod_var.dtype,
                                           dimensions=(dims[0], dims[2], dims[3]))
            new_modvar[:, :, :] = squeeze(mod_var[:, 0, :, :])
            ds.close()
            mod_subset = call(model_subset, variable='z%s' % level)
        else:
            mod_subset = model_subset
    except:
        msg = 'failed to subset simulation or reference data'
        LOGGER.exception(msg)
        raise Exception(msg)

    # --------------------------------------------
    # Decide which subset plays archive vs simulation for CASTf90
    try:
        if direction == 'mo2re':
            simulation = mod_subset
            archive = nc_subset
            base_id = model
            sim_id = model_id
        elif direction == 're2mo':
            simulation = nc_subset
            archive = mod_subset
            base_id = model_id
            sim_id = model
        else:
            LOGGER.exception('direction not valid: %s ' % direction)
    except:
        msg = 'failed to find comparison direction'
        LOGGER.exception(msg)
        raise Exception(msg)

    # Make sure both files expose the same variable name
    try:
        if level is not None:
            out_var = 'z%s' % level
        else:
            var_archive = get_variable(archive)
            var_simulation = get_variable(simulation)
            if var_archive != var_simulation:
                rename_variable(archive, oldname=var_archive,
                                newname=var_simulation)
                out_var = var_simulation
                LOGGER.info('varname %s in netCDF renamed to %s'
                            % (var_archive, var_simulation))
    except:
        msg = 'failed to rename variable in target files'
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        if seacyc is True:
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = None
            seasoncyc_sim = None
    except:
        msg = 'failed to prepare seasonal cycle reference files'
        LOGGER.exception(msg)
        raise Exception(msg)

    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    ############################
    # generating the config file
    ############################
    response.update_status('writing config file', 18)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            base_id=base_id,
            sim_id=sim_id,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[
                dt.strftime(refSt, '%Y-%m-%d'),
                dt.strftime(refEn, '%Y-%m-%d')
            ],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except:
        msg = 'failed to generate config file'
        LOGGER.exception(msg)
        raise Exception(msg)

    #######################
    # CASTf90 call
    #######################
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    response.update_status('Start CASTf90 call', 20)
    try:
        # NOTE(review): 'analogue.out' is expected on PATH — external
        # CASTf90 binary; confirm deployment.
        cmd = 'analogue.out %s' % path.relpath(config_file)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()
        LOGGER.info('analogue.out info:\n %s ' % output)
        LOGGER.exception('analogue.out errors:\n %s ' % error)
        response.update_status('**** CASTf90 suceeded', 90)
    except:
        msg = 'CASTf90 failed'
        LOGGER.exception(msg)
        raise Exception(msg)
    LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

    response.update_status('preparting output', 91)
    response.outputs['config'].file = config_file
    response.outputs['analogs'].file = output_file
    response.outputs['output_netcdf'].file = simulation
    response.outputs['target_netcdf'].file = archive

    ########################
    # generate analog viewer
    ########################
    formated_analogs_file = analogs.reformat_analogs(output_file)
    response.outputs['formated_analogs'].file = formated_analogs_file
    LOGGER.info('analogs reformated')
    response.update_status('reformatted analog file', 95)

    viewer_html = analogs.render_viewer(
        configfile=config_file,
        datafile=formated_analogs_file)
    response.outputs['output'].file = viewer_html
    response.update_status('Successfully generated analogs viewer', 99)
    LOGGER.info('rendered pages: %s ', viewer_html)

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response
def _handler(self, request, response):
    """WPS handler: search analog days within a single (model) dataset
    using the external CASTf90 tool, and render an analogs viewer page.

    :param request: PyWPS request carrying the literal/complex inputs
    :param response: PyWPS response; output files are attached to it
    :return: the populated ``response``
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)
    process_start_time = time.time()  # measure process execution time ...
    start_time = time.time()  # measure init ...

    ################################
    # reading in the input arguments
    ################################
    response.update_status('execution started at : %s ' % dt.now(), 5)
    start_time = time.time()  # measure init ...

    try:
        response.update_status('read input parameter : %s ' % dt.now(), 5)
        resource = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))
        refSt = request.inputs['refSt'][0].data
        refEn = request.inputs['refEn'][0].data
        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data
        seasonwin = request.inputs['seasonwin'][0].data
        nanalog = request.inputs['nanalog'][0].data

        level = request.inputs['level'][0].data
        # dummy second level required by the cdo sellevel workaround below
        if (level == 500):
            dummylevel = 1000
        else:
            dummylevel = 500
        LOGGER.debug('LEVEL selected: %s hPa' % (level))

        # TODO: Add checking for wrong cordinates and apply default if nesessary
        # NOTE(review): the BBox string is reordered here from
        # (minlon, maxlon, minlat, maxlat) to (minlon, minlat, maxlon, maxlat)
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        bboxStr = bboxStr.split(',')
        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
        LOGGER.debug('BBOX original: %s ' % (bboxStr))

        normalize = request.inputs['normalize'][0].data
        distance = request.inputs['dist'][0].data
        outformat = request.inputs['outformat'][0].data
        timewin = request.inputs['timewin'][0].data

        LOGGER.info('input parameters set')
        response.update_status('Read in and convert the arguments', 5)
    except Exception as e:
        msg = 'failed to read input prameter %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    ######################################
    # convert types and set environment
    ######################################
    try:
        # fix the time component at 12:00 — workaround for the
        # 31-December/ocgis time-axis issue noted in the original
        refSt = dt.combine(refSt, dt_time(12, 0))
        refEn = dt.combine(refEn, dt_time(12, 0))
        dateSt = dt.combine(dateSt, dt_time(12, 0))
        dateEn = dt.combine(dateEn, dt_time(12, 0))

        # seasonal-cycle normalisation is off only for the literal 'None'
        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            LOGGER.error('output format not valid')

        # overall period covering both reference and simulation windows
        start = min(refSt, dateSt)
        end = max(refEn, dateEn)

        LOGGER.info('environment set')
    except Exception as e:
        msg = 'failed to set environment %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)

    LOGGER.debug("init took %s seconds.", time.time() - start_time)
    response.update_status('Read in and convert the arguments', 5)

    ########################
    # input data preperation
    ########################
    # TODO: Check if files containing more than one dataset
    response.update_status('Start preparing input data', 12)
    start_time = time.time()  # mesure data preperation ...
    try:
        # TODO: Add selection of the level. maybe bellow in call(..., level_range=[...,...])
        if type(resource) == list:
            resource = sorted(
                resource, key=lambda i: path.splitext(path.basename(i))[0])
        else:
            resource = [resource]

        # Remove resources which are out of interest from the list
        # (years > and < than requested for calculation)
        tmp_resource = []
        for re in resource:
            s, e = get_timerange(re)
            tmpSt = dt.strptime(s, '%Y%m%d')
            tmpEn = dt.strptime(e, '%Y%m%d')
            if ((tmpSt <= end) and (tmpEn >= start)):
                tmp_resource.append(re)
                LOGGER.debug('Selected file: %s ' % (re))
        resource = tmp_resource

        # Try to fix memory issue... (ocgis call for files like 20-30 gb...)
        # IF 4D - select pressure level before domain cut
        #
        # resource properties: probe the first file
        ds = Dataset(resource[0])
        variable = get_variable(resource[0])
        var = ds.variables[variable]
        dims = list(var.dimensions)
        dimlen = len(dims)
        try:
            model_id = ds.getncattr('model_id')
        except AttributeError:
            model_id = 'Unknown model'
        LOGGER.debug('MODEL: %s ' % (model_id))

        lev_units = 'hPa'
        if (dimlen > 3):
            # actually index [1] need to be detected...
            # assuming zg(time, plev, lat, lon)
            lev = ds.variables[dims[1]]
            lev_units = lev.units
            if (lev_units == 'Pa'):
                # TODO: OR check the NAME and units of vertical level and
                # find 200, 300, or 500 mbar in it, not just level * 100.
                level = level * 100
                dummylevel = dummylevel * 100

        # Get Levels via cdo (sellevel needs at least two levels, hence
        # the dummy level)
        from cdo import Cdo
        cdo = Cdo()

        lev_res = []
        if (dimlen > 3):
            for res_fn in resource:
                tmp_f = 'lev_' + path.basename(res_fn)
                comcdo = '%s,%s' % (level, dummylevel)
                cdo.sellevel(comcdo, input=res_fn, output=tmp_f)
                lev_res.append(tmp_f)
        else:
            lev_res = resource

        # Get domain
        regr_res = []
        for res_fn in lev_res:
            tmp_f = 'dom_' + path.basename(res_fn)
            comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3])
            cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
            regr_res.append(tmp_f)

        archive_tmp = call(resource=regr_res, time_range=[refSt, refEn],
                           spatial_wrapping='wrap')
        simulation_tmp = call(resource=regr_res, time_range=[dateSt, dateEn],
                              spatial_wrapping='wrap')

        # TEMORAL dirty workaround to get the level and it's units -
        # will be func in utils.py
        if (dimlen > 3):
            archive = get_level(archive_tmp, level=level)
            simulation = get_level(simulation_tmp, level=level)
            variable = 'z%s' % level
            # TODO: here should be modulated
        else:
            archive = archive_tmp
            simulation = simulation_tmp
            # 3D, move forward

        if seacyc is True:
            seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                archive, simulation, method=normalize)
        else:
            seasoncyc_base = None
            seasoncyc_sim = None
    except Exception as e:
        msg = 'failed to prepare archive and simulation files %s ' % e
        LOGGER.debug(msg)
        raise Exception(msg)

    ip, output = mkstemp(dir='.', suffix='.txt')
    output_file = path.abspath(output)
    files = [path.abspath(archive), path.abspath(simulation), output_file]

    LOGGER.debug("data preperation took %s seconds.",
                 time.time() - start_time)

    ############################
    # generating the config file
    ############################
    # TODO: add MODEL name as argument
    response.update_status('writing config file', 15)
    start_time = time.time()  # measure write config ...
    try:
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            base_id=model_id,
            sim_id=model_id,
            timewin=timewin,
            varname=variable,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[dt.strftime(refSt, '%Y-%m-%d'),
                    dt.strftime(refEn, '%Y-%m-%d')],
            bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
    except Exception as e:
        msg = 'failed to generate config file %s ' % e
        LOGGER.debug(msg)
        raise Exception(msg)
    LOGGER.debug("write_config took %s seconds.", time.time() - start_time)

    ##############
    # CASTf90 call
    ##############
    import subprocess
    import shlex

    start_time = time.time()  # measure call castf90
    response.update_status('Start CASTf90 call', 20)
    try:
        # NOTE(review): 'analogue.out' is expected on PATH — external
        # CASTf90 binary; confirm deployment.
        cmd = 'analogue.out %s' % path.relpath(config_file)
        args = shlex.split(cmd)
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()
        LOGGER.info('analogue.out info:\n %s ' % output)
        LOGGER.debug('analogue.out errors:\n %s ' % error)
        response.update_status('**** CASTf90 suceeded', 70)
    except Exception as e:
        msg = 'CASTf90 failed %s ' % e
        LOGGER.error(msg)
        raise Exception(msg)
    LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

    response.update_status('preparing output', 70)
    response.outputs['config'].file = config_file
    response.outputs['analogs'].file = output_file
    response.outputs['output_netcdf'].file = simulation

    ########################
    # generate analog viewer
    ########################
    formated_analogs_file = analogs.reformat_analogs(output_file)
    response.outputs['formated_analogs'].file = formated_analogs_file
    LOGGER.info('analogs reformated')
    response.update_status('reformatted analog file', 80)

    viewer_html = analogs.render_viewer(
        configfile=config_file,
        datafile=formated_analogs_file)
    response.outputs['output'].file = viewer_html
    response.update_status('Successfully generated analogs viewer', 90)
    LOGGER.info('rendered pages: %s ', viewer_html)

    response.update_status('execution ended', 100)
    LOGGER.debug("total execution took %s seconds.",
                 time.time() - process_start_time)
    return response
def signal_noise_ratio(resource=None, start=None, end=None, timeslice=20,
                       variable=None, title=None, cmap='seismic'):
    """Compute the climate-change signal of a model ensemble and its
    robustness against the inter-model noise.

    The ensemble members are time-merged per experiment, forced onto a common
    calendar, restricted to the common time period, and then the signal
    (difference between the time-mean of the last and first ``timeslice``
    years) is compared against the inter-model standard deviation.

    :param resource: list of paths to netCDF files (one ensemble)
    :param start: beginning of reference period as ``YYYYMMDD`` string
                  (if None (default), the first common year of the ensemble
                  is detected)
    :param end: end of comparison period as ``YYYYMMDD`` string
                (if None (default), the last common year of the ensemble
                is detected)
    :param timeslice: period length in days for the mean calculation of the
                      reference and comparison period
    :param variable: OBSOLETE (detected from the files)
    :param title: str to be used as title for the signal map
    :param cmap: color scheme for signal map plotting

    :return: signal.nc, low_agreement_mask.nc, high_agreement_mask.nc,
             path of the source-documentation text file
    """
    from os import close
    from tempfile import mkstemp
    from cdo import Cdo
    cdo = Cdo()
    cdo.forceOutput = True

    # Avoid the shared mutable-default pitfall of ``resource=[]``.
    if resource is None:
        resource = []

    # --- sort the input files into experiments ---------------------------
    try:
        file_dic = sort_by_filename(resource, historical_concatination=True)
        LOGGER.info('file names sorted experimets: %s' % len(file_dic.keys()))
    except:
        msg = 'failed to sort the input files'
        LOGGER.exception(msg)

    # --- check that all datasets contain the same variable ---------------
    try:
        var_name = set()
        for key in file_dic.keys():
            var_name = var_name.union([get_variable(file_dic[key])])
        LOGGER.debug(var_name)
    except:
        LOGGER.exception('failed to get the variable in common')

    if len(var_name) == 1:
        variable = [str(n) for n in var_name][0]
        LOGGER.info('varible %s detected in all members of the ensemble' % variable)
    else:
        raise Exception(
            'none or more than one variables are found in the ensemble members')

    # TODO: drop missfitting grids

    # --- timemerge for separate datasets ----------------------------------
    try:
        mergefiles = []
        for key in file_dic.keys():
            try:
                if type(file_dic[key]) == list and len(file_dic[key]) > 1:
                    fd, nc_merge = mkstemp(dir='.', suffix='.nc')
                    close(fd)  # mkstemp leaves the descriptor open; cdo only needs the path
                    mergefiles.append(
                        cdo.mergetime(input=file_dic[key], output=nc_merge))
                else:
                    mergefiles.extend(file_dic[key])
            except:
                LOGGER.exception('failed to merge files for %s ' % key)
        LOGGER.info('datasets merged %s ' % mergefiles)
    except:
        msg = 'seltime and mergetime failed'
        LOGGER.exception(msg)

    # --- verify the calendar ----------------------------------------------
    # Find the most common calendar among the members; members deviating
    # from it are rewritten with cdo setcalendar.
    cals = []
    for nc in mergefiles:
        cal, unit = get_calendar(nc)
        cals.append(cal)
    calendar = None
    n = 0
    for cal in cals:
        m = cals.count(cal)
        if m > n:
            n = m  # was missing before: loop always ended on the *last* calendar
            calendar = cal

    for c, nc in enumerate(mergefiles):
        cal, unit = get_calendar(nc)
        LOGGER.debug('calendar detected: %s most common: %s' % (cal, calendar))
        if cal != calendar:
            fd, nc_cal = mkstemp(dir='.', suffix='.nc')
            close(fd)
            nc_out = cdo.setcalendar('{0}'.format(calendar),
                                     input=nc, output=nc_cal)
            mergefiles[c] = nc_cal
            LOGGER.debug('calendar changed for %s' % nc)
        else:
            LOGGER.debug('calendar was %s' % cal)

    # --- dataset documentation --------------------------------------------
    # NOTE(review): previously the *closed file object* was returned here
    # while the failure branch returned a path; now both branches yield a path.
    try:
        with open('infiles.txt', 'a') as f:
            for key in file_dic.keys():
                f.write(key + '\n')
        text_src = 'infiles.txt'
    except:
        msg = 'failed to write source textfile'
        LOGGER.exception(msg)
        _, text_src = mkstemp(dir='.', suffix='.txt')

    # --- configure reference and compare period ---------------------------
    st = set()
    en = set()
    for key in file_dic.keys():
        # TODO: convert 360day calendar
        s, e = get_timerange(file_dic[key])
        st.update([s])
        en.update([e])

    # Common period = latest start / earliest end. ``YYYYMMDD`` strings
    # compare correctly lexicographically; max/min replaces the former
    # list(st)[-1] / list(en)[0], which is arbitrary on a set.
    if start is None:
        start = max(st)
    else:
        if start < max(st):
            start = max(st)
            LOGGER.debug(
                'start was befor the first common timestep, set start to the first common timestep')
    if end is None:
        end = min(en)
    else:
        if end > min(en):
            end = min(en)
            LOGGER.debug(
                'end was after the last common timestepp, set end to last common timestep ')

    from datetime import datetime as dt
    from datetime import timedelta

    # %m (month), not %M (minute): get_timerange returns YYYYMMDD strings.
    start = dt.strptime(start, '%Y%m%d')
    end = dt.strptime(end, '%Y%m%d')
    length = end - start

    # --- set the periods ---------------------------------------------------
    try:
        if timeslice is None:
            td = length / 3
        else:
            td = timedelta(days=timeslice)
            if td > length:
                td = length / 3
                LOGGER.debug(
                    'timeslice is larger as whole timeseries! set timeslice to third of timeseries')
        start_td = start + td
        end_td = end - td
        LOGGER.info('timeslice and periodes set')
    except:
        msg = 'failed to set the periodes'
        LOGGER.exception(msg)

    # --- select the common years from every member -------------------------
    try:
        files = []
        for i, mf in enumerate(mergefiles):
            files.append(
                cdo.selyear('{0}/{1}'.format(start.year, end.year),
                            input=[mf.replace(' ', '\ ')],
                            output='file_{0}_.nc'.format(i)))  # python version
        LOGGER.info('timeseries selected from defined start to end year')
    except:
        msg = 'seltime and mergetime failed'
        LOGGER.exception(msg)

    # --- ensemble statistics -----------------------------------------------
    try:
        nc_ensmean = cdo.ensmean(input=files, output='nc_ensmean.nc')
        LOGGER.info('ensemble mean calculation done')
    except:
        msg = 'ensemble mean failed'
        LOGGER.exception(msg)

    try:
        nc_ensstd = cdo.ensstd(input=files, output='nc_ensstd.nc')
        LOGGER.info('ensemble std and calculation done')
    except:
        msg = 'ensemble std or failed'
        LOGGER.exception(msg)

    # --- signal: difference between first-years and last-years means -------
    try:
        selyearstart = cdo.selyear('%s/%s' % (start.year, start_td.year),
                                   input=nc_ensmean, output='selyearstart.nc')
        selyearend = cdo.selyear('%s/%s' % (end_td.year, end.year),
                                 input=nc_ensmean, output='selyearend.nc')
        meanyearst = cdo.timmean(input=selyearstart, output='meanyearst.nc')
        meanyearend = cdo.timmean(input=selyearend, output='meanyearend.nc')
        signal = cdo.sub(input=[meanyearend, meanyearst], output='signal.nc')
        LOGGER.info('Signal calculation done')
    except:
        msg = 'calculation of signal failed'
        LOGGER.exception(msg)
        _, signal = mkstemp(dir='.', suffix='.nc')

    # --- inter-model standard deviation (mean over whole period) -----------
    try:
        std = cdo.timmean(input=nc_ensstd, output='std.nc')
        std2 = cdo.mulc('2', input=std, output='std2.nc')
        LOGGER.info('calculation of internal model std for time period done')
    except:
        msg = 'calculation of internal model std failed'
        LOGGER.exception(msg)

    # --- robustness masks ---------------------------------------------------
    try:
        high_agreement_mask = cdo.gt(
            input=[signal, std2], output='signal_larger_than_noise.nc')
        low_agreement_mask = cdo.lt(
            input=[signal, std], output='signal_smaller_than_noise.nc')
        LOGGER.info('high and low mask done')
    except:
        msg = 'calculation of robustness mask failed'
        LOGGER.exception(msg)
        _, high_agreement_mask = mkstemp(dir='.', suffix='.nc')
        _, low_agreement_mask = mkstemp(dir='.', suffix='.nc')

    return signal, low_agreement_mask, high_agreement_mask, text_src
def _handler(self, request, response):
    """PyWPS handler: weather-regime clustering of a model dataset.

    Reads resource files plus season/period/bbox/clustering parameters from
    *request*, subsets and normalizes the data (anomalies per annual cycle,
    seasonal extraction), then runs the R script ``weatherregimes_model.R``
    and attaches its outputs (pdf graphic, PCA text file, classification
    Rdat, seasonal netCDF) to *response*.

    :param request: WPS request carrying the process inputs
    :param response: WPS response whose outputs/status are populated here
    :return: the populated *response*
    :raises Exception: when argument parsing, calendar detection, the R
        command construction, or the R subprocess fails
    """
    init_process_logger('log.txt')
    response.outputs['output_log'].file = 'log.txt'

    LOGGER.info('Start process')
    response.update_status('execution started at : {}'.format(dt.now()), 5)

    ################################
    # reading in the input arguments
    ################################
    try:
        # NOTE(review): duplicate of the status update above; kept to
        # preserve the observable status sequence.
        response.update_status(
            'execution started at : {}'.format(dt.now()), 5)

        LOGGER.info('read in the arguments')
        resource = archiveextract(
            resource=[res.file for res in request.inputs['resource']])

        # If files are from different datasets,
        # i.e. files: ...output1/slp.1999.nc and ...output2/slp.1997.nc
        # will not be sorted with just .sort(), so sort on the basename stem:
        if type(resource) == list:
            resource = sorted(
                resource, key=lambda i: path.splitext(path.basename(i))[0])
        else:
            resource = [resource]

        season = request.inputs['season'][0].data
        LOGGER.info('season {}'.format(season))

        # BBox comes in as "minlon,maxlon,minlat,maxlat"; ocgis expects
        # [minlon, minlat, maxlon, maxlat] — hence the index shuffle.
        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        bboxStr = bboxStr.split(',')
        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: {}'.format(bbox))
        LOGGER.debug('BBOX original: {}'.format(bboxStr))

        period = request.inputs['period'][0].data
        LOGGER.info('period: {}'.format(period))
        anualcycle = request.inputs['anualcycle'][0].data
        kappa = request.inputs['kappa'][0].data
        LOGGER.info('kappa: {}'.format(kappa))
        method = request.inputs['method'][0].data
        LOGGER.info('Calc annual cycle with {}'.format(method))
        sseas = request.inputs['sseas'][0].data
        LOGGER.info('Annual cycle calc with {}'.format(sseas))

        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        # OCGIS for models workaround - to catch 31 of Dec
        start = dt.combine(start, dt_time(12, 0))
        end = dt.combine(end, dt_time(12, 0))

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        LOGGER.debug('Reference start: {0}, end: {1}'.format(
            reference[0], reference[1]))
        reference[0] = dt.combine(reference[0], dt_time(12, 0))
        reference[1] = dt.combine(reference[1], dt_time(12, 0))
        LOGGER.debug('New Reference start: {0}, end: {1}'.format(
            reference[0], reference[1]))

        # Check if 360_day calendar (all months are exactly 30 days):
        # day 31 does not exist there, so clamp requested dates to day 30.
        try:
            if type(resource) is not list:
                resource = [resource]
            modcal, calunits = get_calendar(resource[0])
            if '360_day' in modcal:
                if start.day == 31:
                    start = start.replace(day=30)
                    LOGGER.debug(
                        'Date has been changed for: {}'.format(start))
                if end.day == 31:
                    end = end.replace(day=30)
                    LOGGER.debug(
                        'Date has been changed for: {}'.format(end))
                if reference[0].day == 31:
                    reference[0] = reference[0].replace(day=30)
                    LOGGER.debug('Date has been changed for: {}'.format(
                        reference[0]))
                if reference[1].day == 31:
                    reference[1] = reference[1].replace(day=30)
                    LOGGER.debug('Date has been changed for: {}'.format(
                        reference[1]))
        except Exception as ex:
            msg = 'Could not detect calendar: {}'.format(ex)
            LOGGER.debug(msg)
            raise Exception(msg)

        LOGGER.debug('start: {0}, end: {1}'.format(start, end))
        LOGGER.info('bbox: {}'.format(bbox))
        LOGGER.info('period {}'.format(period))
        LOGGER.info('season {}'.format(season))
    except Exception as ex:
        msg = 'failed to read in the arguments: {}'.format(ex)
        LOGGER.exception(msg)
        raise Exception(msg)

    ############################################################
    # get the required bbox and time region from resource data
    ############################################################
    response.update_status('start subsetting', 17)

    from flyingpigeon.ocgis_module import call
    from flyingpigeon.utils import get_variable, get_timerange

    time_range = [start, end]

    # Keep only the files whose time range overlaps [start, end].
    tmp_resource = []
    for re in resource:
        s, e = get_timerange(re)
        tmpSt = dt.strptime(s, '%Y%m%d')
        tmpEn = dt.strptime(e, '%Y%m%d')
        if (tmpSt <= end) and (tmpEn >= start):
            tmp_resource.append(re)
            LOGGER.debug('Selected file: {}'.format(re))
    resource = tmp_resource

    # Here start trick with z... levels and regridding...
    # Otherwise call will give memory error for hires models with geop
    # TODO: Add level and domain selection as in wps_analogs_model for 4D var.
    variable = get_variable(resource)
    model_subset = call(
        resource=resource, variable=variable,
        geom=bbox, spatial_wrapping='wrap', time_range=time_range,
        # conform_units_to=conform_units_to
    )
    LOGGER.info('Dataset subset done: {}'.format(model_subset))
    response.update_status('dataset subsetted', 18)

    #####################
    # computing anomalies
    #####################
    response.update_status('computing anomalies ', 19)
    model_anomal = wr.get_anomalies(model_subset, reference=reference,
                                    method=method, sseas=sseas)

    ###################
    # extracting season
    ###################
    model_season = wr.get_season(model_anomal, season=season)
    response.update_status('values normalized', 20)

    ####################
    # call the R scripts
    ####################
    response.update_status('Start weather regime clustering ', 50)
    import subprocess
    from os import close
    from flyingpigeon import config
    from os.path import curdir, join

    try:
        Rsrc = config.Rsrc_dir()
        Rfile = 'weatherregimes_model.R'
        infile = model_season  # model_subset # model_ponderate

        # mkstemp returns an *open* descriptor plus the path; close the
        # descriptor immediately — only the path is handed to R (previously
        # three descriptors leaked per call).
        fd, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
        close(fd)
        fd, file_pca = mkstemp(dir=curdir, suffix='.txt')
        close(fd)
        fd, file_class = mkstemp(dir=curdir, suffix='.Rdat')
        close(fd)

        # TODO: Rewrite this using os.path.join or pathlib libraries
        args = [
            'Rscript',
            join(Rsrc, Rfile),
            '%s/' % curdir,
            '%s/' % Rsrc,
            '%s' % infile,
            '%s' % variable,
            '%s' % output_graphics,
            '%s' % file_pca,
            '%s' % file_class,
            '%s' % season,
            '%s' % start.year,
            '%s' % end.year,
            '%s' % 'MODEL',
            '%s' % kappa
        ]
        LOGGER.info('Rcall builded')
        LOGGER.debug('ARGS: {}'.format(args))
    except Exception as ex:
        msg = 'failed to build the R command {}'.format(ex)
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        output, error = subprocess.Popen(
            args, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()  # ,shell=True
        LOGGER.info('R outlog info:\n {}'.format(output))
        LOGGER.debug('R outlog errors:\n {}'.format(error))
        if len(output) > 0:
            response.update_status('**** weatherregime in R suceeded', 90)
        else:
            LOGGER.error('NO! output returned from R call')
    except Exception as ex:
        msg = 'failed to run the R weatherregime: {}'.format(ex)
        LOGGER.exception(msg)
        raise Exception(msg)

    response.update_status('Weather regime clustering done ', 92)

    ############################################
    # set the outputs
    ############################################
    response.update_status('Set the process outputs ', 95)
    response.outputs['Routput_graphic'].file = output_graphics
    response.outputs['output_pca'].file = file_pca
    response.outputs['output_classification'].file = file_class
    response.outputs['output_netcdf'].file = model_season
    response.update_status('done', 100)
    return response