def makeRandomDir(self,tmpDir="/dev/shm/"):
    """Create a uniquely named (temporary) directory and return its path.

    The default parent "/dev/shm/" places the directory in memory (tmpfs).
    A random name is drawn via vos.get_random_word(); on a name collision
    (or other OS-level failure) a new name is drawn and creation is
    retried. Every directory created is appended to self.randomDirList so
    it can be cleaned up later (see cleanRandomDir).
    """
    randomDir = tmpDir + vos.get_random_word()
    directoryExist = True
    while directoryExist:
        try:
            os.makedirs(randomDir)
            directoryExist = False
            self.randomDirList.append(randomDir)
        # BUGFIX: was a bare 'except:', which also swallowed
        # KeyboardInterrupt/SystemExit and could loop forever on any
        # unrelated error; only OS-level creation failures should retry.
        except OSError:
            # generate another random directory name and try again
            randomDir = tmpDir + vos.get_random_word()
    return randomDir
def makeRandomDir(self,tmpDir):
    """Create a uniquely named (temporary) directory under tmpDir and return its path.

    Keeps drawing random names (vos.get_random_word) until os.makedirs
    succeeds; each created directory is recorded in self.randomDirList
    for later cleanup.
    """
    candidate = tmpDir + vos.get_random_word()
    created = False
    while not created:
        try:
            os.makedirs(candidate)
            created = True
            self.randomDirList.append(candidate)
        except:
            # that name was already taken (or creation failed) -- draw a new one
            candidate = tmpDir + vos.get_random_word()
    return candidate
def __init__(self, input_files,\
             output_files,\
             modelTime,\
             main_tmp_dir = "/dev/shm/"):
    """Set up the model: temporary directory, clone/catchment/cell-area maps,
    GRACE and model anomaly time series, and the output netcdf files.

    - input_files / output_files: dicts of file paths; keys used here are
      "basin30minmap", "area30min_map" and the
      'basinscale_tws_{month,annua}_anomaly' output entries.
    - modelTime: time-stepping object, stored as-is.
    - main_tmp_dir: parent for the random working directory; the default
      "/dev/shm/" keeps it in memory (tmpfs).
    """
    DynamicModel.__init__(self)
    self.input_files = input_files
    self.output_files = output_files
    self.modelTime = modelTime
    # main temporary directory (random name under main_tmp_dir)
    self.main_tmp_dir = main_tmp_dir+"/"+vos.get_random_word()
    # make the temporary directory if not exist yet
    try:
        os.makedirs(self.main_tmp_dir)
    except:
        # creation failed -- presumably leftovers from a previous run;
        # remove them and retry (TODO confirm this is the intended case)
        os.system('rm -r '+str(self.main_tmp_dir)+'*')
        os.makedirs(self.main_tmp_dir)
    # clone map for pcraster process - depend on the resolution of the basin/catchment map
    pcr.setclone(self.input_files["basin30minmap"])
    self.clone_map = pcr.boolean(1.0)
    #
    # catchment ids map (non-positive ids are masked out)
    self.catchment = pcr.nominal(\
                     pcr.readmap(self.input_files["basin30minmap"]))
    self.catchment = pcr.ifthen(pcr.scalar(self.catchment) > 0.0,\
                                self.catchment)
    # cell area map (missing values covered with 0.0)
    self.cell_area = pcr.cover(pcr.readmap(self.input_files["area30min_map"]), 0.0)
    # prepare grace monthly and annual anomaly time series
    self.pre_process_grace_file()
    # prepare model monthly and annual anomaly time series
    self.pre_process_model_file()
    # prepare object for writing netcdf files:
    self.output = OutputNetcdf(self.input_files["area30min_map"])
    # one netcdf file per series: monthly and annual basin-scale TWS
    # anomalies, for both the GRACE observations and the model
    self.output.createNetCDF(self.output_files['basinscale_tws_month_anomaly']['grace'], "lwe_thickness","m")
    self.output.createNetCDF(self.output_files['basinscale_tws_month_anomaly']['model'], "pcrglobwb_tws","m")
    self.output.createNetCDF(self.output_files['basinscale_tws_annua_anomaly']['grace'], "lwe_thickness","m")
    self.output.createNetCDF(self.output_files['basinscale_tws_annua_anomaly']['model'], "pcrglobwb_tws","m")
def evaluateModelResultsToGRDC(self,id,pcrglobwb_output,catchmentClassFileName,tmpDir):
    """Evaluate modelled discharge against the GRDC observation for station `id`.

    Reads the modelled discharge time series at the station's model pixel
    from a netcdf file, writes it together with the station attributes to
    temporary text files, runs the external R script
    'evaluateMonthlyDischarge.R' on them, and stores the returned
    performance metrics (NS efficiency, RMSE, bias, R2, correlation, ...)
    in self.attributeGRDC. Any failure anywhere in the procedure is
    logged and swallowed (outer bare except), so one broken station does
    not abort the whole run.

    NOTE(review): this file contains two near-identical definitions of
    this method; if both live in the same class, the later one wins.
    """
    try:
        # open and crop the netcdf file that contains the result
        ncFile = pcrglobwb_output['folder']+"/"+pcrglobwb_output["netcdf_file_name"]
        # for high resolution output, the netcdf files are usually splitted in several files
        if catchmentClassFileName != None:
            # identify the landmask (zero-padded to two digits, e.g. "07")
            landmaskCode = str(self.attributeGRDC["model_landmask"][str(id)])
            if int(landmaskCode) < 10: landmaskCode = "0"+landmaskCode
            # identify the landmask - # TODO: THIS MUST BE FIXED
            # NOTE(review): hard-coded absolute path to one specific model
            # run; must be parameterized before this branch is reusable.
            ncFile = "/projects/wtrcycle/users/edwinhs/two_layers_with_demand_one_degree_zonation_cruts3.21-era_interim_5arcmin_but_30minArno"+"/M"+landmaskCode+"/netcdf/discharge_monthAvg_output.nc"
        logger.info("Reading and evaluating the model result for the grdc station "+str(id)+" from "+ncFile)
        # reuse already-opened datasets via the module-level 'filecache'
        # dict so each netcdf file is opened only once across stations
        if ncFile in filecache.keys():
            f = filecache[ncFile]
            print "Cached: ", ncFile
        else:
            f = nc.Dataset(ncFile)
            filecache[ncFile] = f
            print "New: ", ncFile
        #
        varName = pcrglobwb_output["netcdf_variable_name"]
        # alias 'latitude'/'longitude' to 'lat'/'lon' when the file uses
        # the long names; ignored if those variables do not exist
        try:
            f.variables['lat'] = f.variables['latitude']
            f.variables['lon'] = f.variables['longitude']
        except:
            pass
        #~ #
        #~ # IN PROGRESS swap rows if needed ?? - It seems that this one is not necessary.
        #~ if f.variables['lat'][0] < f.variables['lat'][1]:
            #~ f.variables[varName][:] = self.swapRows(f.variables[varName][:])
            #~ f.variables['lat'][:] = f.variables['lat'][::-1]
        # identify row and column indexes:
        # (nearest grid cell to the station's model coordinates)
        #
        lon = float(self.attributeGRDC["model_longitude_in_arc_degree"][str(id)])
        minX = min(abs(f.variables['lon'][:] - lon))
        xStationIndex = int(np.where(abs(f.variables['lon'][:] - lon) == minX)[0])
        #
        lat = float(self.attributeGRDC["model_latitude_in_arc_degree"][str(id)])
        minY = min(abs(f.variables['lat'][:] - lat))
        yStationIndex = int(np.where(abs(f.variables['lat'][:] - lat) == minY)[0])
        # cropping the data: full time series at the station pixel
        # (assumes the variable dimensions are (time, lat, lon) -- TODO confirm)
        cropData = f.variables[varName][:,yStationIndex,xStationIndex]
        # select specific ranges of date/year
        nctime = f.variables['time'] # A netCDF time variable object.
        cropTime = nctime[:]
        if (self.startDate != None) and (self.endDate != None):
            idx_start = nc.date2index(self.startDate, \
                                      nctime, \
                                      calendar = nctime.calendar, \
                                      select = 'exact')
            idx_end = nc.date2index(self.endDate, \
                                    nctime, \
                                    calendar = nctime.calendar, \
                                    select = 'exact')
            cropData = cropData[int(idx_start):int(idx_end+1)]
            cropTime = cropTime[int(idx_start):int(idx_end+1)]
        # two columns: time value and model value
        cropData = np.column_stack((cropTime,cropData))
        print(cropData)
        # make a randomDir containing txt files (attribute and model result):
        randomDir = self.makeRandomDir(tmpDir)
        txtModelFile = randomDir+"/"+vos.get_random_word()+".txt"
        # write important attributes to a .atr file
        #
        atrModel = open(txtModelFile+".atr","w")
        atrModel.write("# grdc_id: " +str(self.attributeGRDC["id_from_grdc"][str(id)])+"\n")
        atrModel.write("# country_code: " +str(self.attributeGRDC["country_code"][str(id)])+"\n")
        atrModel.write("# river_name: " +str(self.attributeGRDC["river_name"][str(id)])+"\n")
        atrModel.write("# station_name: " +str(self.attributeGRDC["station_name"][str(id)])+"\n")
        atrModel.write("# grdc_catchment_area_in_km2: " +str(self.attributeGRDC["grdc_catchment_area_in_km2"][str(id)])+"\n")
        #
        atrModel.write("# model_landmask: " +str(self.attributeGRDC["model_landmask"][str(id)])+"\n")
        atrModel.write("# model_latitude: " +str(self.attributeGRDC["model_latitude_in_arc_degree"][str(id)])+"\n")
        atrModel.write("# model_longitude: " +str(self.attributeGRDC["model_longitude_in_arc_degree"][str(id)])+"\n")
        atrModel.write("# model_catchment_area_in_km2: "+str(self.attributeGRDC["model_catchment_area_in_km2"][str(id)])+"\n")
        atrModel.write("####################################################################################\n")
        atrModel.close()
        # save cropData to a .txt file:
        # NOTE(review): np.savetxt opens the path itself; the txtModel
        # handle is opened/closed around it without ever being written to.
        txtModel = open(txtModelFile,"w")
        np.savetxt(txtModelFile,cropData,delimiter=";") # two columns with date and model_result
        txtModel.close()
        # run R for evaluation
        cmd = 'R -f evaluateMonthlyDischarge.R '+self.attributeGRDC["grdc_file_name"][str(id)]+' '+txtModelFile
        print(cmd); os.system(cmd)
        # get model performance: read the output file (from R)
        try:
            outputFile = txtModelFile+".out"
            # NOTE(review): 'f' is rebound here, shadowing the netcdf
            # dataset (which stays alive via the filecache dict).
            f = open(outputFile) ; allLines = f.read() ; f.close()
            # split the content of the file into several lines
            allLines = allLines.replace("\r",""); allLines = allLines.split("\n")
            # performance values: third line, semicolon-separated; the
            # column order is fixed by evaluateMonthlyDischarge.R
            performance = allLines[2].split(";")
            #
            nPairs = float(performance[0])
            avg_obs = float(performance[1])
            avg_sim = float(performance[2])
            NSeff = float(performance[3])
            NSeff_log = float(performance[4])
            rmse = float(performance[5])
            mae = float(performance[6])
            bias = float(performance[7])
            R2 = float(performance[8])
            R2ad = float(performance[9])
            correlation = float(performance[10])
            #
            # copy the R output table to the (permanent) table directory
            table_file_name = self.tableOutputDir+"/"+\
                              str(self.attributeGRDC["country_code"][str(id)])+"_"+\
                              str(self.attributeGRDC["river_name"][str(id)]) +"_"+\
                              str(self.attributeGRDC["id_from_grdc"][str(id)])+"_"+\
                              str(self.attributeGRDC["station_name"][str(id)])+"_"+\
                              "table.txt"
            cmd = 'cp '+txtModelFile+".out "+table_file_name
            print(cmd); os.system(cmd)
            logger.info("Copying the model result for the grdc station "+str(id)+" to a column/txt file: "+str(table_file_name)+".")
            #
            # copy the R time-series plot to the (permanent) chart directory
            chart_file_name = self.chartOutputDir+"/"+\
                              str(self.attributeGRDC["country_code"][str(id)])+"_"+\
                              str(self.attributeGRDC["river_name"][str(id)]) +"_"+\
                              str(self.attributeGRDC["id_from_grdc"][str(id)])+"_"+\
                              str(self.attributeGRDC["station_name"][str(id)])+"_"+\
                              "chart.pdf"
            cmd = 'cp '+txtModelFile+".out.pdf "+chart_file_name
            print(cmd); os.system(cmd)
            logger.info("Saving the time series plot for the grdc station "+str(id)+" to a pdf file: "+str(chart_file_name)+".")
        except:
            # the R output could not be read/parsed: record "NA" everywhere
            nPairs = "NA"
            avg_obs = "NA"
            avg_sim = "NA"
            NSeff = "NA"
            NSeff_log = "NA"
            rmse = "NA"
            mae = "NA"
            bias = "NA"
            R2 = "NA"
            R2ad = "NA"
            correlation = "NA"
            chart_file_name = "NA"
            table_file_name = "NA"
            logger.info("Evaluation model result to the grdc observation can NOT be performed.")
        # clean (random) temporary directory
        self.cleanRandomDir(randomDir)
        # store the (possibly "NA") metrics for this station
        self.attributeGRDC["num_of_month_pairs"][str(id)] = nPairs
        self.attributeGRDC["average_observation"][str(id)] = avg_obs
        self.attributeGRDC["average_model"][str(id)] = avg_sim
        self.attributeGRDC["ns_efficiency"][str(id)] = NSeff
        self.attributeGRDC["ns_efficiency_log"][str(id)] = NSeff_log
        self.attributeGRDC["rmse"][str(id)] = rmse
        self.attributeGRDC["mae"][str(id)] = mae
        self.attributeGRDC["bias"][str(id)] = bias
        self.attributeGRDC["R2"][str(id)] = R2
        self.attributeGRDC["R2_adjusted"][str(id)] = R2ad
        self.attributeGRDC["correlation"][str(id)] = correlation
        self.attributeGRDC["chart_file_name"][str(id)] = chart_file_name
        self.attributeGRDC["table_file_name"][str(id)] = table_file_name
    except:
        # NOTE(review): outer bare except hides all errors (including the
        # netcdf file being absent); only an info-level message is logged
        logger.info("Evaluation model result to the grdc observation can NOT be performed.")
def identifyModelPixel(self,tmpDir,\
                       catchmentAreaAll,\
                       landMaskClass,\
                       xCoordinate,yCoordinate,id):
    """Identify the model pixel that best represents GRDC station `id`.

    Around the model cell nearest to the reported GRDC coordinates a small
    search window (5 cells wide, via pcr.windowmajority) is considered.
    For every candidate pixel, the differences to the GRDC metadata
    (catchment area, longitude, latitude, and their euclidean distance in
    arc degrees) are exported as PCRaster maps, merged to a text table
    with 'map2col', and ranked by the external R script
    'saveIdentifiedPixels.R'. The selected pixel's coordinates, catchment
    area and landmask code are stored in self.attributeGRDC. Failures are
    logged and swallowed; the temporary directory is always cleaned up.

    Fixes relative to the previous revision:
    - the latitude/longitude difference maps were reported to each
      other's file names (swapped pcr.report targets), so the lon/lat
      difference columns fed to the R script were exchanged;
    - a duplicated (dead) assignment of diffLatitudeMap was removed.
    """
    # TODO: Include an option to consider average discharge.
    logger.info("Identify model pixel for the grdc station "+str(id)+".")
    # make a temporary directory:
    randomDir = self.makeRandomDir(tmpDir)
    # coordinate of grdc station (arc degrees)
    xCoord = float(self.attributeGRDC["grdc_longitude_in_arc_degree"][str(id)])
    yCoord = float(self.attributeGRDC["grdc_latitude_in_arc_degree"][str(id)])
    # identify the model cell nearest to the grdc station
    point = pcr.ifthen((pcr.abs(xCoordinate - xCoord) == pcr.mapminimum(pcr.abs(xCoordinate - xCoord))) &\
                       (pcr.abs(yCoordinate - yCoord) == pcr.mapminimum(pcr.abs(yCoordinate - yCoord))), \
                       pcr.boolean(1))
    # expanding the point to a 5-cell-wide window, limited to cells with
    # a positive catchment area
    point = pcr.windowmajority(point, self.cell_size_in_arc_degree * 5.0)
    point = pcr.ifthen(catchmentAreaAll > 0, point)
    point = pcr.boolean(point)
    # values based on the model;
    modelCatchmentArea = pcr.ifthen(point, catchmentAreaAll) # unit: km2
    model_x_ccordinate = pcr.ifthen(point, xCoordinate)      # unit: arc degree
    model_y_ccordinate = pcr.ifthen(point, yCoordinate)      # unit: arc degree
    # calculate (absolute) difference with GRDC data
    # - initiating all of them with the values of MV
    diffCatchArea = pcr.abs(pcr.scalar(vos.MV)) # difference between the model and grdc catchment area (unit: km2)
    diffDistance  = pcr.abs(pcr.scalar(vos.MV)) # distance between the model pixel and grdc catchment station (unit: arc degree)
    diffLongitude = pcr.abs(pcr.scalar(vos.MV)) # longitude difference (unit: arc degree)
    diffLatitude  = pcr.abs(pcr.scalar(vos.MV)) # latitude difference (unit: arc degree)
    #
    # - calculate (absolute) difference with GRDC data
    try:
        diffCatchArea = pcr.abs(modelCatchmentArea-\
                        float(self.attributeGRDC["grdc_catchment_area_in_km2"][str(id)]))
    except:
        logger.info("The difference in the model and grdc catchment area cannot be calculated.")
    try:
        diffLongitude = pcr.abs(model_x_ccordinate - xCoord)
    except:
        logger.info("The difference in longitude cannot be calculated.")
    try:
        diffLatitude = pcr.abs(model_y_ccordinate - yCoord)
    except:
        logger.info("The difference in latitude cannot be calculated.")
    try:
        diffDistance = (diffLongitude**(2) + \
                        diffLatitude**(2))**(0.5) # TODO: calculate distance in meter
    except:
        logger.info("Distance cannot be calculated.")
    # identify masks
    masks = pcr.ifthen(pcr.boolean(point), landMaskClass)
    # export the difference to temporary files: maps and txt
    # (BUGFIX: the duplicated diffLatitudeMap assignment was removed)
    catchmentAreaMap = randomDir+"/"+vos.get_random_word()+".area.map"
    diffCatchAreaMap = randomDir+"/"+vos.get_random_word()+".dare.map"
    diffDistanceMap  = randomDir+"/"+vos.get_random_word()+".dist.map"
    diffLongitudeMap = randomDir+"/"+vos.get_random_word()+".dlon.map"
    diffLatitudeMap  = randomDir+"/"+vos.get_random_word()+".dlat.map"
    #
    maskMap          = randomDir+"/"+vos.get_random_word()+".mask.map"
    diffColumnFile   = randomDir+"/"+vos.get_random_word()+".cols.txt"
    # output
    # (BUGFIX: diffLatitude/diffLongitude were previously written to each
    # other's files; each value now goes to the correspondingly named map)
    pcr.report(pcr.ifthen(point,modelCatchmentArea), catchmentAreaMap)
    pcr.report(pcr.ifthen(point,diffCatchArea    ), diffCatchAreaMap)
    pcr.report(pcr.ifthen(point,diffDistance     ), diffDistanceMap )
    pcr.report(pcr.ifthen(point,diffLongitude    ), diffLongitudeMap)
    pcr.report(pcr.ifthen(point,diffLatitude     ), diffLatitudeMap )
    pcr.report(pcr.ifthen(point,masks            ), maskMap)
    # merge all maps into one text table (one column per map)
    cmd = 'map2col '+catchmentAreaMap +' '+\
                     diffCatchAreaMap +' '+\
                     diffDistanceMap  +' '+\
                     diffLongitudeMap +' '+\
                     diffLatitudeMap  +' '+\
                     maskMap+' '+diffColumnFile
    print(cmd); os.system(cmd)
    # use R to sort the file and select the best pixel
    cmd = 'R -f saveIdentifiedPixels.R '+diffColumnFile
    print(cmd); os.system(cmd)
    try:
        # read the output file (from R); its first line holds the selected pixel
        f = open(diffColumnFile+".sel") ; allLines = f.read() ; f.close()
        # split the content of the file into several lines
        allLines = allLines.replace("\r",""); allLines = allLines.split("\n")
        selectedPixel = allLines[0].split(";")
        # column layout is fixed by saveIdentifiedPixels.R
        model_longitude_in_arc_degree = float(selectedPixel[0])
        model_latitude_in_arc_degree  = float(selectedPixel[1])
        model_catchment_area_in_km2   = float(selectedPixel[2])
        model_landmask                = str(selectedPixel[7])
        log_message  = "Model pixel for grdc station "+str(id)+" is identified (lat/lon in arc degree): "
        log_message += str(model_latitude_in_arc_degree) + " ; " + str(model_longitude_in_arc_degree)
        logger.info(log_message)
        self.attributeGRDC["model_longitude_in_arc_degree"][str(id)] = model_longitude_in_arc_degree
        self.attributeGRDC["model_latitude_in_arc_degree"][str(id)]  = model_latitude_in_arc_degree
        self.attributeGRDC["model_catchment_area_in_km2"][str(id)]   = model_catchment_area_in_km2
        self.attributeGRDC["model_landmask"][str(id)]                = model_landmask
    except:
        logger.info("Model pixel for grdc station "+str(id)+" can NOT be identified.")
    # always remove the temporary working directory
    self.cleanRandomDir(randomDir)
def evaluateModelResultsToGRDC(self,id,pcrglobwb_output,catchmentClassFileName,tmpDir):
    """Evaluate modelled discharge against the GRDC observation for station `id`.

    Reads the modelled discharge time series at the station's model pixel
    from a netcdf file, writes it together with the station attributes to
    temporary text files, runs the external R script
    'evaluateMonthlyDischarge.R' on them, and stores the returned
    performance metrics (NS efficiency, RMSE, bias, R2, correlation, ...)
    in self.attributeGRDC. Any failure anywhere in the procedure is
    logged and swallowed (outer bare except).

    NOTE(review): this file contains two near-identical definitions of
    this method; this one differs only by an extra debug print of the
    grdc file name. If both live in the same class, the later one wins.
    """
    try:
        # open and crop the netcdf file that contains the result
        ncFile = pcrglobwb_output['folder']+"/"+pcrglobwb_output["netcdf_file_name"]
        # for high resolution output, the netcdf files are usually splitted in several files
        if catchmentClassFileName != None:
            # identify the landmask (zero-padded to two digits, e.g. "07")
            landmaskCode = str(self.attributeGRDC["model_landmask"][str(id)])
            if int(landmaskCode) < 10: landmaskCode = "0"+landmaskCode
            # identify the landmask - # TODO: THIS MUST BE FIXED
            # NOTE(review): hard-coded absolute path to one specific model
            # run; must be parameterized before this branch is reusable.
            ncFile = "/projects/wtrcycle/users/edwinhs/two_layers_with_demand_one_degree_zonation_cruts3.21-era_interim_5arcmin_but_30minArno"+"/M"+landmaskCode+"/netcdf/discharge_monthAvg_output.nc"
        logger.info("Reading and evaluating the model result for the grdc station "+str(id)+" from "+ncFile)
        # reuse already-opened datasets via the module-level 'filecache'
        # dict so each netcdf file is opened only once across stations
        if ncFile in filecache.keys():
            f = filecache[ncFile]
            print "Cached: ", ncFile
        else:
            f = nc.Dataset(ncFile)
            filecache[ncFile] = f
            print "New: ", ncFile
        #
        varName = pcrglobwb_output["netcdf_variable_name"]
        # alias 'latitude'/'longitude' to 'lat'/'lon' when the file uses
        # the long names; ignored if those variables do not exist
        try:
            f.variables['lat'] = f.variables['latitude']
            f.variables['lon'] = f.variables['longitude']
        except:
            pass
        #~ #
        #~ # IN PROGRESS swap rows if needed ?? - It seems that this one is not necessary.
        #~ if f.variables['lat'][0] < f.variables['lat'][1]:
            #~ f.variables[varName][:] = self.swapRows(f.variables[varName][:])
            #~ f.variables['lat'][:] = f.variables['lat'][::-1]
        # identify row and column indexes:
        # (nearest grid cell to the station's model coordinates)
        #
        lon = float(self.attributeGRDC["model_longitude_in_arc_degree"][str(id)])
        minX = min(abs(f.variables['lon'][:] - lon))
        xStationIndex = int(np.where(abs(f.variables['lon'][:] - lon) == minX)[0])
        #
        lat = float(self.attributeGRDC["model_latitude_in_arc_degree"][str(id)])
        minY = min(abs(f.variables['lat'][:] - lat))
        yStationIndex = int(np.where(abs(f.variables['lat'][:] - lat) == minY)[0])
        # cropping the data: full time series at the station pixel
        # (assumes the variable dimensions are (time, lat, lon) -- TODO confirm)
        cropData = f.variables[varName][:,yStationIndex,xStationIndex]
        # select specific ranges of date/year
        nctime = f.variables['time'] # A netCDF time variable object.
        cropTime = nctime[:]
        if (self.startDate != None) and (self.endDate != None):
            idx_start = nc.date2index(self.startDate, \
                                      nctime, \
                                      calendar = nctime.calendar, \
                                      select = 'exact')
            idx_end = nc.date2index(self.endDate, \
                                    nctime, \
                                    calendar = nctime.calendar, \
                                    select = 'exact')
            cropData = cropData[int(idx_start):int(idx_end+1)]
            cropTime = cropTime[int(idx_start):int(idx_end+1)]
        # two columns: time value and model value
        cropData = np.column_stack((cropTime,cropData))
        print(cropData)
        # make a randomDir containing txt files (attribute and model result):
        randomDir = self.makeRandomDir(tmpDir)
        txtModelFile = randomDir+"/"+vos.get_random_word()+".txt"
        # write important attributes to a .atr file
        #
        atrModel = open(txtModelFile+".atr","w")
        atrModel.write("# grdc_id: " +str(self.attributeGRDC["id_from_grdc"][str(id)])+"\n")
        atrModel.write("# country_code: " +str(self.attributeGRDC["country_code"][str(id)])+"\n")
        atrModel.write("# river_name: " +str(self.attributeGRDC["river_name"][str(id)])+"\n")
        atrModel.write("# station_name: " +str(self.attributeGRDC["station_name"][str(id)])+"\n")
        atrModel.write("# grdc_catchment_area_in_km2: " +str(self.attributeGRDC["grdc_catchment_area_in_km2"][str(id)])+"\n")
        #
        atrModel.write("# model_landmask: " +str(self.attributeGRDC["model_landmask"][str(id)])+"\n")
        atrModel.write("# model_latitude: " +str(self.attributeGRDC["model_latitude_in_arc_degree"][str(id)])+"\n")
        atrModel.write("# model_longitude: " +str(self.attributeGRDC["model_longitude_in_arc_degree"][str(id)])+"\n")
        atrModel.write("# model_catchment_area_in_km2: "+str(self.attributeGRDC["model_catchment_area_in_km2"][str(id)])+"\n")
        atrModel.write("####################################################################################\n")
        atrModel.close()
        # save cropData to a .txt file:
        # NOTE(review): np.savetxt opens the path itself; the txtModel
        # handle is opened/closed around it without ever being written to.
        txtModel = open(txtModelFile,"w")
        np.savetxt(txtModelFile,cropData,delimiter=";") # two columns with date and model_result
        txtModel.close()
        # run R for evaluation
        print self.attributeGRDC["grdc_file_name"][str(id)]
        cmd = 'R -f evaluateMonthlyDischarge.R '+self.attributeGRDC["grdc_file_name"][str(id)]+' '+txtModelFile
        print(cmd); os.system(cmd)
        # get model performance: read the output file (from R)
        try:
            outputFile = txtModelFile+".out"
            # NOTE(review): 'f' is rebound here, shadowing the netcdf
            # dataset (which stays alive via the filecache dict).
            f = open(outputFile) ; allLines = f.read() ; f.close()
            # split the content of the file into several lines
            allLines = allLines.replace("\r",""); allLines = allLines.split("\n")
            # performance values: third line, semicolon-separated; the
            # column order is fixed by evaluateMonthlyDischarge.R
            performance = allLines[2].split(";")
            #
            nPairs = float(performance[0])
            avg_obs = float(performance[1])
            avg_sim = float(performance[2])
            NSeff = float(performance[3])
            NSeff_log = float(performance[4])
            rmse = float(performance[5])
            mae = float(performance[6])
            bias = float(performance[7])
            R2 = float(performance[8])
            R2ad = float(performance[9])
            correlation = float(performance[10])
            #
            # copy the R output table to the (permanent) table directory
            table_file_name = self.tableOutputDir+"/"+\
                              str(self.attributeGRDC["country_code"][str(id)])+"_"+\
                              str(self.attributeGRDC["river_name"][str(id)]) +"_"+\
                              str(self.attributeGRDC["id_from_grdc"][str(id)])+"_"+\
                              str(self.attributeGRDC["station_name"][str(id)])+"_"+\
                              "table.txt"
            cmd = 'cp '+txtModelFile+".out "+table_file_name
            print(cmd); os.system(cmd)
            logger.info("Copying the model result for the grdc station "+str(id)+" to a column/txt file: "+str(table_file_name)+".")
            #
            # copy the R time-series plot to the (permanent) chart directory
            chart_file_name = self.chartOutputDir+"/"+\
                              str(self.attributeGRDC["country_code"][str(id)])+"_"+\
                              str(self.attributeGRDC["river_name"][str(id)]) +"_"+\
                              str(self.attributeGRDC["id_from_grdc"][str(id)])+"_"+\
                              str(self.attributeGRDC["station_name"][str(id)])+"_"+\
                              "chart.pdf"
            cmd = 'cp '+txtModelFile+".out.pdf "+chart_file_name
            print(cmd); os.system(cmd)
            logger.info("Saving the time series plot for the grdc station "+str(id)+" to a pdf file: "+str(chart_file_name)+".")
        except:
            # the R output could not be read/parsed: record "NA" everywhere
            nPairs = "NA"
            avg_obs = "NA"
            avg_sim = "NA"
            NSeff = "NA"
            NSeff_log = "NA"
            rmse = "NA"
            mae = "NA"
            bias = "NA"
            R2 = "NA"
            R2ad = "NA"
            correlation = "NA"
            chart_file_name = "NA"
            table_file_name = "NA"
            logger.info("Evaluation model result to the grdc observation can NOT be performed.")
        # clean (random) temporary directory
        self.cleanRandomDir(randomDir)
        # store the (possibly "NA") metrics for this station
        self.attributeGRDC["num_of_month_pairs"][str(id)] = nPairs
        self.attributeGRDC["average_observation"][str(id)] = avg_obs
        self.attributeGRDC["average_model"][str(id)] = avg_sim
        self.attributeGRDC["ns_efficiency"][str(id)] = NSeff
        self.attributeGRDC["ns_efficiency_log"][str(id)] = NSeff_log
        self.attributeGRDC["rmse"][str(id)] = rmse
        self.attributeGRDC["mae"][str(id)] = mae
        self.attributeGRDC["bias"][str(id)] = bias
        self.attributeGRDC["R2"][str(id)] = R2
        self.attributeGRDC["R2_adjusted"][str(id)] = R2ad
        self.attributeGRDC["correlation"][str(id)] = correlation
        self.attributeGRDC["chart_file_name"][str(id)] = chart_file_name
        self.attributeGRDC["table_file_name"][str(id)] = table_file_name
    except:
        # NOTE(review): outer bare except hides all errors (including the
        # netcdf file being absent); only an info-level message is logged
        logger.info("Evaluation model result to the grdc observation can NOT be performed.")