def selectEmFiles(**kwargs):
    """Select data files managed by the EM archive tool.

    Expected kwargs: 'simulation', 'frequency', 'variable', 'period'
    (an object with an ``intersects`` method) and 'realm' (a single
    letter among A/L/O/I, or "*" for all realms).

    Returns the list of file paths whose period intersects the
    requested one and which contain the requested variable.
    """
    # Pour A et L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation = kwargs['simulation']
    frequency = kwargs['frequency']
    variable = kwargs['variable']
    period = kwargs['period']
    realm = kwargs['realm']
    #
    # Map CliMaF frequency names to the suffix used in EM variable names
    freqs = {"mon": "", "3h": "_3h"}
    f = frequency
    if f in freqs:
        f = freqs[f]
    rep = []
    # Must look for all realms, here identified by a single letter
    if realm == "*":
        lrealm = ["A", "L", "O", "I"]
    else:
        lrealm = [realm]
    for realm in lrealm:
        clogger.debug("Looking for realm " + realm)
        # Use EM data for finding data dir
        freq_for_em = f
        if realm == 'I':
            freq_for_em = ""  # This is a special case ...
        command = [
            "grep", "^export EM_DIRECTORY_" + realm + freq_for_em + "=",
            os.path.expanduser(os.getenv("EM_HOME")) + "/expe_" + simulation
        ]
        try:
            ex = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError:
            # Fixed: was a bare 'except:'; Popen raises OSError when the
            # executable or the expe file path is unusable
            clogger.error("Issue getting archive_location for " + simulation +
                          " for realm " + realm + " with: " + repr(command))
            break
        if ex.wait() == 0:
            # Matched line looks like: export EM_DIRECTORY_<realm><freq>="<path>"
            archive_dir = ex.stdout.read().split("=")[1].replace('"', "").replace("\n", "")
            clogger.debug("Looking at dir " + archive_dir)
            if os.path.exists(archive_dir):
                for fil in os.listdir(archive_dir):
                    fileperiod = periodOfEmFile(fil, realm, f)
                    if fileperiod and period.intersects(fileperiod):
                        if fileHasVar(archive_dir + "/" + fil, variable):
                            rep.append(archive_dir + "/" + fil)
            else:
                clogger.error(
                    "Directory %s does not exist for simulation %s, realm %s "
                    "and frequency %s" % (archive_dir, simulation, realm, f))
        else:
            clogger.info("No archive location found for " + simulation +
                         " for realm " + realm + " with: " + repr(command))
    return rep
def selectEmFiles(**kwargs):
    """Select data files managed by the EM archive tool.

    Expected kwargs: 'simulation', 'frequency', 'variable', 'period'
    (an object with an ``intersects`` method) and 'realm' (a single
    letter among A/L/O/I, or "*" for all realms).

    Returns the list of file paths whose period intersects the
    requested one and which contain the requested variable.
    """
    # Pour A et L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation = kwargs['simulation']
    frequency = kwargs['frequency']
    variable = kwargs['variable']
    period = kwargs['period']
    realm = kwargs['realm']
    #
    # Map CliMaF frequency names to the suffix used in EM variable names
    freqs = {"mon": "", "3h": "_3h"}
    f = frequency
    if f in freqs:
        f = freqs[f]
    rep = []
    # Must look for all realms, here identified by a single letter
    if realm == "*":
        lrealm = ["A", "L", "O", "I"]
    else:
        lrealm = [realm]
    for realm in lrealm:
        clogger.debug("Looking for realm " + realm)
        # Use EM data for finding data dir
        freq_for_em = f
        if realm == 'I':
            freq_for_em = ""  # This is a special case ...
        command = ["grep", "^export EM_DIRECTORY_" + realm + freq_for_em + "=",
                   os.path.expanduser(os.getenv("EM_HOME")) + "/expe_" + simulation]
        try:
            ex = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError:
            # Fixed: was a bare 'except:'; also replaced the Python-2-only
            # backquote repr (`command`) with repr()
            clogger.error("Issue getting archive_location for " + simulation +
                          " for realm " + realm + " with: " + repr(command))
            break
        if ex.wait() == 0:
            # Matched line looks like: export EM_DIRECTORY_<realm><freq>="<path>"
            dir = ex.stdout.read().split("=")[1].replace('"', "").replace("\n", "")
            clogger.debug("Looking at dir " + dir)
            if os.path.exists(dir):
                lfiles = os.listdir(dir)
                for fil in lfiles:
                    fileperiod = periodOfEmFile(fil, realm, f)
                    if fileperiod and period.intersects(fileperiod):
                        if fileHasVar(dir + "/" + fil, variable):
                            rep.append(dir + "/" + fil)
            else:
                clogger.error("Directory %s does not exist for EM simulation %s, realm %s "
                              "and frequency %s" % (dir, simulation, realm, f))
        else:
            clogger.info("No archive location found for " + simulation +
                         " for realm " + realm + " with: " + repr(command))
    return rep
def selectExampleFiles(urls, **kwargs):
    """Return example data files matching the request.

    Only monthly frequency is handled.  Files are searched under each
    url, in the per-realm sub-directories "A" and "L", and kept when
    their period (as decoded by periodOfEmFile) intersects
    kwargs['period'] and they hold kwargs['variable'].
    """
    rep = []
    # Nothing to look for unless the request is monthly
    if kwargs['frequency'] != "monthly":
        return rep
    for base in urls:
        for realm in ["A", "L"]:
            # dir=l+"/"+realm+"/Origin/Monthly/"+simulation
            dirname = base + "/" + realm
            clogger.debug("Looking at dir " + dirname)
            if not os.path.exists(dirname):
                continue
            for fname in os.listdir(dirname):
                clogger.debug("Looking at file " + fname)
                fileperiod = periodOfEmFile(fname, realm, 'mon')
                if fileperiod and fileperiod.intersects(kwargs['period']):
                    if fileHasVar(dirname + "/" + fname, kwargs['variable']):
                        rep.append(dirname + "/" + fname)
    return rep
def selectGenericFiles(urls, return_wildcards=None, merge_periods_on=None, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``url`` once these patterns are instantiated by the
      values in kwargs, and
    - contain the ``variable`` provided in kwargs
    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this
    is for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*${PERIOD}*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the
    argument names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and filenames do
      include the variable name, as this speed up the search
    - ${PERIOD} : use it for indicating the period covered by each file, if this
      is applicable in the file naming; this period can appear in filenames as
      YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHHMM, either once only, or twice with
      separator '-' or '_'
    - wildcards '?' and '*' for matching respectively one and any number of
      characters
    """
    def store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on=None,
                                    fperiod=None, periods=None, periods_dict=None):
        """For file f, store in `wildcards` the values taken by facets which
        were a wildcard in the request, and in `periods`/`periods_dict` the
        period covered by f (keyed by the value of facet `merge_periods_on`)."""
        if fperiod is not None and periods is not None:
            clogger.debug('Adding period %s' % fperiod)
            periods.append(fperiod)
        #
        for kw in kwargs:
            it = re.finditer(facets_regexp, f)
            for oc in it:
                try:
                    facet_value = oc.group(kw)
                except Exception:
                    # No group named kw in the regexp
                    continue
                if type(kwargs[kw]) is str and ("*" in kwargs[kw] or "?" in kwargs[kw]):
                    if facet_value is not None:
                        if kw not in wildcards:
                            wildcards[kw] = set()
                        wildcards[kw].add(facet_value)
                        clogger.debug("Discover %s=%s for file=%s" % (kw, facet_value, f))
                    else:
                        clogger.debug("Logic issue for kw=%s and file=%s" % (kw, f))
                    #
                    if fperiod is not None and periods is not None:
                        if merge_periods_on is None:
                            key = None
                        elif kw == merge_periods_on:
                            key = facet_value
                        else:
                            continue
                        if key not in periods_dict:
                            periods_dict[key] = set()
                        periods_dict[key].add(fperiod)
    #
    rep = []
    #
    periods = None  # a list of periods available (when period='*')
    periods_dict = dict()
    #
    period = kwargs['period']
    if period == "*":
        periods = []  # List of all periods
    elif type(period) is str:
        period = init_period(period)
    #
    variable = kwargs['variable']
    altvar = kwargs.get('filenameVar', variable)
    #
    # dicts of date patterns, for globbing and for regexp
    #
    digit = "[0-9]"
    date_glob_patt = {"${PERIOD}": "*"}
    # an ordered list of dates keywords (sorted() keeps this py2/py3 proof)
    date_keywords = sorted(date_glob_patt.keys(), reverse=True)
    #
    annee = "%s{4}" % digit
    mois = "(01|02|03|04|05|06|07|08|09|10|11|12)"
    jour = "([0-3][0-9])"
    heure = "(00|01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23)"
    minutes = "[0-5][0-9]"
    date = "%s(%s(%s(%s(%s)?)?)?)?" % (annee, mois, jour, heure, minutes)
    rperiod = "(?P<period>(?P<start>%s)([_-](?P<end>%s))?)" % (date, date)
    date_regexp_patt = {"${PERIOD}": rperiod}
    # an ordered list of dates regexp keywords
    date_regexp_keywords = sorted(date_regexp_patt.keys(), reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        remote_prefix = ""
        if re.findall(".*:.*", l):
            remote_prefix = ':'.join(l.split(":")[0:-1]) + ':'
        basename = l.split(":")[-1]  # This discards the remote_prefix if any
        basename = basename.replace("//", "/")
        my_template = Template(basename)
        template = my_template.safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in date_keywords:
            temp2 = temp2.replace(k, date_glob_patt[k])
        # Do globbing with plain varname
        if remote_prefix:
            lfiles = sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : " %
                          (len(lfiles), remote_prefix + temp2))
        else:  # local data
            lfiles = sorted(glob.glob(temp2))
            clogger.debug("Before regexp filtering : Globbing %d files for varname on %s : " %
                          (len(lfiles), temp2))
            # Must filter with regexp, because * with glob is too inclusive
            alt = []
            for f in lfiles:
                for k in date_keywords:
                    if re.search(date_regexp_patt[k], f):
                        alt.append(f)
                        continue
            lfiles = alt
            clogger.debug("Globbing %d files for varname on %s : " % (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # Change value of facet 'variable'
            kwargs['variable'] = kwargs['filenameVar']
            template = my_template.safe_substitute(**kwargs)
            temp2 = template
            for k in date_keywords:
                temp2 = temp2.replace(k, date_glob_patt[k])
            #
            # Do globbing with fileVarname
            if remote_prefix:
                lfiles = sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Remote globbing %d files for filenamevar on %s: " %
                              (len(lfiles), remote_prefix + temp2))
            else:  # local data
                lfiles = sorted(glob.glob(temp2))
                # Must filter with regexp, because * with glob is too inclusive
                alt = []
                for f in lfiles:
                    for k in date_keywords:
                        if re.search(date_regexp_patt[k], f):
                            alt.append(f)
                            continue
                lfiles = alt
                clogger.debug("Globbing %d files for filenamevar on %s: " % (len(lfiles), temp2))
        #
        # For discovering values for those facets which are a wildcard,
        # construct a regexp with a group name for all facets (but period)
        alt_basename = basename.replace("?", ".").replace("*", ".*")
        alt_kwargs = kwargs.copy()
        for kw in kwargs:
            if type(kwargs[kw]) is str:
                # This excludes period attribute, which has a type
                alt_kwargs[kw] = kwargs[kw].replace("?", ".").replace("*", ".*")
                alt_basename = alt_basename.replace(r"${%s}" % kw, r"(?P<%s>%s)" % (kw, alt_kwargs[kw]), 1)
        facets_regexp = Template(alt_basename).safe_substitute(**alt_kwargs)
        for k in date_regexp_keywords:
            facets_regexp = facets_regexp.replace(k, date_regexp_patt[k], 1)
            facets_regexp = facets_regexp.replace(k, ".*")
        wildcards = dict()
        #
        # Construct regexp for extracting dates from filename
        date_regexp = None
        template_toreg = template.replace("*", ".*").replace("?", r".").replace("+", r"\+")
        for key in date_regexp_keywords:
            start = template_toreg.find(key)
            if start >= 0:
                date_regexp = template_toreg.replace(key, date_regexp_patt[key], 1)
                hasEnd = False
                start = date_regexp.find(key)
                if start >= 0:
                    hasEnd = True
                    date_regexp = date_regexp.replace(key, date_regexp_patt[key], 1)
                break
        #
        for f in lfiles:
            #
            # Extract file time period
            #
            fperiod = None
            if date_regexp:
                if "P<period>" in date_regexp:
                    tperiod = re.sub(date_regexp, r'\g<period>', f)
                    if tperiod == f:
                        raise classes.Climaf_Error("Cannot find a period in %s with regexp %s" %
                                                   (f, date_regexp))
                    fperiod = init_period(tperiod)
                else:
                    date_regexp0 = date_regexp
                    start = re.sub(date_regexp0, r'\1', f)
                    if start == f:
                        # Fixed: message used to reference undefined name 'regexp0'
                        raise Climaf_Data_Error("Start period not found in %s using regexp %s" %
                                                (f, date_regexp0))
                    if hasEnd:
                        end = re.sub(date_regexp0, r'\2', f)
                        fperiod = init_period("%s-%s" % (start, end))
                    else:
                        fperiod = init_period(start)
            #
            # Filter file time period against required period
            else:
                if 'frequency' in kwargs and (kwargs['frequency'] == "fx" or
                                              kwargs['frequency'] == "seasonnal" or
                                              kwargs['frequency'] == "annual_cycle"):
                    # local data
                    if not remote_prefix and \
                            (basename.find("${variable}") >= 0 or variable == '*' or
                             fileHasVar(f, variable) or (variable != altvar and fileHasVar(f, altvar))):
                        clogger.debug("adding fixed field :" + f)
                        store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on)
                        rep.append(f)
                    # remote data
                    elif remote_prefix:
                        if basename.find("${variable}") >= 0 or variable == '*' or \
                                (variable != altvar and f.find(altvar) >= 0):
                            clogger.debug("adding fixed field :" + remote_prefix + f)
                            store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on)
                            rep.append(remote_prefix + f)
                        else:
                            # Fixed: '%' formerly bound only to the second string of a
                            # '+' concatenation (which has no placeholder) -> TypeError
                            raise classes.Climaf_Error(
                                ("For remote files, filename pattern (%s) should include ${varname} "
                                 "(which is instanciated by variable name or filenameVar)") % f)
                else:
                    clogger.info("Cannot yet filter files re. time using only file content.")
                    store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on)
                    rep.append(f)
            #
            # If file period matches requested period, check similarly for variable
            #
            if (fperiod and (periods is not None or period.intersects(fperiod))) \
                    or not date_regexp:
                #
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s' %
                              (variable, altvar, f))
                # Filter against variable
                if l.find("${variable}") >= 0:
                    clogger.debug('appending %s based on variable in filename' % f)
                    store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on,
                                                fperiod, periods, periods_dict)
                    rep.append(remote_prefix + f)
                    continue
                if f not in rep:
                    # local data
                    if not remote_prefix and \
                            (variable == '*' or "," in variable or fileHasVar(f, variable) or
                             (altvar != variable and fileHasVar(f, altvar))):
                        # Should check time period in the file if not date_regexp
                        clogger.debug('appending %s based on multi-var or var exists in file ' % f)
                        store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on,
                                                    fperiod, periods, periods_dict)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix:
                        if variable == '*' or "," in variable or \
                                (variable != altvar and f.find(altvar) >= 0):
                            # Should check time period in the file if not date_regexp
                            clogger.debug('appending %s based on multi-var or altvar ' % (remote_prefix + f))
                            store_wildcard_facet_values(f, facets_regexp, kwargs, wildcards, merge_periods_on,
                                                        fperiod, periods, periods_dict)
                            rep.append(remote_prefix + f)
                            continue
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise classes.Climaf_Error(mess)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' % f)
                elif not period.intersects(fperiod):
                    clogger.debug('not appending %s because period doesn t intersect %s' % (f, period))
                else:
                    # Fixed: format string had two placeholders for a single argument
                    clogger.debug('not appending %s for some other reason' % f)
        # Break on first url with any matching data
        if len(rep) > 0:
            # Fixed: Python-2-only backquote repr replaced by repr()
            clogger.debug('url %s does match for ' % l + repr(kwargs))
            break
    # For wildcard facets, discover facet values + checks
    for facet in wildcards:
        s = wildcards[facet]
        if return_wildcards is not None:
            if facet == "period":
                for val in periods_dict:
                    periods_dict[val] = sort_periods_list(list(periods_dict[val]))
                clogger.info("Attribute period='*' has values %s" % periods_dict)
                return_wildcards["period"] = periods_dict
            else:
                if len(s) == 1:
                    s = s.pop()
                    clogger.info("Attribute %s='%s' has matching value '%s'" % (facet, kwargs[facet], s))
                    return_wildcards[facet] = s
                else:
                    rep = list(s)
                    rep.sort()
                    return_wildcards[facet] = rep
                    message = "Attribute %s='%s' has multiple values : %s" % (facet, kwargs[facet], list(s))
                    if return_wildcards:
                        clogger.info(message)
                    else:
                        clogger.error(message)
                    s = return_wildcards[facet]
        else:
            clogger.debug("return_wildcards is None")
    return rep
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``url`` once these patterns are instantiated by the
      values in kwargs, and
    - contain the ``variable`` provided in kwargs
    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory. However, for remote files,
    filename pattern must include ${varname}, which is instanciated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this
    is for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the
    argument names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and filenames do
      include the variable name, as this speed up the search
    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of the
      period covered by each file, if this is applicable in the file naming; use
      a second time for end date, if applicable (otherwise the assumption is
      that the whole year -resp. month or day- is included in the file
    - wildcards '?' and '*' for matching respectively one and any number of
      characters
    """
    rep = []
    period = kwargs['period']
    if type(period) is str:
        period = init_period(period)
    variable = kwargs['variable']
    altvar = kwargs.get('filenameVar', variable)
    # a dict and an ordered list of date globbing patterns
    dt = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????", YYYYMMDDHH="??????????")
    lkeys = sorted(dt.keys(), reverse=True)
    # a dict and an ordered list for matching dates
    dr = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})", YYYYMMDD="([0-9]{8})",
              YYYYMMDDHH="([0-9]{10})")
    rkeys = sorted(dr.keys(), reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        if re.findall(".*:.*", l):  # remote data
            remote_prefix = ':'.join(l.split(":")[0:-1]) + ':'
            template = Template(l.split(":")[-1]).safe_substitute(**kwargs)
        else:  # local data
            remote_prefix = ""
            template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        if remote_prefix:
            lfiles = sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : " %
                          (len(lfiles), remote_prefix + temp2))
        else:  # local data
            lfiles = sorted(glob.glob(temp2))
            clogger.debug("Globbing %d files for varname on %s : " % (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # Change value of facet 'variable'
            kwargs['variable'] = kwargs['filenameVar']
            if remote_prefix:  # remote data
                template = Template(l.split(":")[-1]).safe_substitute(**kwargs)
            else:  # local data
                template = Template(l).safe_substitute(**kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            #
            if remote_prefix:
                lfiles = sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), remote_prefix + temp2))
            else:  # local data
                lfiles = sorted(glob.glob(temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " % (len(lfiles), temp2))
        #
        # Construct regexp for extracting dates from filename
        regexp = None
        for key in rkeys:
            start = template.find(key)
            if start >= 0:
                regexp = template.replace(key, dr[key], 1)
                hasEnd = False
                start = regexp.find(key)
                if start >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        #
        for f in lfiles:
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                regexp0 = regexp.replace("*", ".*").replace("?", r".")
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    raise Climaf_Data_Error("Start period not found")
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            #
            # Filter file time period against required period
            else:
                if 'frequency' in kwargs and (kwargs['frequency'] == "fx" or
                                              kwargs['frequency'] == "seasonnal" or
                                              kwargs['frequency'] == "annual_cycle"):
                    # local data
                    # Fixed: condition used to be 'if remote_prefix', which
                    # contradicts the branch labels and the remote branch below
                    if not remote_prefix and \
                            (l.find("${variable}") >= 0 or variable == '*' or
                             fileHasVar(f, variable) or (variable != altvar and fileHasVar(f, altvar))):
                        clogger.debug("adding fixed field :" + f)
                        rep.append(f)
                    # remote data
                    # Fixed: was "remote_prefix is not ''" - an identity test
                    # against a literal, not an equality test
                    elif remote_prefix != "":
                        if l.split(":")[-1].find("${variable}") >= 0 or variable == '*' or \
                                (variable != altvar and f.find(altvar) >= 0):
                            clogger.debug("adding fixed field :" + remote_prefix + f)
                            rep.append(remote_prefix + f)
                        else:
                            raise Climaf_Data_Error(
                                "For remote files, filename pattern (%s) should include ${varname} "
                                "(which is instanciated by variable name or filenameVar)" % f)
                else:
                    clogger.info("Cannot yet filter files re. time using only file content.")
                    rep.append(f)
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s' %
                              (variable, altvar, f))
                # Filter against variable
                if l.find("${variable}") >= 0:
                    clogger.debug('appending %s based on variable in filename' % f)
                    rep.append(remote_prefix + f)
                    continue
                if f not in rep:
                    # local data
                    if not remote_prefix and \
                            (variable == '*' or "," in variable or fileHasVar(f, variable) or
                             (altvar != variable and fileHasVar(f, altvar))):
                        # Should check time period in the file if not regexp
                        clogger.debug('appending %s based on multi-var or var exists in file ' % f)
                        rep.append(f)
                        continue
                    # remote data
                    elif remote_prefix:
                        if variable == '*' or "," in variable or \
                                (variable != altvar and f.find(altvar) >= 0):
                            # Should check time period in the file if not regexp
                            clogger.debug('appending %s based on multi-var or altvar ' % (remote_prefix + f))
                            rep.append(remote_prefix + f)
                            continue
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise Climaf_Data_Error(mess)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' % f)
                else:
                    if not period.intersects(fperiod):
                        clogger.debug('not appending %s because period doesn t intersect %s' % (f, period))
        # Break on first url with any matching data
        if len(rep) > 0:
            # Fixed: Python-2-only backquote repr replaced by repr()
            clogger.debug('url %s does match for ' % l + repr(kwargs))
            break
    return rep
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``url`` once these patterns are instantiated by the
      values in kwargs, and
    - contain the ``variable`` provided in kwargs
    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc)']
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the
    argument names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and filenames do
      include the variable name, as this speed up the search
    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of the
      period covered by each file, if this is applicable in the file naming; use
      a second time for end date, if applicable (otherwise the assumption is
      that the whole year -resp. month or day- is included in the file
    - wildcards '?' and '*' for matching respectively one and any number of
      characters
    """
    rep = []
    period = kwargs['period']
    if type(period) is str:
        period = init_period(period)
    variable = kwargs['variable']
    mustHaveVariable = False
    if "filenameVar" in kwargs and kwargs['filenameVar']:
        kwargs['variable'] = kwargs['filenameVar']
        mustHaveVariable = True
    for l in urls:
        template = Template(l)
        # There is no use to look for files which path is not specific
        # to the required variable when we know it should
        if l.find("${variable}") < 0 and mustHaveVariable:
            continue
        #
        # Instantiate keywords in pattern with attributes values
        template = template.safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        dt = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????")
        for k in dt:
            temp2 = temp2.replace(k, dt[k])
        clogger.debug("Globbing on : " + temp2)
        lfiles = glob.glob(temp2)
        #
        # Analyze all filenames
        for f in lfiles:
            # Construct regexp for extracting dates from filename
            # Fixed: YYYYMMDD used to match 10 digits ([0-9]{10}) instead of 8,
            # so 8-digit dates could never be extracted
            dt = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})", YYYYMMDD="([0-9]{8})")
            regexp = None
            lkeys = sorted(dt.keys(), reverse=True)
            for key in lkeys:
                start = template.find(key)
                if start >= 0:
                    regexp = template.replace(key, dt[key], 1)
                    hasEnd = False
                    start = regexp.find(key)
                    if start >= 0:
                        hasEnd = True
                        regexp = regexp.replace(key, dt[key], 1)
                    break
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                regexp = regexp.replace("*", ".*").replace("?", r".")
                start = re.sub(regexp, r'\1', f)
                if hasEnd:
                    end = re.sub(regexp, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            #
            # Filter file time period against required period
            else:
                if 'frequency' in kwargs and kwargs['frequency'] == "fx":
                    if l.find("${variable}") >= 0 or fileHasVar(f, variable):
                        clogger.debug("adding fixed field :" + f)
                        rep.append(f)
                else:
                    clogger.warning("Cannot yet filter files re. time using only file content. TBD")
                    rep.append(f)
            if (fperiod and period.intersects(fperiod)) or not regexp:
                # Filter against variable
                if l.find("${variable}") >= 0 or fileHasVar(f, variable):
                    # Should check time period in the file if not regexp
                    rep.append(f)
    return rep
def zonal_mean_slice(model, variable, basin, season, ref=None, add_product_in_title=True,
                     method='regrid_model_on_ref', custom_plot_params={}, safe_mode=True,
                     do_cfile=True, y='lin', ymin=None, plot_on_latitude=False,
                     horizontal_regridding=True, apply_period_manager=True):
    """Build a zonal-mean (latitude/depth) section plot of `variable` over one ocean
    `basin` for `season`.

    `model` is a dictionary of dataset attributes (CliMAF-style); `ref`, when provided,
    is a similar dictionary for the reference (obs or another simulation) and the plot
    then shows the model-minus-reference bias; otherwise the model field itself is shown.

    `method` selects how model and reference are brought onto a common grid:
      - 'regrid_model_on_ref' : regrid the model on the reference grid and use the
        reference basin masks (favours the latitudinal structure of the section);
      - 'regrid_ref_on_model' : regrid the reference on the model grid and use
        cdftools zonal means (ccdfzonalmean_bas) with the model basin masks;
      - 'regrid_on_1deg'      : not implemented yet (only prints a message).

    `basin` is used as a key into the 'mesh_masks' dictionaries and, lowercased, in the
    default '<basin>_mask.nc' file names.  `custom_plot_params` overrides the default
    plot parameters; `y`/`ymin` tune the vertical axis; `safe_mode` wraps the fragile
    interpolation steps in try/except fallbacks; `do_cfile` and `safe_mode` are
    forwarded to safe_mode_cfile_plot(), whose result is returned.

    NOTE(review): `plot_on_latitude` and `horizontal_regridding` appear unused in this
    body -- confirm before removing.
    NOTE(review): `custom_plot_params={}` is a mutable default argument; safe only if
    plot_params() does not mutate it -- to be confirmed.
    """
    # -----------------------------------------------------------------------------------------------------------------------------
    # -- 1/ Zonal mean of the model:
    #       -> either the zonally-averaged variable is already available (zo...),
    #       -> or it is computed from the basin masks defined in `model`:
    #          * path_mesh_mask gives the path to the mask files
    #          * mesh_masks is a dictionary giving the mask file for each basin (GLO, ATL, PAC, IND and ALLBAS)
    # -- Test whether the per-basin zonal means are already among the available variables
    context = 'full_field'
    # -- If method=='regrid_model_on_obs', regrid the model on the reference grid and use
    #    the reference basin masks found in its directory ($basin_mask.nc).
    #    This method favours the latitudinal structure of the section (=> eq/pole gradient)
    if method=='regrid_model_on_ref':
        # -- Apply the frequency and time manager (IGCM_OUT)
        wmodel=model.copy() ; wmodel.update(dict(variable=variable))
        if apply_period_manager:
            frequency_manager_for_diag(wmodel, diag='clim')
            get_period_manager(wmodel)
        # -- Get the model data
        model_dat = ds(**wmodel)
        # -- Compute the climatology on the model grid and mask the zeros
        clim_model = clim_average( mask(model_dat, miss=0.0), season)
        #
        # -- Get the reference
        if ref:
            ref.update(dict(variable=variable))
            # -- Get the reference data
            ref_dat = ds(**ref)
            #
            # -- Get the context => model_model or bias
            context = ('bias' if 'product' in ref_dat.kvp else 'model_model')
            #
            # -- Compute the climatology
            clim_ref = clim_average(ref_dat, season)
            #
            # -- Regrid the model on the obs
            if safe_mode:
                try:
                    rgrd_clim_model = lonlatvert_interpolation( regrid(clim_model, clim_ref, option='remapdis'), clim_ref, horizontal_regridding=False )
                except:
                    print '--> Error in lonlatvert_interpolation( regrid(clim_model, clim_ref, option="remapdis"), clim_ref, horizontal_regridding=False )'
                    print '--> Set safe_mode=False to see the error'
                    # Fallback: keep the unregridded model climatology
                    rgrd_clim_model = clim_model
            else:
                rgrd_clim_model = lonlatvert_interpolation( regrid(clim_model, clim_ref, option='remapdis'), clim_ref, horizontal_regridding=False )
            # Debug traces (cfile() also forces evaluation of the CliMAF objects)
            print '----'
            print '----'
            print '----'
            print 'rgrd_clim_model = ',cfile(rgrd_clim_model)
            print 'clim_model = ', cfile(clim_model)
            print 'clim_ref = ', cfile(clim_ref)
            print '----'
            print '----'
            print '----'
            #
            # -- Get the reference mask
            if 'path_mesh_mask' in ref:
                mask_file = ref['path_mesh_mask'] + ref['mesh_masks'][basin]
            else:
                # Default: look for <basin>_mask.nc next to the reference data files
                mask_file = os.path.dirname(str.split(ref_dat.baseFiles(),' ')[0])+'/'+basin.lower()+'_mask.nc'
            print '----'
            print '----'
            print '----'
            print '---> mask_file = ', mask_file
            print '----'
            print '----'
            print '----'
            mask_dat = fds( mask_file, variable='mask', period='fx')
            basin_mask = mask( mask_dat, miss=0.0)
            #
            # -- Apply the mask to the model and the ref
            masked_model = multiply(rgrd_clim_model, basin_mask)
            masked_ref = multiply(clim_ref, basin_mask)
            #
            if 'product' not in ref:
                masked_model = regridn(masked_model, cdogrid='r360x180', option='remapdis')
                masked_ref = regridn(masked_ref, cdogrid='r360x180', option='remapdis')
            # -- Compute the zonal means
            ZM_MODEL = zonmean(masked_model)
            ZM_REF = zonmean(masked_ref)
            #
            #print '==='
            #print '==='
            #print '==='
            #print '=== ZM_MODEL = ',cfile(ZM_MODEL)
            #print '=== ZM_REF = ',cfile(ZM_REF)
            #print '==='
            #print '==='
            #print '==='
            # -- Interpolate vertically and compute the difference
            if safe_mode:
                try:
                    ZM_bias = diff_zonmean(ZM_MODEL, ZM_REF)
                except:
                    print '--> Error in diff_zonmean(ZM_MODEL, ZM_REF)'
                    print '--> Set safe_mode=False to track the error'
                    # Fallback: plain difference without vertical interpolation
                    ZM_bias = minus(ZM_MODEL, ZM_REF)
            else:
                ZM_bias = diff_zonmean(ZM_MODEL, ZM_REF)
        # -- Compute the zonal mean for the basin using the obs masks
        else:
            print 'No reference (obs) provided in zonal_mean_slice for method regrid_model_on_obs'
            # -- Get the reference mask
            if 'path_mesh_mask' in model:
                mask_file = model['path_mesh_mask'] + model['mesh_masks'][basin]
            else:
                mask_file = os.path.dirname(str.split(model_dat.baseFiles(),' ')[0])+'/'+basin.lower()+'_mask.nc'
            print 'mask_file = ', mask_file
            mask_dat = fds( mask_file, variable='mask', period='fx')
            basin_mask = mask( mask_dat, miss=0.0)
            #
            # -- Apply the mask to the model and the ref
            masked_model = multiply(clim_model, basin_mask)
            #
            if 'product' not in model:
                masked_model = regridn(masked_model, cdogrid='r360x180', option='remapdis')
            # -- Compute the zonal means
            ZM_MODEL = zonmean(masked_model)
    #
    if method=='regrid_ref_on_model':
        #
        # -> This method
        #if variable=='thetao': tmpzonmvar = 'zotem'+region.lower()
        #if variable=='so': tmpzonmvar = 'zosal'+region.lower()
        # -- Apply the frequency and time manager (IGCM_OUT)
        #wmodel=model.copy() #; wmodel.update(dict(variable=tmpzonmvar))
        #wmodel.update(dict(variable=variable))
        #frequency_manager_for_diag(wmodel, diag='clim')
        #get_period_manager(wmodel)
        #model_dat = ds(**wmodel) # -> check whether ds() finds a file matching the variable
        #if tmp.baseFiles():
        #    # --> Fix to add nav_lat to the file
        #    if not fileHasDim(cfile(tmp),'nav_lat'):
        #        zonmean_model = add_nav_lat(tmp, nav_lat_file=nav_lat_zovarbasin_file(grid=whichORCAGrid(cfile(tmp))),
        #                                    coordinates=build_coordinates_zovarbasin(cfile(tmp)))
        #    else:
        #        zonmean_model = tmp
        #    modvar_climato_zonmean_basin = mask(clim_average(zonmean_model, season), miss=0.0)
        #else:
        #
        # -- Apply the frequency and time manager (IGCM_OUT)
        wmodel=model.copy() ; wmodel.update(dict(variable=variable))
        if apply_period_manager:
            frequency_manager_for_diag(wmodel, diag='SE')
            get_period_manager(wmodel)
        model_dat = ds(**wmodel)
        # Seasonal climatology with zeros set to missing
        model_clim = ccdo(clim_average(model_dat,season), operator='setctomiss,0')
        # Harmonize the name of the depth dimension if needed
        if fileHasVar(cfile(model_clim), 'lev'):
            model_clim_ok = rename_depth(model_clim)
        else:
            model_clim_ok = model_clim
        #
        #
        #if method=='regrid_model_on_1deg_grid':
        #    # --> In this case, we regrid the model on the obs
        #    #maskfile = model['path_mesh_mask'] + model['mesh_masks'][basin]
        #    #wmask = ccdo(fds(maskfile, variable='tmask', period='fx'), operator='setctomiss,0')
        #    #modvar_climato_masked = multiply(model_clim_ok, wmask)
        #    modvar_rgrd = regridn(modvar_climato_masked, cdogrid='r360x180',option='remapdis')
        #    modvar_climato_zonmean_basin = zonmean(modvar_rgrd)
        #
        #else:
        set_fixed_fields('ccdfzonalmean_bas', basin, model)
        # compute the zonal mean for the chosen basin
        model_clim_zonmean_basin = ccdfzonalmean_bas(model_clim_ok, point_type='T', basin=str(basin).lower())
        # -- Add the latitudes here??
        #else:
        ZM_MODEL = model_clim_zonmean_basin
        print '--'
        print '--'
        print '--'
        print 'cfile(ZM_MODEL) = ',cfile(ZM_MODEL)
        print '--'
        print '--'
        print '--'
        #
        # -----------------------------------------------------------------------------------------------------------------------------
        # -- 2/ Zonal mean of the reference:
        # -- -> the refs are provided with the basin masks; if the ref is a model,
        #       path_mesh_mask and mesh_masks (hence the basin mask files) can be retrieved
        if ref:
            # compute the climatology for the chosen season
            if 'variable' not in ref: ref.update(dict(variable=variable))
            ref_dat = ds(**ref)
            ref_clim = clim_average(ref_dat, season)
            # -- Check whether the ref is a model or an obs to set the appropriate context
            context = ('bias' if 'product' in ref_dat.kvp else 'model_model')
            # 1. If the context is 'model_model', check whether the variable is available as a zonal mean
            #    - if yes, work directly with it
            #    - if not, retrieve the basin masks
            # 2. If the context is 'bias', retrieve the basin masks that must be in the obs directory
            #    From the masks, compute the per-basin zonal means
            #zovarbas_ref = ref.copy() ; zovarbas.update(dict(variable=tmpzonmvar))
            #tmpref = ds(**zovarbas_ref) # -> check whether ds() finds a file matching the variable
            # -- If the pre-computed zonal-mean variables are available for both model and obs, use them
            # --> Ok when using WOA13-v2 as reference
            #if tmpref.baseFiles() and tmp.baseFiles():
            #    ref_clim_zonmean_basin_interp = regridn(mask(clim_average(tmpref, season), miss=0.0), cdogrid='r1x180', option='remapdis')
            #    model_clim_zonmean_basin_interp = regridn(model_clim_zonmean_basin, cdogrid='r1x180', option='remapdis')
            #    ZM_OBS = zonmean_interpolation(ref_clim_zonmean_basin_interp, model_clim_zonmean_basin_interp)
            #    ZM_MODEL = model_clim_zonmean_basin_interp
            #else:
            #
            ##
            ref_clim = mask(clim_average(ref_dat, season), miss=0.0)
            # Harmonize the name of the depth dimension if needed
            if fileHasVar(cfile(ref_clim), 'lev'):
                ref_clim_ok = rename_depth(ref_clim)
            else:
                ref_clim_ok = ref_clim
            print "cfile(ref_clim_ok) = ",cfile(ref_clim_ok)
            #
            # -- If 'ref' is another simulation and has mesh_masks, use them
            if context=='model_model' and 'mesh_masks' in ref:
                set_fixed_fields('ccdfzonalmean_bas', basin, ref)
                ref_clim_interp = ref_clim_ok
            else:
                # -> Otherwise, regrid 'obs' on the model, and use the model basin masks
                # -> to compute the zonal means
                #ref_clim_interp = regrid(ref_clim_ok, model_clim_ok, option='remapdis')
                ref_clim_interp = ccdo(ref_clim_ok, operator='remapdis,'+cfile(model_clim_ok))
            test = lonlatvert_interpolation(ref_clim_interp, model_clim_ok, horizontal_regridding=False)
            ref_clim_zonmean_basin = ccdfzonalmean_bas(test, point_type='T', basin=str(basin).lower())
            # compute the zonal mean for the chosen basin
            ZM_OBS = zonmean_interpolation(ref_clim_zonmean_basin, model_clim_zonmean_basin, horizontal_regridding=False)
            ZM_bias = minus(ZM_MODEL, ZM_OBS)
    #
    # -- Now compute the difference (bias)
    if method=='regrid_on_1deg':
        print 'Not yet available : ',method

    # Plot
    #
    # -- Get the period for display in the plot: we build a tmp_period string
    # -- Check whether the period is described by clim_period, years or period (default)
    # -- and make a string with it
    tmp_period = build_period_str(wmodel)
    #
    # -- Title of the plot -> If 'customname' is in the dictionary of dat, it will be used
    # -- as the title. If not, it checks whether dat is a reference or a model simulation
    # -- and builds the title
    title = build_plot_title(wmodel, None)# add_product_in_title='') #add_product_in_title)
    #
    # -- Get the default plot parameters with the function 'plot_params'
    # -- We also update with a custom dictionary of params (custom_plot_params) if the user sets one
    p = plot_params(variable+'_zonmean', context, custom_plot_params=custom_plot_params)
    p.update(dict(y=y, contours=1, tiMainFontHeightF=0.023,tiMainFont="helvetica-bold",
                  gsnStringFontHeightF=0.019,
                  options="cnMissingValFillColor=gray|trYReverse=True|"+\
                          "vpHeightF=0.4|vpWidthF=0.8|"+\
                          "pmLabelBarWidthF=0.075|pmLabelBarOrthogonalPosF=0.01|lbLabelFontHeightF=0.012|"
                  ))
    if ymin:
        p['options']=p['options']+'|trYMinF='+str(ymin)
    #
    # -- Set the left, center and right strings of the plot
    p.update(dict(gsnRightString = tmp_period,
                  gsnCenterString = variable+' '+method,
                  gsnLeftString = basin))
    #
    # NOTE(review): if `method` is none of the handled values, wmodel/ZM_MODEL/ZM_bias
    # are undefined here and a NameError is raised -- confirm expected usage upstream.
    if ref:
        ZM = ZM_bias
    else:
        ZM = ZM_MODEL
    #
    plot_zonmean = plot(ZM, title=title, **p)
    # -- If the user doesn't want to do the cfile within plot_climato, set do_cfile=False
    # -- Otherwise we check if the plot has been done successfully.
    # -- If not, the user can set safe_mode=False and clog('debug') to debug.
    return safe_mode_cfile_plot(plot_zonmean, do_cfile, safe_mode)
def selectGenericFiles(urls, **kwargs):
    """ Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``url`` once these patterns are instantiated by the values in kwargs, and

    - contain the ``variable`` provided in kwargs

    - match the ``period`` provided in kwargs

    In the pattern strings, no keyword is mandatory

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc)'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and filenames do include
      the variable name, as this speed up the search

    - YYYY, YYYYMM, YYYYMMDD : use it for indicating the start date of the period
      covered by each file, if this is applicable in the file naming; use a second
      time for the end date, if applicable (otherwise the assumption is that the
      whole year - resp. month or day - is included in the file)

    - wildcards '?' and '*' for matching respectively one and any number of characters

    Each matching file appears at most once in the returned list.
    """
    rep = []
    period = kwargs['period']
    if isinstance(period, str):
        period = init_period(period)
    variable = kwargs['variable']
    # Alternate variable name possibly used in filenames
    altvar = kwargs.get('filenameVar', variable)
    # A dict of date globbing patterns; keys are processed longest-first so that
    # e.g. YYYYMMDD is substituted before YYYY
    dt = dict(YYYY="????", YYYYMM="??????", YYYYMMDD="????????")
    lkeys = sorted(dt.keys(), reverse=True)
    # A dict of regexps for extracting dates; same ordering rationale
    dr = dict(YYYY="([0-9]{4})", YYYYMM="([0-9]{6})", YYYYMMDD="([0-9]{8})")
    rkeys = sorted(dr.keys(), reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        lfiles = glob.glob(temp2)
        clogger.debug("Globbing %d files for varname on %s : " % (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # NOTE: this sticks for the remaining url patterns too (historical behavior)
            kwargs['variable'] = kwargs['filenameVar']
            template = Template(l).safe_substitute(**kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            lfiles = glob.glob(temp2)
            clogger.debug("Globbing %d files for filenamevar on %s: " % (len(lfiles), temp2))
        #
        # Construct regexp for extracting dates from filename : the first date keyword
        # occurrence captures the start date, a second occurrence (if any) the end date
        regexp = None
        hasEnd = False
        for key in rkeys:
            start = template.find(key)
            if start >= 0:
                regexp = template.replace(key, dr[key], 1)
                hasEnd = False
                start = regexp.find(key)
                if start >= 0:
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        #
        for f in lfiles:
            # Analyze file time period
            fperiod = None
            if regexp:
                # Turn remaining glob wildcards into regexp syntax before matching
                regexp0 = regexp.replace("*", ".*").replace("?", r".")
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    raise Climaf_Data_Error("Start period not found")  # ? LV
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            else:
                # No date pattern in the filename : fixed-like fields are accepted as is;
                # other frequencies cannot be time-filtered yet
                if ('frequency' in kwargs and
                        ((kwargs['frequency'] == "fx") or
                         kwargs['frequency'] == "seasonnal" or
                         kwargs['frequency'] == "annual_cycle")):
                    if (l.find("${variable}") >= 0) or fileHasVar(f, variable) or fileHasVar(f, altvar):
                        clogger.debug("adding fixed field :" + f)
                        if f not in rep:
                            rep.append(f)
                else:
                    clogger.warning("Cannot yet filter files re. time using only file content. TBD")
                    if f not in rep:
                        rep.append(f)
            #
            # Filter file time period against required period
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug('Period is OK - Considering variable filtering on %s and %s for %s' % (variable, altvar, f))
                # Filter against variable
                if l.find("${variable}") >= 0:
                    # Guard against duplicates : when regexp is None the file may
                    # already have been appended by the fixed-field branch above
                    if f not in rep:
                        clogger.debug('appending %s based on variable in filename' % f)
                        rep.append(f)
                    continue
                if f not in rep and (fileHasVar(f, variable) or fileHasVar(f, altvar) or ("," in variable)):
                    # Should check time period in the file if not regexp
                    clogger.debug('appending %s based on multi-var or var exists in file ' % f)
                    rep.append(f)
            else:
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' % f)
                elif not period.intersects(fperiod):
                    clogger.debug('not appending %s because period doesn t intersect %s' % (f, period))
    return rep