Пример #1
0
def hasMatchingObject(cobject,ds_func) : 
    """
    Search the cache index for an object which 'matches' COBJECT.

    Each CRS key of dict crs2filename is evaluated in the namespace of
    module __main__, and the resulting object is compared to COBJECT
    using compare_trees, which is provided with DS_FUNC and with a
    function telling if an operator does not commute with time
    concatenation.

    Returns a pair (object, value) for the first CRS (in the iteration
    order of crs2filename) for which compare_trees returns a true value
    and for which the indexed file does exist; value is the result of
    compare_trees. Returns (None, None) if there is no such entry.

    An index entry which matches but which file is missing is removed
    from crs2filename.

    Can be applied for finding same object with included or including
    time-period
    """
    # First read index from file if it is yet empty - No : done at startup
    # if len(crs2filename.keys()) == 0 : cload()
    def op_squeezes_time(operator):
        import operators
        return not operators.scripts[operator].flags.commuteWithTimeConcatenation 
    #
    # Iterate on a copy, because stale entries are removed on the fly
    for crs in crs2filename.copy() :
        try: 
            # NOTE(review): eval() of index keys is safe only as long as the
            # cache index is trusted content
            co=eval(crs, sys.modules['__main__'].__dict__)
            altperiod=compare_trees(co,cobject, ds_func,op_squeezes_time)
            if altperiod :
                if os.path.exists(crs2filename[crs]) :
                    return co,altperiod
                else :
                    clogger.debug("Removing %s from cache index, because file is missing",crs)
                    crs2filename.pop(crs,None)
        except Exception as exc :
            # Usually the case of a CRS which project is not currently defined.
            # Catching Exception (rather than everything) lets
            # KeyboardInterrupt and SystemExit go through
            clogger.debug("Skipping cache entry %s : %s",crs,exc)
    return None,None
Пример #2
0
def register(filename,crs):
    """ Adds in FILE a metadata named CRS_def and with value CRS. 
    Records this FILE in dict crs2filename

    Waits (up to about 5 seconds, plus a fixed half second) for FILENAME
    to show up in the file system before stamping it.

    Only files which name ends with '.nc' (stamped using ncatted) or
    '.png' (stamped using convert) are handled.

    Returns True if the file was stamped and recorded, and None
    otherwise (missing file, unsupported file type or failing
    stamping command); an error is logged in these cases
    """
    # First read index from file if it is yet empty
    if len(crs2filename.keys()) == 0 : cload()
    # It appears that we have to allow the file system some time for updating its inode tables
    waited=0
    while waited < 10 and not os.path.exists(filename) :
        time.sleep(0.5)
        waited += 1
    time.sleep(0.5)
    if not os.path.exists(filename) :
        clogger.error("file %s does not exist (for crs %s)"%(filename,crs))
        return None
    #while time.time() < os.path.getmtime(filename) + 0.2 : time.sleep(0.2)
    # NOTE(review): commands are run through a shell; a CRS including
    # double quotes, '$' or backquotes would not be passed verbatim
    if filename.endswith(".nc") :
        command="ncatted -h -a CRS_def,global,o,c,\"%s\" %s"%(crs,filename)
    elif filename.endswith(".png") :
        command="convert -set \"CRS_def\" \"%s\" %s %s.png && mv -f %s.png %s"%\
            (crs,filename,filename,filename,filename)
    else :
        # Formerly, this case led to use an unbound variable 'command'
        clogger.error("cannot stamp file %s : unsupported file type (for crs %s)"%(filename,crs))
        return None
    clogger.debug("trying stamping by %s"%command)
    if ( os.system(command) == 0 ) :
        crs2filename[crs]=filename
        clogger.info("%s registered as %s"%(crs,filename))
        return True
    clogger.critical("cannot stamp by %s"%command)
    return None
Пример #3
0
def set_variable(obj, varname, format) :
    """ Rename to VARNAME the variable of OBJ, which FORMAT may be
    'file' or 'MaskedArray'.

    For FORMAT 'file', and if the current variable name (as told by
    varOfFile) differs from VARNAME, the variable is renamed using
    ncrename and its attribute long_name is set, using ncatted, to the
    value provided by CFlongname; True is returned on success.

    Returns None in all other cases : OBJ is None, variable name is
    already VARNAME, a command failed (an error is logged), FORMAT is
    'MaskedArray' (not yet handled) or FORMAT is unknown
    """
    def succeeds(shell_command) :
        # True if the shell command returns a null status
        return os.system(shell_command) == 0
    #
    if obj is None :
        return None
    long_name=CFlongname(varname)
    if format == 'file' :
        previous=varOfFile(obj)
        if previous != varname :
            renaming="ncrename -v %s,%s %s >/dev/null 2>&1"%(previous,varname,obj)
            if not succeeds(renaming) :
                clogger.error("Issue with changing varname to %s in %s"%(varname,obj))
                return None
            clogger.debug("Varname changed to %s in %s"%(varname,obj))
            relabeling="ncatted -a long_name,%s,o,c,%s %s"%(varname,long_name,obj)
            if not succeeds(relabeling) :
                clogger.error("Issue with changing long_name for var %s in %s"%
                              (varname,obj))
                return None
            return True
        # Nothing to do when the variable already has the requested name
        return None
    if format == 'MaskedArray' :
        clogger.warning('TBD - Cannot yet set the varname for MaskedArray')
        return None
    clogger.error('Cannot handle format %s'%format)
    return None
0
def capply(climaf_operator, *operands, **parameters):
    """ Builds the object representing applying a CliMAF operator (script, function or macro)

    CLIMAF_OPERATOR is looked up, in that order, in operators.scripts,
    in cmacro.cmacros and in operators.operators.

    For a script which declares no output format, the object is
    evaluated right away (using ceval).

    Returns the object built by capply_script, cmacro.instantiate or
    capply_operator, or None (after logging an error) if
    CLIMAF_OPERATOR is not known.

    Raises Climaf_Driver_Error if there is no operand, if first operand
    is None, or if a macro is called with keyword arguments
    """
    res=None
    # 'not operands' handles the case of a call without any operand,
    # which formerly raised an IndexError
    if not operands or operands[0] is None :
        raise Climaf_Driver_Error("Operands is None")
    # Use a true list, in order that logging it shows its content
    opds=[ str(operand) for operand in operands ]
    if climaf_operator in operators.scripts :
        #clogger.debug("applying script %s to"%climaf_operator + repr(opds) + repr(parameters))
        res=capply_script(climaf_operator, *operands, **parameters)
        # Evaluate object right now if there is no output to manage
        op=operators.scripts[climaf_operator]
        if op.outputFormat is None : ceval(res,userflags=copy.copy(op.flags))
    elif climaf_operator in cmacro.cmacros :
        if (len(parameters) > 0) :
            raise Climaf_Driver_Error("Macros cannot be called with keyword args")
        clogger.debug("applying macro %s to"%climaf_operator + repr(opds) )
        res=cmacro.instantiate(cmacro.cmacros[climaf_operator],*operands)
    elif climaf_operator in operators.operators :
        clogger.debug("applying operator %s to"%climaf_operator + repr(opds) + repr(parameters))
        res=capply_operator(climaf_operator,*operands, **parameters)
    else:
        clogger.error("%s is not a known operator nor script"%climaf_operator)
    return res
Пример #5
0
def register(filename,crs):
    """ 
    Adds in FILE a metadata named 'CRS_def' and with value CRS, and a
    metadata 'CLiMAF' with CliMAF version and ref URL

    Records this FILE in dict crs2filename

    Waits (up to about 2 seconds) for FILENAME to show up in the file
    system before stamping it.

    Only files which name ends with '.nc' (stamped using ncatted) or
    '.png' (stamped using convert) are handled.

    Returns True if the file was stamped and recorded, and None
    otherwise (missing file, unsupported file type or failing
    stamping command); an error is logged in these cases
    """
    # First read index from file if it is yet empty - No : done at startup
    # if len(crs2filename.keys()) == 0 : cload()
    # It appears that we have to let some time to the file system  for updating its inode tables
    waited=0
    while waited < 20 and not os.path.exists(filename) :
        time.sleep(0.1)
        waited += 1
    #time.sleep(0.5)
    if not os.path.exists(filename) :
        clogger.error("file %s does not exist (for crs %s)"%(filename,crs))
        return None
    #while time.time() < os.path.getmtime(filename) + 0.2 : time.sleep(0.2)
    # NOTE(review): commands are run through a shell; a CRS including
    # double quotes, '$' or backquotes would not be passed verbatim
    if filename.endswith(".nc") :
        command="ncatted -h -a CRS_def,global,o,c,\"%s\" -a CliMAF,global,o,c,\"CLImate Model Assessment Framework version %s (http://climaf.rtfd.org)\" %s"%\
            (crs,version,filename)
    elif filename.endswith(".png") :
        command="convert -set \"CRS_def\" \"%s\" -set \"CliMAF\" \"CLImate Model Assessment Framework version %s (http://climaf.rtfd.org)\" %s %s.png && mv -f %s.png %s"%\
            (crs,version,filename,filename,filename,filename)
    else :
        # Formerly, this case led to use an unbound variable 'command'
        clogger.error("cannot stamp file %s : unsupported file type (for crs %s)"%(filename,crs))
        return None
    clogger.debug("trying stamping by %s"%command)
    if ( os.system(command) == 0 ) :
        crs2filename[crs]=filename
        clogger.info("%s registered as %s"%(crs,filename))
        return True
    clogger.critical("cannot stamp by %s"%command)
    return None
Пример #6
0
def crewrite(crs,alsoAtTop=True):
    """
    Return the CRS expression with sub-trees replaced by macro equivalent 
    when applicable

    Search order is : from CRS tree root try all macros (unless
    ALSOATTOP is false), then do the same for the sub-trees, through
    the object's method buildcrs which is provided with this function
    for rewriting sub-expressions.

    CRS is returned unchanged if it cannot be evaluated, or if it does
    not evaluate to a ctree, a scriptChild or a cpage
    """
    # Next line used for interpreting macros's CRS
    exec("ARG=climaf.cmacro.cdummy()", sys.modules['__main__'].__dict__)
    #
    try :
        # NOTE(review): eval() is safe only as long as CRS is trusted content
        co=eval(crs, sys.modules['__main__'].__dict__)
    except Exception :
        clogger.debug("Issue when rewriting %s"%crs)
        return(crs)
    if not isinstance(co,(ctree,scriptChild,cpage)) :
        return(crs)
    if alsoAtTop :
        for m in cmacros :
            clogger.debug("looking at macro : "+m+"="+repr(cmacros[m])+\
                       " \ncompared to : "+repr(macro(None,co)))
            argl=cmatch(cmacros[m],co)
            if len(argl) > 0 :
                # Joining arguments avoids patching the string afterwards,
                # which could alter a ',)' sequence lying in an argument
                rewritten=[ crewrite(arg.buildcrs(crsrewrite=crewrite)) for arg in argl ]
                return m+"("+",".join(rewritten)+")"
    # No macro matches at top level, or top level not wished.
    # Let us dig a bit
    return(co.buildcrs(crsrewrite=crewrite))
Пример #7
0
def cmatch(macro, cobj):
    """
    Analyze if macro does match cobj, and return the list of objects
    matching macro arguments, ordered by depth-first traversal

    - for two ctrees with same operator : parameters are compared
      pairwise (an empty list is returned if any pair differs), then
      operands are matched pairwise : an operand of cobj is collected
      when the macro has a cdummy at that place, otherwise the matching
      is recursive
    - for two scriptChild objects with same varname : their fathers are
      matched
    - for two cpages with same heights, widths and orientation : figures
      are matched pairwise, the same way as operands of ctrees
    - otherwise, an empty list is returned
    """
    clogger.debug("matching " + repr(macro) + " and " + repr(cobj))
    if isinstance(cobj, ctree) and isinstance(macro, ctree) and macro.operator == cobj.operator:
        for mpara, para in zip(macro.parameters, cobj.parameters):
            if mpara != para or macro.parameters[para] != cobj.parameters[para]:
                return []
        argsub = []
        for mop, op in zip(macro.operands, cobj.operands):
            if isinstance(mop, cdummy):
                argsub.append(op)
            else:
                argsub += cmatch(mop, op)
        return argsub
    elif isinstance(cobj, scriptChild) and isinstance(macro, scriptChild) and macro.varname == cobj.varname:
        # The former call had a third argument, an undefined name,
        # while this function only takes two arguments
        return cmatch(macro.father, cobj.father)
    elif isinstance(cobj, cpage) and isinstance(macro, cpage):
        argsub = []
        if cobj.heights == macro.heights and cobj.widths == macro.widths and cobj.orientation == macro.orientation:
            for mlines, lines in zip(macro.fig_lines, cobj.fig_lines):
                for mfig, fig in zip(mlines, lines):
                    if isinstance(mfig, cdummy):
                        argsub.append(fig)
                    else:
                        argsub += cmatch(mfig, fig)
        return argsub
    else:
        return []
Пример #8
0
def cshow(obj) :
    """ 
    Return the result of evaluating CliMAF object OBJ with
    climaf.driver.ceval, requesting format 'MaskedArray'.

    For a figure object, this will lead to display it
    ( computation is launched if needed )
    """
    message="cshow called on "+str(obj)
    clogger.debug(message) 
    value=climaf.driver.ceval(obj,format='MaskedArray')
    return value
Пример #9
0
def is_derived_variable(variable,project):
    """ Tell (by a boolean) if VARIABLE is recorded in dict
    derived_variables, either for PROJECT or for wildcard project '*'
    """
    rep=False
    # Requested project is checked first, then the wildcard one
    for scope in (project,"*") :
        if scope in derived_variables and variable in derived_variables[scope] :
            rep=True
            break
    clogger.debug("Checking if variable %s is derived for project %s : %s"%(variable,project,rep))
    return rep
Пример #10
0
def selectCmip5DrsFiles(urls, **kwargs) :
    """ Return the list of NetCDF files found under the root directories
    URLS and organized as, e.g. :
      CMIP5/output1/CNRM-CERFACS/CNRM-CM5/1pctCO2/mon/atmos/
        Amon/r1i1p1/v20110701/clivi/clivi_Amon_CNRM-CM5_1pctCO2_r1i1p1_185001-189912.nc

    Mandatory keyword arguments : project, model, simulation, frequency,
    variable, realm, table, period, experiment, version

    - second path segment can be any string (allows for : output, output1,
      merge...), but if 'merge' exists, it is used alone
    - if version is 'last' : the single version directory is used if there
      is only one; otherwise directory 'last' is used if available, and
      else the last one in sorted order
    - frequency 'monthly' is translated to 'mon'
    - except for frequency 'fx', a file is selected only if the period
      which ends its name intersects the requested period
    """
    (project, model, simulation, frequency, variable,
     realm, table, period, experiment, version) = [
         kwargs[key] for key in ('project', 'model', 'simulation', 'frequency', 'variable',
                                 'realm', 'table', 'period', 'experiment', 'version') ]
    #
    rep=[]
    frequency2drs=dict({'monthly':'mon'})
    freqd=frequency2drs.get(frequency,frequency)
    # Filenames end with a period such as 185001-189912
    regex=r'^.*([0-9]{4}[0-9]{2}-[0-9]{4}[0-9]{2}).nc$'
    # TBD : analyze ambiguity of variable among realms+tables
    for root in urls :
        top=root+"/"+project+"/merge"
        if not os.path.exists(top) :
            top=root+"/"+project+"/*"
        # Pattern for the directories which hold version directories
        patternv="/".join([top,"*",model,experiment,freqd,realm,table,simulation])
        for repert in glob.glob(patternv) :
            lversions=os.listdir(repert)
            lversions.sort()
            cversion=version # initial guess of the version to use
            if version == "last" :
                if len(lversions) == 1 :
                    cversion=lversions[0]
                elif len(lversions) > 1 :
                    # Assume that order provided by sort() is OK
                    cversion="last" if "last" in lversions else lversions[-1]
            for f in glob.glob(repert+"/"+cversion+"/"+variable+"/*.nc") :
                if freqd == 'fx' :
                    clogger.debug("adding fixed field "+ f)
                    rep.append(f)
                    continue
                fileperiod=init_period(re.sub(regex,r'\1',f))
                if (fileperiod and period.intersects(fileperiod)) :
                    rep.append(f)

    return rep
Пример #11
0
    def hasRawVariable(self) :
        """ Supposed to test local data files for telling if the dataset variable
        is actually included in files (rather than being a derived, virtual variable)

        For the time being, it only logs a reminder and returns False, which
        leads to always consider that variables declared as 'derived' actually
        are derived """
        reminder="TBD: actually test variables in files, rather than assuming that variable %s is virtual for dataset %s"
        clogger.debug(reminder%(self.variable,self.crs))
        return False
Пример #12
0
def is_derived_variable(variable, project):
    """ Tell (by a boolean) if VARIABLE is recorded in dict
    derived_variables, either for PROJECT or for wildcard project '*'
    """
    def declared_for(scope):
        # True if the variable is recorded for that project name
        return scope in derived_variables and variable in derived_variables[scope]

    rep = declared_for(project) or declared_for("*")
    message = "Checking if variable %s is derived for project %s : %s"
    clogger.debug(message % (variable, project, rep))
    return rep
Пример #13
0
def vertical_average(dat, zmin, zmax):
    """
    Computes a vertical average on the vertical levels between zmin and zmax

    The levels are those provided by getLevs (as a string, which is
    appended to CDO operator 'sellevel'); the average is computed by
    applying ccdo to DAT with CDO operator 'vertmean'.

    Returns the object built by ccdo
    """
    levs = getLevs(dat, zmin, zmax)
    clogger.debug(' --> Compute average on the following vertical levels : ' +
                  levs)
    # The value of levs must be inserted in the operator string; formerly,
    # the text '+levs' was (erroneously) part of the string literal.
    # The operator string is kept enclosed in single quotes, as before
    tmp = ccdo(dat, operator="'vertmean -sellevel," + levs + "'")
    return tmp
Пример #14
0
def hasExactObject(cobject) :
    """ Return the name of the cache file recorded in dict crs2filename
    for the CRS of COBJECT, if that file does exist.

    Otherwise return None; if an entry was recorded but its file is
    missing, the entry is removed from crs2filename
    """
    # First read index from file if it is yet empty
    # NO! : done at startup - if len(crs2filename.keys()) == 0 : cload()
    crs=cobject.crs
    if crs not in crs2filename :
        return None
    f=crs2filename[crs]
    if os.path.exists(f) :
        return f
    # Log the actual CRS (the message formerly included the literal text 'cobject.crs')
    clogger.debug("Dropping %s from cache index, because file is missing",crs)
    crs2filename.pop(crs,None)
    return None
Пример #15
0
def derived_variable(variable,project):
    """ Return the entry recorded in dict derived_variables for VARIABLE,
    looking first at PROJECT and then at wildcard project '*'; return
    None if there is no such entry
    """
    rep=None
    for scope in (project,"*") :
        if scope in derived_variables and variable in derived_variables[scope] :
            rep=derived_variables[scope][variable]
            break
    clogger.debug("Derived variable %s for project %s is %s"%(variable,project,rep))
    return rep
Пример #16
0
def cexport(*args,**kwargs) :
    """ Alias for climaf.driver.ceval, which allows for synonyms for 
    the values of argument 'format' : 

      - 'NetCDF', 'netcdf' and 'nc' stand for 'file'
      - 'MA' stands for 'MaskedArray'

    All arguments are passed to ceval, which result is returned
    """
    clogger.debug("cexport called with arguments"+str(args))  
    if "format" in kwargs :
        requested=kwargs['format']
        if requested in ("NetCDF","netcdf","nc") :
            kwargs['format']="file" 
        elif requested == "MA" :
            kwargs['format']="MaskedArray" 
    result=climaf.driver.ceval(*args,**kwargs)
    return result
Пример #17
0
def selectEmFiles(**kwargs):
    """ Return the list of data files for an EM simulation

    Mandatory keyword arguments : simulation, frequency, variable,
    period, realm (a single letter, or '*' for all of 'A', 'L', 'O', 'I')

    For each realm, the data directory is read from the line which
    begins with 'export EM_DIRECTORY_<realm><freq>=' in file
    $EM_HOME/expe_<simulation>. The files of this directory are
    selected if their period (as told by periodOfEmFile) intersects
    the requested period, and if they include the variable (as told by
    fileHasVar).

    Errors are logged, and do not raise exceptions : this is the case
    if EM_HOME is not set (an empty list is returned), if grep cannot
    be launched (search is stopped), or if a data directory does not
    exist
    """
    # For A and L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation = kwargs['simulation']
    frequency = kwargs['frequency']
    variable = kwargs['variable']
    period = kwargs['period']
    realm = kwargs['realm']
    #
    freqs = {"mon": "", "3h": "_3h"}
    f = frequency
    if f in freqs: f = freqs[f]
    rep = []
    em_home = os.getenv("EM_HOME")
    if em_home is None:
        # Formerly, an exception occurred when expanding a None path
        clogger.error("Environment variable EM_HOME is not set; cannot "
                      "find data for simulation " + simulation)
        return rep
    expe_file = os.path.expanduser(em_home) + "/expe_" + simulation
    # Must look for all realms, here identified by a single letter
    if realm == "*": lrealm = ["A", "L", "O", "I"]
    else: lrealm = [realm]
    for realm in lrealm:
        clogger.debug("Looking for realm " + realm)
        # Use EM data for finding data dir
        freq_for_em = f
        if realm == 'I': freq_for_em = ""  # This is a special case ...
        command = [
            "grep", "^export EM_DIRECTORY_" + realm + freq_for_em + "=",
            expe_file
        ]
        try:
            ex = subprocess.Popen(command,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  universal_newlines=True)
            # communicate() drains both pipes, so the child cannot block
            # on a full pipe, as it could with wait()
            output, _ = ex.communicate()
        except OSError:
            clogger.error("Issue getting archive_location for " + simulation +
                          " for realm " + realm + " with: " + repr(command))
            break
        if ex.returncode != 0:
            clogger.info("No archive location found for " + simulation +
                         " for realm " + realm + " with: " + repr(command))
            continue
        # Use first matching line, and everything after the first '='
        datadir = output.splitlines()[0].split("=", 1)[1].replace('"', "")
        clogger.debug("Looking at dir " + datadir)
        if not os.path.exists(datadir):
            clogger.error(
                "Directory %s does not exist for simulation %s, realm %s "
                "and frequency %s" % (datadir, simulation, realm, f))
            continue
        for fil in os.listdir(datadir):
            fileperiod = periodOfEmFile(fil, realm, f)
            if fileperiod and period.intersects(fileperiod):
                if fileHasVar(datadir + "/" + fil, variable):
                    rep.append(datadir + "/" + fil)
    return rep
Пример #18
0
def derived_variable(variable, project):
    """ Return the entry recorded in dict derived_variables for VARIABLE,
    looking first at PROJECT and then at wildcard project '*'; return
    None if there is no such entry
    """
    def declared_for(scope):
        # True if the variable is recorded for that project name
        return scope in derived_variables and variable in derived_variables[scope]

    if declared_for(project):
        rep = derived_variables[project][variable]
    elif declared_for("*"):
        rep = derived_variables['*'][variable]
    else:
        rep = None
    message = "Derived variable %s for project %s is %s"
    clogger.debug(message % (variable, project, rep))
    return rep
Пример #19
0
def generateUniqueFileName_safe(expression, operator=None, format="nc"):
    """ Generate a filename path from string EXPRESSION and FILEFORMAT, unique for the
    expression and the set of cache directories currently listed in cache.cachedirs 
    OPERATOR may be a function that provides a prefix, using EXPRESSION

    This uses hashlib.sha224, which hexadecimal digest is truncated,
    first to fileNameLength - 1 characters. Two more characters are
    used, as many times as necessary, if a shorter name is already in
    use for another expression in one of the known cache directories 

    Generated names drive a structure of nested directories, built by
    stringToPath using directoryNameLength

    The directory of the returned filename is created if needed. An
    existing file which CRS is not yet in dict crs2filename is recorded
    in it.

    Returns an empty string if FORMAT is None

    Exits if uniqueness is unachievable (quite unexpectable !) """
    #
    import sys
    if format is None: return ""
    prefix = ""
    if operator is not None:
        prefix2 = operator(expression)
        if prefix2 is not None: prefix = prefix2 + "/"
    # hashlib wants bytes : encode text strings (no change for Python 2 'str')
    if isinstance(expression, bytes): to_hash = expression
    else: to_hash = expression.encode("utf-8")
    full = hashlib.sha224(to_hash).hexdigest()

    def lookup(length):
        # Search the caches for the file which name uses LENGTH - 1 digits
        return searchFile(prefix +
                          stringToPath(full[0:length - 1], directoryNameLength) +
                          "." + format)

    number = fileNameLength
    existing = lookup(number)
    readCRS = None
    if existing:
        readCRS = getCRS(existing)
        # Update index if needed
        if readCRS not in crs2filename:
            clogger.warning("existing data %s in file %s was not yet registered in cache index"%\
                                (readCRS,existing))
            crs2filename[readCRS] = existing
    while ((existing is not None) and (readCRS != expression)):
        clogger.debug("must skip %s which CRS is %s"%\
                      (existing, readCRS))
        number += 2
        if (number >= len(full)):
            # Formerly : a string was added to an integer, and 'exit' was not called
            message = "Critical issue in cache : %d digits is not enough for %s" % \
                      (len(full), expression)
            clogger.critical(message)
            sys.exit(message)
        existing = lookup(number)
        if existing: readCRS = getCRS(existing)
    rep = currentCache + "/" + prefix + stringToPath(
        full[0:number - 1], directoryNameLength) + "." + format
    rep = os.path.expanduser(rep)
    # Create the relevant directory, so that user scripts don't have to care
    dirn = os.path.dirname(rep)
    if not os.path.exists(dirn): os.makedirs(dirn)
    clogger.debug("returning %s" % rep)
    return (rep)
Пример #20
0
def searchFile(path):
    """ Look for PATH, as a relative path, in the directories listed in
    CACHEDIRS, and return the (user-expanded) full path for the first
    directory where it is found.

    If what is found is a broken link, it is deleted and None is
    returned, without looking at next directories. None is also
    returned if PATH is not found at all
    """
    for cdir in cachedirs :
        candidate=os.path.expanduser(cdir+"/"+path)        
        if not os.path.lexists(candidate) :
            continue
        if os.path.exists(candidate) :
            return candidate
        # This is a broken link : delete it ~ silently
        clogger.debug("Broken link for %s was deleted"%candidate)
        os.remove(candidate)
        return None
    return None
Пример #21
0
 def baseFiles(self,force=False):
     """ Returns the list of (local) files which include the data for the dataset
     Use cached value (attribute 'files') unless it is None or unless
     called with arg force=True

     The files are searched by dataloc.selectLocalFiles, which is
     provided with a copy of the dataset's attribute 'kvp'; if the
     dataset has an alias, key 'variable' is replaced by the alias
     first element, and key 'filenameVar' is set to its fifth element,
     if not empty
     """
     if force or self.files is None :
         dic=self.kvp.copy()
         if self.alias : 
             # Only filevar and filenameVar are used here
             filevar,scale,offset,units,filenameVar,missing=self.alias
             dic["variable"]=filevar
             if filenameVar : dic["filenameVar"]=filenameVar
         # repr() is the portable replacement for backquotes
         clogger.debug("Looking with dic=%s"%repr(dic))
         self.files=dataloc.selectLocalFiles(**dic)
     return self.files
Пример #22
0
def searchFile(path):
    """ Look for PATH, as a relative path, in the directories listed in
    CACHEDIRS, and return the (user-expanded) full path for the first
    directory where it is found.

    If what is found is a broken link, it is deleted and None is
    returned, without looking at next directories. None is also
    returned if PATH is not found at all
    """
    locations = (os.path.expanduser(cdir + "/" + path) for cdir in cachedirs)
    for location in locations:
        if os.path.lexists(location):
            is_broken_link = not os.path.exists(location)
            if is_broken_link:
                # Delete it ~ silently
                clogger.debug("Broken link for %s was deleted" % location)
                os.remove(location)
            return None if is_broken_link else location
Пример #23
0
def varOf(cobject) :
    """ Returns the variable for a CliMAF object : 

    - for a dataset, returns its 'variable' property
    - for an ensemble, returns the variable of its member with index 0
    - for any other object with a non-empty attribute 'variable', returns it
    - for another ctree, returns the variable of its first operand
    - for a cdummy, returns string 'dummy'

    Raises Climaf_Driver_Error for any other object
    """
    if isinstance(cobject,classes.cdataset) : return cobject.variable
    if isinstance(cobject,classes.cens) : return varOf(cobject.members[0])
    variable=getattr(cobject,"variable",None)
    if variable : return variable
    if isinstance(cobject,classes.ctree) :
        clogger.debug("for now, varOf logic is basic (1st operand) - TBD")
        return varOf(cobject.operands[0])
    if isinstance(cobject,cmacro.cdummy) :
        return "dummy"
    # repr() is the portable replacement for backquotes
    raise Climaf_Driver_Error("Unknown class for argument "+repr(cobject))
Пример #24
0
def generateUniqueFileName(expression, operator=None, format="nc"):
    """ Generate a filename path from string EXPRESSION and FILEFORMAT, unique for the
    expression and the set of cache directories currently listed in cache.cachedirs 
    OPERATOR may be a function that provides a prefix, using EXPRESSION

    This uses hashlib.sha224, which hexadecimal digest is truncated to
    3 (or more) characters. Two more characters are used, as many times
    as necessary, if a shorter name is already in use for another
    expression in one of the known cache directories 

    Generated names drive a structure of nested directories, built by
    stringToPath with a directory name length of 2

    The directory of the returned filename is created if needed. An
    existing file which CRS is not yet in dict crs2filename is recorded
    in it.

    Returns an empty string if FORMAT is None

    Exits if uniqueness is unachievable (quite unexpectable !) """
    #
    import hashlib
    import sys
    directoryNameLength=2
    #
    if format is None : return ""
    prefix=""
    if operator is not None :
        prefix2=operator(expression)
        if prefix2 is not None : prefix=prefix2+"/"
    # hashlib wants bytes : encode text strings (no change for Python 2 'str')
    if isinstance(expression,bytes) : to_hash=expression
    else : to_hash=expression.encode("utf-8")
    full=hashlib.sha224(to_hash).hexdigest()
    #
    def lookup(length) :
        # Search the caches for the file which name uses LENGTH - 1 digits
        return searchFile(prefix+stringToPath(full[0 : length - 1 ], directoryNameLength )+"."+format)
    #
    number=4
    existing=lookup(number)
    readCRS=None
    if existing : 
        readCRS=getCRS(existing)
        # Update index if needed
        if readCRS not in crs2filename :
            clogger.warning("existing data %s in file %s was not yet registered in cache index"%\
                                (readCRS,existing))
            crs2filename[readCRS]=existing
    while ( ( existing is not None ) and ( readCRS != expression )) :
        clogger.debug("must skip %s which CRS is %s"%\
                      (existing, readCRS ))
        number += 2
        if (number >= len(full) ) :
            # Formerly : a string was added to an integer, and 'exit' was not called
            message="Critical issue in cache : %d digits is not enough for %s"%\
                (len(full),expression)
            clogger.critical(message)
            sys.exit(message)
        existing=lookup(number)
        if existing : readCRS=getCRS(existing)
    rep=currentCache+"/"+prefix+stringToPath(full[0 : number - 1 ], directoryNameLength )+"."+format
    rep=os.path.expanduser(rep)
    # Create the relevant directory, so that user scripts don't have to care
    dirn=os.path.dirname(rep)
    if not os.path.exists(dirn) : os.makedirs(dirn)
    clogger.debug("returning %s"%rep)
    return(rep)
Пример #25
0
def cfilePage(cobj, deep, recurse_list=None) :
    """
    Builds a page with CliMAF figures, computing associated crs

    Each figure is evaluated as a file (using ceval), and all files are
    composited by a single 'convert' command, on a white page which
    size is 800x1200 pixels for orientation 'portrait' and 1200x800 for
    orientation 'landscape'. An empty figure slot is rendered using
    'xc:None'.

    Args:
     cobj (cpage object)
     deep, recurse_list : passed to ceval when evaluating the figures
     
    Returns : the filename in CliMAF cache, which contains the result (and
     None if registering the file in cache fails)

    Raises : Climaf_Driver_Error if cobj is not a cpage, if its
     orientation is unknown, or if the compositing command fails

    """
    if not isinstance(cobj,classes.cpage):
        raise Climaf_Driver_Error("cobj is not a cpage object")
    clogger.debug("Computing figure array for cpage %s"%(cobj.crs))
    #
    # page size and creation
    if cobj.orientation == "portrait":
        page_width=800. ; page_height=1200.
    elif cobj.orientation == "landscape":
        page_width=1200. ; page_height=800.
    else :
        # Formerly, this case led to use unbound variables
        raise Climaf_Driver_Error("Unknown orientation %s for cpage %s"%\
                                  (repr(cobj.orientation),cobj.crs))
    page_size="%dx%d"%(page_width, page_height)
    args=["convert", "-size", page_size, "xc:white"]
    #
    # margins
    x_left_margin=10. # Left shift at start and end of line
    y_top_margin=10. # Initial vertical shift for first line
    x_right_margin=10. # Right shift at start and end of line
    y_bot_margin=10. # Vertical shift for last line
    xmargin=20. # Horizontal shift between figures
    ymargin=20. # Vertical shift between figures
    #
    usable_height=page_height-ymargin*(len(cobj.heights)-1.)-y_top_margin -y_bot_margin
    usable_width =page_width -xmargin*(len(cobj.widths)-1.) -x_left_margin-x_right_margin
    #
    # page composition
    y=y_top_margin
    for line, rheight in zip(cobj.fig_lines, cobj.heights) :
        # Line height in pixels
        height=usable_height*rheight 
        x=x_left_margin
        for fig, rwidth in zip(line, cobj.widths) :
            # Figure width in pixels
            width=usable_width*rwidth 
            scaling="%dx%d+%d+%d" %(width,height,x,y)
            if fig : 
                figfile=ceval(fig,format="file", deep=deep, recurse_list=recurse_list)
            else : figfile='xc:None'
            clogger.debug("Compositing figure %s",fig.crs if fig else 'None')
            args.extend([figfile , "-geometry", scaling, "-composite" ])
            x+=width+xmargin
        y+=height+ymargin
    out_fig=cache.generateUniqueFileName(cobj.buildcrs(), format="png")
    args.append(out_fig)
    clogger.debug("Compositing figures : %s"%repr(args))
    comm=subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes, so the child cannot block on a
    # full pipe, as it could with wait()
    output,errors=comm.communicate()
    if comm.returncode != 0 :
        raise Climaf_Driver_Error("Compositing failed : %s" %errors)
    if cache.register(out_fig,cobj.crs) :
        clogger.debug("Registering file %s for cpage %s"%(out_fig,cobj.crs))
        return out_fig
    return None
Пример #26
0
def selectEmFiles(**kwargs) :
    """ Return the list of data files for an EM simulation

    Mandatory keyword arguments : simulation, frequency, variable,
    period, realm (a single letter, or '*' for all of 'A', 'L', 'O', 'I')

    For each realm, the data directory is read from the line which
    begins with 'export EM_DIRECTORY_<realm><freq>=' in file
    $EM_HOME/expe_<simulation>. The files of this directory are
    selected if their period (as told by periodOfEmFile) intersects
    the requested period, and if they include the variable (as told by
    fileHasVar).

    Errors are logged, and do not raise exceptions : this is the case
    if EM_HOME is not set (an empty list is returned), if grep cannot
    be launched (search is stopped), or if a data directory does not
    exist
    """
    # For A and L : mon, day1, day2, 6hLev, 6hPlev, 3h
    simulation=kwargs['simulation']
    frequency=kwargs['frequency']
    variable=kwargs['variable']
    period=kwargs['period']
    realm=kwargs['realm']
    #
    freqs={ "mon" : "" , "3h" : "_3h"}
    f=frequency
    if f in freqs : f=freqs[f]
    rep=[]
    em_home=os.getenv("EM_HOME")
    if em_home is None :
        # Formerly, an exception occurred when expanding a None path
        clogger.error("Environment variable EM_HOME is not set; cannot "
                      "find data for simulation "+simulation)
        return rep
    expe_file=os.path.expanduser(em_home)+"/expe_"+simulation
    # Must look for all realms, here identified by a single letter
    if realm=="*" : lrealm= ["A", "L", "O", "I" ]
    else: lrealm=[ realm ]
    for realm in lrealm :
        clogger.debug("Looking for realm "+realm)
        # Use EM data for finding data dir
        freq_for_em=f
        if realm == 'I' : freq_for_em=""  # This is a special case ...
        command=["grep", "^export EM_DIRECTORY_"+realm+freq_for_em+"=", expe_file ]
        try :
            ex = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                  universal_newlines=True)
            # communicate() drains both pipes, so the child cannot block
            # on a full pipe, as it could with wait()
            output,_ = ex.communicate()
        except OSError :
            clogger.error("Issue getting archive_location for "+
                          simulation+" for realm "+realm+" with: "+repr(command))
            break
        if ex.returncode != 0 :
            clogger.info("No archive location found for "+
                          simulation+" for realm "+realm+" with: "+repr(command))
            continue
        # Use first matching line, and everything after the first '='
        datadir=output.splitlines()[0].split("=",1)[1].replace('"',"")
        clogger.debug("Looking at dir "+datadir)
        if not os.path.exists(datadir) :
            clogger.error("Directory %s does not exist for EM simulation %s, realm %s "
                          "and frequency %s"%(datadir,simulation,realm,f))
            continue
        for fil in os.listdir(datadir) :
            fileperiod=periodOfEmFile(fil,realm,f)
            if fileperiod and period.intersects(fileperiod) :
                if fileHasVar(datadir+"/"+fil,variable) :
                    rep.append(datadir+"/"+fil)
    return rep
Пример #27
0
def  cMA(obj,deep=None) :
    """
    Return the value of a CliMAF object as a Masked Array, by calling
    climaf.driver.ceval with format 'MaskedArray'. Computation is launched if needed.

    Args:
      obj (CliMAF object) : either a dataset or a 'compound' object (like the result of a CliMAF standard operator)
      deep (logical, optional) : passed to ceval; governs the use of cached values when computing the object

        - if missing, or None : use cache as much as possible
        - False : make a shallow computation, i.e. do not use cached values for top level operation
        - True  : make a deep computation, i.e. do not use any cached value

    Returns:
      a Masked Array containing the object's value

    """
    message="cMA called with arguments"+str(obj)
    clogger.debug(message) 
    value=climaf.driver.ceval(obj,format='MaskedArray',deep=deep)
    return value
Пример #28
0
def selectExampleFiles(urls,**kwargs) :
    """
    Return the list of files, found below each of URLS, which match the
    request described by KWARGS.

    Only ``kwargs['frequency'] == "monthly"`` is handled; any other frequency
    yields an empty list. For a monthly request, sub-directories ``A`` and
    ``L`` of each url are scanned, and a file is selected when:

    - ``periodOfEmFile`` returns a period for it (called with frequency
      'mon') which intersects ``kwargs['period']``, and
    - ``fileHasVar`` reports that it holds ``kwargs['variable']``

    Directories which do not exist are silently skipped.

    Returns:
      a list of file paths, built as url + "/" + realm + "/" + filename
    """
    selected = []
    if kwargs['frequency'] != "monthly" :
        return selected
    for url in urls :
        for realm in ("A", "L") :
            realm_dir = url + "/" + realm
            clogger.debug("Looking at dir "+realm_dir)
            if not os.path.exists(realm_dir) :
                continue
            for filename in os.listdir(realm_dir) :
                clogger.debug("Looking at file "+filename)
                fileperiod = periodOfEmFile(filename, realm, 'mon')
                # A false value means that no period could be derived for the file
                if not (fileperiod and fileperiod.intersects(kwargs['period'])) :
                    continue
                candidate = realm_dir + "/" + filename
                if fileHasVar(candidate, kwargs['variable']) :
                    selected.append(candidate)
    return selected
Пример #29
0
def attributeOf(cobject,attrib) :
    """
    Return the value of attribute ATTRIB for a CliMAF object.

    The lookup depends on the class of COBJECT, tested in this order:

    - dataset : its attribute of that name if it is not None, otherwise
      the entry of that name in its ``kvp`` dict
    - ensemble : the attribute of its first member
    - any object bearing a true-ish attribute of that name : that attribute
    - tree : the attribute of its first operand
    - macro dummy argument : the string "dummy"
    - page : None
    - None : the empty string

    Raises:
      Climaf_Driver_Error : if COBJECT matches none of the cases above
    """
    if isinstance(cobject, classes.cdataset) :
        own_value = getattr(cobject, attrib, None)
        if own_value is None :
            return cobject.kvp.get(attrib)
        return own_value
    if isinstance(cobject, classes.cens) :
        return attributeOf(cobject.members[0], attrib)
    # Generic case : the object directly bears a non-empty attribute
    direct_value = getattr(cobject, attrib, None)
    if direct_value :
        return direct_value
    if isinstance(cobject, classes.ctree) :
        clogger.debug("for now, varOf logic is basic (1st operand) - TBD")
        return attributeOf(cobject.operands[0], attrib)
    if isinstance(cobject, cmacro.cdummy) :
        return "dummy"
    if isinstance(cobject, classes.cpage) :
        return None
    if cobject is None :
        return ''
    raise Climaf_Driver_Error("Unknown class for argument "+repr(cobject))
Пример #30
0
def read(filename):
    """
    Read a macro dictionary from the JSON file FILENAME, and declare each
    of its entries through function ``macro``.

    Args:
      filename (str) : path of the JSON file; a leading ``~`` is expanded

    Returns:
      None. Any issue when opening or parsing the file is only logged (at
      level 'info'), and no macro is declared in that case.
    """
    import json
    macros_texts = None
    try:
        # 'with' ensures that the file is closed even if JSON parsing fails
        with open(os.path.expanduser(filename), "r") as macrofile:
            clogger.debug("Macrofile %s read" % (macrofile))
            macros_texts = json.load(macrofile)
            clogger.debug("After reading file %s, macros=%s" %
                          (macrofile, repr(macros_texts)))
    except Exception:
        # Best effort : a missing or ill-formed macro file is not fatal.
        # KeyboardInterrupt and SystemExit are not swallowed.
        clogger.info("Issue reading macro file %s ", filename)
    if macros_texts:
        for m in macros_texts:
            clogger.debug("loading macro %s=%s" % (m, macros_texts[m]))
            # json returns unicode strings; macro() is fed with plain strings
            macro(str(m), str(macros_texts[m]))
Пример #31
0
def ceval_select(includer,included,userflags,format,deep,derived_list,recurse_list) :
    """
    Extract object INCLUDED from (existing) object INCLUDER, taking into
    account the capability of the user process (USERFLAGS) and the required
    delivering FORMAT (file or object).

    Only FORMAT 'file' is handled yet: the time period of INCLUDED is
    extracted out of INCLUDER by applying operator 'select', the result is
    evaluated as a file, and this file is renamed in the cache according to
    the CRS of INCLUDER restricted to that period.

    Args:
      includer : the CliMAF object to extract from
      included : the CliMAF object which time period drives the extraction
      userflags : flags of the calling script (``canSelectTime`` and
        ``canSelectDomain`` are read)
      format (str) : only 'file' is handled
      deep, derived_list, recurse_list : forwarded to ``ceval``

    Returns:
      the value returned by ``cache.rename`` for the extracted file, or None
      if FORMAT is not 'file' or if the extraction could not be evaluated
      (both cases are logged)
    """
    if format != 'file' :
        clogger.error("Can yet process only files - TBD")
        return None
    if userflags.canSelectTime or userflags.canSelectDomain :
        clogger.debug("TBD - should do smthg smart when user can select time or domain")
        #includer.setperiod(included.period)
    incperiod = timePeriod(included)
    clogger.debug("extract sub period %s out of %s"%(repr(incperiod),includer.crs))
    extract = capply('select', includer, period=repr(incperiod))
    objfile = ceval(extract, userflags, 'file', deep, derived_list, recurse_list)
    if not objfile :
        clogger.critical("Cannot evaluate "+repr(extract))
        return None
    crs = includer.buildcrs(period=incperiod)
    return cache.rename(objfile, crs)
Пример #32
0
def getCRS(filename) :
    """
    Return the CRS expression found in FILENAME's meta-data.

    The expression is read from attribute ``CRS_def``, using a shell
    pipeline: ``ncdump -h`` for files with suffix '.nc', and ImageMagick's
    ``identify -verbose`` for files with suffix '.png'.

    Args:
      filename (str) : path of the file to analyze

    Returns:
      - the CRS expression (an empty string if the file has no CRS_def,
        which is also logged as an error)
      - the string "failed" if running the pipeline raised an exception
      - None for a file which suffix is neither '.nc' nor '.png'
    """
    import subprocess
    is_netcdf = filename.endswith(".nc")
    if is_netcdf :
        form='ncdump -h %s | grep -E "CRS_def *=" | '+\
            'sed -r -e "s/.*:CRS_def *= *\\\"(.*)\\\" *;$/\\1/" '
    elif filename.endswith(".png") :
        form='identify -verbose %s | grep -E " *CRS_def: " | sed -r -e "s/.*CRS_def: *//"'
    else :
        clogger.critical("unknown filetype for %s"%filename)
        return None
    # The command is interpreted by a shell : single-quote the filename (with
    # the usual '\'' escape for embedded quotes) so that blanks or shell
    # meta-characters in the path cannot break or hijack the pipeline
    quoted_filename = "'" + filename.replace("'", "'\\''") + "'"
    command = form % quoted_filename
    try:
        rep = subprocess.check_output(command, shell=True).replace('\n','')
        if rep == "" :
            clogger.error("file %s is not well formed (no CRS)"%filename)
        # ncdump escapes single quotes; restore them
        if is_netcdf : rep = rep.replace(r"\'",r"'")
    except Exception:
        rep = "failed"
    clogger.debug("CRS expression read in %s is %s"%(filename,rep))
    return rep
Пример #33
0
def capply_script (script_name, *operands, **parameters):
    """
    Create the object for the application of a script to OPERANDS with
    keyword PARAMETERS.

    Args:
      script_name (str) : name of a script registered in ``operators.scripts``
      operands : the input objects; their number must match
        ``script.inputs_number()``; only the first one may be an ensemble
      parameters : keyword arguments for the script; each one must show up
        in the script command as ``{name}`` or ``{name_iso}``, except for
        'member_label' which is always accepted

    Returns:
      - if the first operand is an ensemble and the script has flag
        ``commuteWithEnsemble`` : an ensemble (``classes.cens``) with the
        same labels, each member being the result of ``maketree`` for the
        corresponding input member (called with an additional parameter
        'member_label')
      - otherwise : the result of ``maketree`` for the operands as provided

    Raises:
      Climaf_Driver_Error : for an unknown script, a wrong number of
        operands, an unexpected parameter, or an ensemble provided at
        another rank than the first one
    """

    if script_name not in operators.scripts :
        # The script name must be formatted in the message (it was formerly
        # passed as a second argument to the exception constructor)
        raise Climaf_Driver_Error("Script %s is not know. Consider declaring it "
                                  "with function 'cscript'" % script_name)
    script = operators.scripts[script_name]
    if len(operands) != script.inputs_number() :
        raise Climaf_Driver_Error("Operator %s is "
                                  "declared with %d input streams, while you provided %d. Get doc with 'help(%s)'"%(
                script_name,script.inputs_number(),len(operands), script_name ))
    #
    # Check that all parameters to the call are expected by the script
    for para in parameters :
        if para == 'member_label' :
            continue
        if re.match(r".*\{"+para+r"\}",script.command) is not None :
            continue
        if re.match(r".*\{"+para+r"_iso\}",script.command) is not None :
            continue
        raise Climaf_Driver_Error("parameter '%s' is not expected by script %s"
                                  "(which command is : %s)"%(para,script_name,script.command))
    #
    # Check that only first operand can be an ensemble. Slicing (rather than
    # indexing) also copes with an empty operands list
    other_operands = list(operands[1:])
    for op in other_operands :
        if isinstance(op,classes.cens) :
            raise Climaf_Driver_Error("Cannot yet have an ensemble as operand except as first one")
    #
    first_is_ensemble = bool(operands) and isinstance(operands[0],classes.cens)
    if first_is_ensemble and script.flags.commuteWithEnsemble :
        # Must iterate on members
        ensemble = operands[0]
        reps = []
        for member,label in zip(ensemble.members,ensemble.labels) :
            clogger.debug("processing member "+repr(member))
            params = parameters.copy()
            params["member_label"] = label
            reps.append(maketree(script_name, script, member, *other_operands, **params))
        return classes.cens(ensemble.labels,*reps)
    return maketree(script_name, script, *operands, **parameters)
Пример #34
0
def generateUniqueFileName_unsafe(expression, format="nc"):
    """
    Generate a file path from string EXPRESSION and FORMAT, almost unique
    for the expression and the cache directory.

    The path is derived from the hexadecimal sha224 digest of EXPRESSION,
    of which the first ``fileNameLength - 1`` characters are kept; function
    ``stringToPath`` turns them in a path using ``directoryNameLength``. The
    path is rooted at ``currentCache`` and gets FORMAT as suffix.

    The relevant directory is created if needed, so that user scripts
    don't have to care.

    Args:
      expression (str) : the string to hash; a unicode string is encoded
        as UTF-8 first
      format (str) : the filename suffix; if None, an empty string is
        returned and nothing is created

    Returns:
      the file path (with ``~`` expanded), or "" if FORMAT is None
    """
    #
    if format is None:
        return ""
    prefix = ""
    # hashlib works on bytes : encode text explicitly rather than relying
    # on an implicit (ASCII) conversion
    if not isinstance(expression, bytes):
        expression = expression.encode("utf-8")
    full = hashlib.sha224(expression).hexdigest()
    rep = currentCache + "/" + prefix + stringToPath(
        full[0:fileNameLength - 1], directoryNameLength) + "." + format
    rep = os.path.expanduser(rep)
    # Create the relevant directory, so that user scripts don't have to care
    dirn = os.path.dirname(rep)
    if not os.path.isdir(dirn):
        try:
            os.makedirs(dirn)
        except OSError:
            # Another process may have created it in the meantime
            if not os.path.isdir(dirn):
                raise
    clogger.debug("returning %s" % rep)
    return rep
Пример #35
0
def crewrite(crs, alsoAtTop=True):
    """
    Return the CRS expression with sub-trees replaced by their macro
    equivalent, when applicable.

    Search order is : from CRS tree root try all macros, then do the
    same for first subtree, and recursively in depth, and then go
    to second subtree.

    Args:
      crs (str) : a CRS expression; it is evaluated in the namespace of
        module ``__main__``
      alsoAtTop (bool, optional) : if False, macros are not tried against
        the whole expression, but only against its sub-trees

    Returns:
      the rewritten expression; CRS is returned unchanged if it cannot be
      evaluated, or if it evaluates to an object which is neither a ctree,
      a scriptChild nor a cpage
    """
    # Next line used for interpreting macros's CRS
    exec("ARG=climaf.cmacro.cdummy()", sys.modules['__main__'].__dict__)
    #
    allow_error_on_ds()
    try:
        # NOTE(review): eval of a CRS string - CRS are assumed to be trusted
        co = eval(crs, sys.modules['__main__'].__dict__)
    except Exception:
        clogger.debug("Issue when rewriting %s" % crs)
        return crs
    finally:
        # Switch back, whether the evaluation succeeded or not
        allow_error_on_ds(False)
    if not isinstance(co, (ctree, scriptChild, cpage)):
        return crs
    if alsoAtTop:
        for m in cmacros:
            clogger.debug("looking at macro : " + m + "=" + repr(cmacros[m]) +
                          " \ncompared to : " + repr(macro(None, co)))
            argl = cmatch(cmacros[m], co)
            if len(argl) > 0:
                # Build the macro call, each argument being itself rewritten
                rep = m + "("
                for arg in argl:
                    rep += crewrite(
                        arg.buildcrs(crsrewrite=crewrite)) + ","
                rep += ")"
                rep = rep.replace(",)", ")")
                return rep
    # No macro matches at top level, or top level not wished.
    # Let us dig a bit
    return co.buildcrs(crsrewrite=crewrite)
Пример #36
0
def eds(**kwargs):
    """
    Create a dataset ensemble using the same calling sequence as
    :py:func:`~climaf.classes.cdataset`, except that one of the facets
    is a list, for defining the ensemble members; this facet must be among
    the facets authorized for ensemble in the (single) project involved

    Facet 'domain' is never considered as defining an ensemble, even if its
    value is a list.

    Returns:
      an ensemble (``cens``) which labels are the values of the list-valued
      facet, and which members are the corresponding datasets

    Raises:
      Climaf_Classes_Error : if a list-valued facet is not authorized for
        ensembles in the project, or if the number of list-valued facets
        is not exactly one

    Example::

    >>> cdef("frequency","monthly") ;  cdef("project","CMIP5"); cdef("model","CNRM-CM5")
    >>> cdef("variable","tas"); cdef("period","1860")
    >>> ens=eds(experiment="historical", simulation=["r1i1p1","r2i1p1"])

    """
    attval = processDatasetArgs(**kwargs)
    # Collect the facets/attributes of type 'list' (those defining an
    # ensemble), checking that each one is OK for the project
    ensemble_attrs = []
    for attr in attval :
        clogger.debug("Looking at attr %s for ensemble"%attr)
        if attr == "domain" or not isinstance(attval[attr], list) :
            continue
        if attr not in cprojects[attval["project"]].attributes_for_ensemble :
            raise Climaf_Classes_Error("Attribute %s cannot be used for ensemble"%attr)
        clogger.debug("Attr %s is used for an ensemble"%attr)
        ensemble_attrs.append(attr)
    if len(ensemble_attrs) != 1 :
        raise Climaf_Classes_Error("Must ask for an ensemble on exactly one attribute")
    listattr = ensemble_attrs[0]
    #
    # Create one dataset per value of the list-valued facet
    labels = []
    members = []
    for member in attval[listattr] :
        member_facets = attval.copy()
        member_facets[listattr] = member
        members.append(cdataset(**member_facets))
        labels.append(member)
    return cens(labels,*members)
Пример #37
0
def cread(datafile,varname=None):
    """
    Read, or display, the content of DATAFILE, according to its suffix.

    - '.png' : the file is displayed by launching command ``display``,
      without waiting for its completion
    - '.nc' : variable VARNAME is read and returned as a Masked Array,
      which mask is set where data equal the variable's ``_FillValue``
      attribute (no value is masked if there is no such attribute)
    - other suffixes are not handled (an error is logged)

    Args:
      datafile (str) : path of the file; if empty or None, nothing is done
      varname (str, optional) : name of the variable to read in a NetCDF
        file; if None, the value returned by ``varOfFile`` is used

    Returns:
      a numpy Masked Array for a NetCDF file which variable can be
      identified; None in all other cases
    """
    if not datafile :
        return None
    if datafile.endswith(".png") :
        # Popen does not wait for the command : no need (and no way, without
        # a shell) to use '&' for running it in the background
        subprocess.Popen(["display",datafile])
        return None
    if datafile.endswith(".nc") :
        clogger.debug("reading NetCDF file %s"%datafile)
        if varname is None :
            varname = varOfFile(datafile)
        if varname is None :
            return None
        import numpy.ma
        from Scientific.IO.NetCDF import NetCDFFile as ncf
        fileobj = ncf(datafile)
        try:
            variable = fileobj.variables[varname]
            # Note taken from the CDOpy developper : .data is not backwards
            # compatible to old scipy versions, [:] is
            data = variable[:]
            fillv = getattr(variable, "_FillValue", None)
            if fillv is None :
                return numpy.ma.array(data)
            return numpy.ma.array(data, mask=(data == fillv))
        finally:
            # Release the file even if the variable cannot be read
            fileobj.close()
    clogger.error("cannot yet handle %s"%datafile)
    return None
Пример #38
0
def cimport(cobject,crs) :
    """
    Import an externally produced object in CliMAF's cache, declaring that
    it is the value of CRS expression CRS. For expert use.

    Args:
      cobject : either a filename (str), which is then registered in the
        cache with ``cache.register``, or a numpy Masked Array (not yet
        implemented : an error is logged)
      crs (str) : the CRS expression to associate with the object; its
        syntax is not checked

    Returns:
      None. An argument of any other type is only reported by an error
      message.
    """
    # The logged value must have a matching placeholder in the message,
    # otherwise module logging reports a formatting error instead
    clogger.debug("cimport called with argument %s",cobject)
    clogger.debug("should check syntax of arg 'crs' -TBD")
    clogger.warning("cimport is not for the dummies - Playing at your own risks !")
    import numpy, numpy.ma
    if isinstance(cobject,numpy.ma.MaskedArray) :
        clogger.debug("for now, use a file for importing - should revisit - TBD")
        clogger.error("not yet implemented fro Masked Arrays - TBD")
    elif isinstance(cobject,str) :
        cache.register(cobject,crs)
    else :
        clogger.error("argument is not a Masked Array nor a filename : %s",cobject)
Пример #39
0
 def store_wildcard_facet_values(f,
                                 facets_regexp,
                                 kwargs,
                                 wildcards,
                                 merge_periods_on=None,
                                 fperiod=None,
                                 periods=None,
                                 periods_dict=None):
     """
     Using a (groups-capable) regexp FACETS_REGEXP for finding facet values, analyze
     string F for finding the value of each keyword (facet name) in KWARGS, and store
     it in dict WILDCARDS, which keys are facet names and values are sets of encountered
     values. Only the facets which value in KWARGS is a string including a wildcard
     character (``*`` or ``?``) are processed.

     Regarding periods : if both FPERIOD and PERIODS are provided, FPERIOD is appended
     to list PERIODS; moreover, if PERIODS_DICT is provided, FPERIOD is added, for each
     match of a wildcard facet, to the set stored in PERIODS_DICT at key :

     - None, if MERGE_PERIODS_ON is None
     - the facet value, for the facet named MERGE_PERIODS_ON (other facets are skipped)

     Args:
       f (str) : the string (e.g. a filename) to analyze
       facets_regexp (str) : a regexp with named groups, one per facet
       kwargs (dict) : requested facet values, by facet name
       wildcards (dict) : updated in place
       merge_periods_on (str, optional) : a facet name
       fperiod (optional) : the period associated with F
       periods (list, optional) : updated in place
       periods_dict (dict, optional) : updated in place

     Returns:
       None
     """
     track_periods = fperiod is not None and periods is not None
     if track_periods:
         clogger.debug('Adding period %s' % fperiod)
         periods.append(fperiod)
     #
     # Matches depend neither on the facet nor on its value : compute them once
     matches = list(re.finditer(facets_regexp, f))
     for kw in kwargs:
         requested = kwargs[kw]
         has_wildcard = isinstance(requested, str) and ("*" in requested
                                                        or "?" in requested)
         if not has_wildcard:
             continue
         for oc in matches:
             try:
                 facet_value = oc.group(kw)
             except IndexError:
                 # The regexp has no group for that facet
                 continue
             if facet_value is not None:
                 wildcards.setdefault(kw, set()).add(facet_value)
                 clogger.debug("Discover %s=%s for file=%s" %
                               (kw, facet_value, f))
             else:
                 clogger.debug("Logic issue for kw=%s and file=%s" %
                               (kw, f))
             #
             # Nowhere to record periods per key if periods_dict is missing
             if not track_periods or periods_dict is None:
                 continue
             if merge_periods_on is None:
                 key = None
             elif kw == merge_periods_on:
                 key = facet_value
             else:
                 continue
             periods_dict.setdefault(key, set()).add(fperiod)
Пример #40
0
def timePeriod(cobject) :
    """
    Return the time period of a CliMAF object.

    - dataset : its ``period`` attribute
    - tree (operator output) : the time period of its first operand
    - scriptChild : the time period of its father
    - ensemble : the time period of its first member

    For any other class, an error is logged and None is returned.
    """
    if isinstance(cobject, classes.cdataset) :
        return cobject.period
    if isinstance(cobject, classes.ctree) :
        clogger.debug("for now, timePeriod logic for scripts output is basic (1st operand) - TBD")
        return timePeriod(cobject.operands[0])
    if isinstance(cobject, classes.scriptChild) :
        clogger.debug("for now, timePeriod logic for scriptChilds is basic - TBD")
        return timePeriod(cobject.father)
    if isinstance(cobject, classes.cens) :
        clogger.debug("for now, timePeriod logic for 'cens' objet is basic (1st member)- TBD")
        return timePeriod(cobject.members[0])
    clogger.error("unkown class for argument "+repr(cobject))
    return None
Пример #41
0
def selectFiles(return_wildcards=None, merge_periods_on=None, **kwargs):
    """
    Returns the shortest list of (local or remote) files which include
    the data for the list of (facet,value) pairs provided

    Method :

    - use datalocations indexed by :py:func:`~climaf.dataloc.dataloc` to
      identify data organization and data store urls for these (facet,value)
      pairs

    - check that data organization is a known one, i.e. is one of 'generic',
      'CMIP5_DRS' or 'EM'

    - derive relevant filenames search function such as
      :py:func:`~climaf.dataloc.selectCmip5DrsFiles` from data
      organization scheme

    - pass urls and relevant facet values to this filenames search function

    Args:
      return_wildcards (optional) : forwarded to ``selectGenericFiles``;
        if not None, only organization 'generic' is allowed
      merge_periods_on (optional) : forwarded to ``selectGenericFiles``
      kwargs : the (facet,value) pairs; 'project' and 'simulation' are
        mandatory; 'model' and 'frequency' default to "*" for the search
        of data locations

    Returns:
      a string with the (sorted, duplicate-free) filenames separated by a
      blank, or None if no data location or no file is found

    Raises:
      classes.Climaf_Error : for an unknown data organization, or when
        wildcards are requested for an organization other than 'generic'
    """
    rep = []
    project = kwargs['project']
    simulation = kwargs['simulation']
    model = kwargs.get('model', "*")
    frequency = kwargs.get('frequency', "*")

    ofu = getlocs(project=project,
                  model=model,
                  simulation=simulation,
                  frequency=frequency)
    clogger.debug("locs=" + repr(ofu))
    if len(ofu) == 0:
        clogger.warning("no datalocation found for %s %s %s %s " %
                        (project, model, simulation, frequency))
    for org, freq, urls in ofu:
        # Compare by value : an identity test on strings only works by accident
        if return_wildcards is not None and org != "generic":
            raise classes.Climaf_Error(
                "Can handle multipe facet query only for organization=generic "
            )
        kwargs2 = kwargs.copy()
        # Convert normalized frequency to project-specific frequency if applicable
        if "frequency" in kwargs and project in classes.frequencies:
            normfreq = kwargs2['frequency']
            if normfreq in classes.frequencies[project]:
                kwargs2['frequency'] = classes.frequencies[project][normfreq]
        # JS # Convert normalized realm to project-specific realm if applicable
        if "realm" in kwargs and project in classes.realms:
            normrealm = kwargs2['realm']
            if normrealm in classes.realms[project]:
                kwargs2['realm'] = classes.realms[project][normrealm]
        #
        # Call organization-specific routine
        if org == "EM":
            # This search function does not use the urls
            rep.extend(selectEmFiles(**kwargs2))
        elif org == "CMIP5_DRS":
            rep.extend(selectCmip5DrsFiles(urls, **kwargs2))
        elif org == "generic":
            rep.extend(
                selectGenericFiles(urls,
                                   return_wildcards=return_wildcards,
                                   merge_periods_on=merge_periods_on,
                                   **kwargs2))
        else:
            raise classes.Climaf_Error("Cannot process organization " + org +
                                       " for simulation " + simulation +
                                       " and model " + model + " of project " +
                                       project)
    if not ofu:
        return None
    if len(rep) == 0:
        # Here, 'urls' is the one of the last data location scanned
        clogger.warning("no file found for %s, at these "
                        "data locations %s " % (repr(kwargs), repr(urls)))
        if any([kwargs[k] == '' for k in kwargs]):
            clogger.warning("Please check these empty attributes %s" %
                            [k for k in kwargs if kwargs[k] == ''])
        return None
    # Discard duplicates (assumes that sorting is harmless for later processing).
    # Removing entries from the list while iterating on it used to leave
    # some duplicates in place
    unique_files = sorted(set(rep))
    # Assemble filenames in one single string
    return " ".join(unique_files)
Пример #42
0
    def __init__(self,
                 name,
                 command,
                 format="nc",
                 canOpendap=False,
                 commuteWithTimeConcatenation=False,
                 commuteWithSpaceConcatenation=False,
                 canSelectVar=False,
                 **kwargs):
        """
        Declare a script or binary as a 'CliMAF operator', and define a Python function with the same name

        Args:
          name (str): name for the CliMAF operator.
          command (str): script calling sequence, according to the syntax described below.
          format (str): script outputs format -- either 'nc', 'png', 'pdf', 'eps', 'None'
            or 'graph' ('graph' allows to the user to choose three different graphic output
            formats: 'png', 'pdf' or 'eps') or 'txt' (the text output are not managed by CliMAF,
            but only displayed - 'txt' allows to use e.g. 'ncdump -h' from inside CliMAF);
            defaults to 'nc'
          canOpendap (bool, optional): is the script able to use OpenDAP URIs ? default to False
          commuteWithTimeConcatenation (bool, optional): can the operation commute with concatenation
            of time periods ? set it to true, if the operator can be applied on time
            chunks separately, in order to allow for incremental computation / time chunking;
            defaults to False
          commuteWithSpaceConcatenation (bool, optional): can the operation commute with concatenation
            of space domains ? defaults to False (see commuteWithTimeConcatenation)
          canSelectVar (bool, optional): set it to True for declaring that the script can select
            a variable, even if keyword ``${var}`` does not show up in the command; defaults to False
          **kwargs : possible keyword arguments, with keys matching '<outname>_var', for providing
            a format string allowing to compute the variable name for output 'outname' (see below).

        Returns:
          None

        The script calling sequence pattern string (arg 'command') indicates how to build the system call
        which actually launches the script, with a match between python objects and formal arguments;

        For introducing the syntax, please consider this example, with the following commands::

        >>> cscript('mycdo','cdo ${operator} ${in} ${out}')
        >>> # define some dataset
        >>> tas_ds = ds(project='example', simulation='AMIPV6ALB2G', variable='tas', period='1980-1981')
        >>> # Apply operator 'mycdo' to dataset 'tas_ds', choosing a given 'operator' argument
        >>> tas_avg = mycdo(tas_ds,operator='timavg')

        CliMAF will later on launch this call behind the curtain::

        $ cdo timavg /home/my/tmp/climaf_cache/8a/5.nc /home/my/tmp/climaf_cache/4e/4.nc

        where :

        - the last filename is generated by CliMAF from the formal expression describing 'tas_avg', and
          will receive the result
        - the first filename provides a file generated by CliMAF which includes the data required for tas_ds

        There are a number of examples declared in module :download:`standard_operators
        <../climaf/standard_operators.py>`.

        **Detailed syntax**:

        -  formal arguments appear as : ``${argument}`` (in the example : ``${in}``, ``${out}``, ``${operator}`` )

        -  except for reserved keywords, arguments in the pattern will be
           replaced by the values for corresponding keywords used when invoking
           the diagnostic operator:

          - in the example above : argument ``operator`` is replaced by value ``timavg``,
            which is a keyword known to the external binary called, CDO

        -  reserved argument keywords are :

         - **in, in_<digit>, ins, ins_<digit>, mmin** : they will be
           replaced by CliMAF managed filenames for input data, as
           deduced from dataset description or upstream computation; these
           filenames can actually be remote URLs (if the script can use
           OpenDAP, see args), local 'raw' data files, or CliMAF cache
           filenames

          -  **in** stands for the URL of the first dataset invoked in the
             operator call

          -  **in_<digit>** stands for the next ones, in the same order

          -  **ins** and **ins_<digit>** stand for the case where the script can
             select input from multiple input files or URLs (e.g. when the
             whole period to process spans over multiple files); in that case,
             a single string (surrounded with double quotes) will carry
             multiple URLs

          - **mmin** stands for the case where the script accepts an
            ensemble of datasets (only for first input stream
            yet). CliMAF will replace the keyword by a string
            composed of the corresponding input filenames (not surrounded
            by quotes - please add them yourself in declaration); see also
            ``labels`` below

         -  **var, var_<digit>** : when a script can select a variable in a
            multi-variable input stream, this is declared by adding this
            keyword in the calling sequence; CliMAF will replace it by the
            actual variable name to process; 'var' stands for first input
            stream, 'var_<digit>' for the next ones;

            - in the example above, we assume that external binary CDO is
              not tasked with selecting the variable, and that CliMAF must
              feed CDO with a datafile where it has already performed the
              selection


         - **period, period_<digit>** : when a script can select a time
           period in the content of a file or stream, it should declare it
           by putting this keyword in the pattern, which will be replaced at
           call time by the period written as <date1>-<date2>, where date is
           formated as YYYYMMDD ;

            - time intervals must be interpreted as [date1, date2[

            - 'period' stands for the first input_stream,

            - 'period_<n>' for the next ones, in the order of actual call;

           - in the example above, this keyword is not used, which means that
             CliMAF has to select the period upstream of feeding CDO with the
             data

         - **period_iso, period_iso_<digit>** : as for **period** above,
           except that the date formating fits CDO conventions :

            - date format is ISO : YYYY-MM-DDTHH:MM:SS

            - interval is [date1,date2_iso], where date2_iso is 1 minute before
              date2

            - separator between dates is : ,

         - **domain, domain_<digit>** : when a script can select a domain
           in the input grid, this is declared by adding this
           keyword in the calling sequence; CliMAF will replace it by the
           domain definition if needed, as 'latmin,latmax,lonmin,lonmax' ;
           'domain' stands for first input stream, 'domain_<digit>' for the
           next ones :

            - in the example above, we assume that external binary CDO is
              not tasked with selecting the domain, and that CliMAF must
              feed CDO with a datafile where it has already performed the
              selection

         - **out, out_<word>** : CliMAF provide file names for output
           files (if there is no such field, the script will have
           only 'side effects', e.g. launch a viewer). Main output
           file must be created by the script with the name provided
           at the location of argument ${out}. Using arguments like
           'out_<word>' tells CliMAF that the script provide some
           secondary output, which will be symbolically known in
           CliMAF syntax as an attribute of the main object; by
           default, the variable name of each output equals the name
           of the output (except for the main ouput, which variable
           name is supposed to be the same as for the first input);
           for other cases, see argument \*\*kwargs to provide a
           format string, used to derive the variable name from first
           input variable name as in e.g. :
           ``output2_var='std_dev(%s)'`` for the output labelled
           output2 (i.e. declared as '${out_output2}') or ``_var='std_dev(%s)'``
           for the default (main) output


           - in the example above, we just apply the convention used by CDO,
             which expects that you provide an output filename as last
             argument on the command line. See example mean_and_sdev in doc
             for advanced usage.

         - **crs** : will be replaced by the CliMAF Reference Syntax expression
           describing the first input stream; can be useful for plot title
           or legend

         - **alias** : used if the script can make an on the fly re-scaling
           and renaming of a variable. Will be replaced by a string which
           pattern is : 'new_varname,file_varname,scale,offset'. The script
           should then transform on reading as new_varname =
           file_varname * scale + offset

         - **units, units_<digit>** : means that the script can set the units
           on-the-fly while reading one of the input streams

         - **missing** : means that the script can make an on-the-fly
           transformation of a given constant to missing values

         - **labels** : for script accepting ensembles, CliMAF will
           replace this keyword by a string bearing the labels
           associated with the ensemble, with delimiter $ as e.g. in:
           "CNRM-CM5 is fine$IPSL-CM5-LR is not bad$CCSM-29 is ..."

        """
        # Check that script name do not clash with an existing symbol
        if name in sys.modules['__main__'].__dict__ and name not in scripts:
            clogger.error("trying to define %s as an operator, "
                          "while it exists as smthing else" % name)
            return None
        if name in scripts:
            clogger.warning("Redefining CliMAF script %s" % name)
        #
        # Check now that script is executable
        scriptcommand = command.split(' ')[0].replace("(", "")
        ex = subprocess.Popen(['which', scriptcommand], stdout=subprocess.PIPE)
        # communicate() drains and closes the pipe; the output itself is not used
        ex.communicate()
        if ex.returncode != 0:
            # NOTE(review): the exception is instantiated, but not raised, here
            # and for some other checks below, so that the declaration goes on;
            # presumably the constructor reports the issue - to be confirmed
            Climaf_Operator_Error("defining %s : command %s is not "
                                  "executable" % (name, scriptcommand))
        #
        # Analyze inputs field keywords and populate dict
        # attribute 'inputs' with some properties
        self.inputs = dict()
        commuteWithEnsemble = True
        it = re.finditer(
            r"\${(?P<keyw>(?P<mult>mm)?in(?P<serie>s)?(_(?P<n>([\d]+)))?)}",
            command)
        for oc in it:
            # Rank of the input stream : 0 if the keyword has no '_<digit>' suffix
            if oc.group("n") is not None:
                rank = int(oc.group("n"))
            else:
                rank = 0
            if rank in self.inputs:
                Climaf_Operator_Error(
                    "When defining %s : duplicate declaration for input #%d" %
                    (name, rank))
            serie = (oc.group("serie") is not None)
            multiple = (oc.group("mult") is not None)
            if multiple:
                if rank != 0:
                    raise Climaf_Operator_Error(
                        "Only first operand may accept members")
                if serie:
                    raise Climaf_Operator_Error("Operand %s cannot both accept"
                                                "members and files set" %
                                                oc.group("keyw"))
                # A script which handles ensembles by itself must be fed with
                # the whole ensemble, not member by member
                commuteWithEnsemble = False
            self.inputs[rank] = (oc.group("keyw"), multiple, serie)
        if len(self.inputs) == 0:
            Climaf_Operator_Error(
                "When defining %s : command %s must include at least one of "
                "${in} ${ins} ${mmin} or ${in_..} ... for specifying how CliMAF"
                " will provide the input filename(s)" % (name, command))
        # print self.inputs
        for i in range(len(self.inputs)):
            if i + 1 not in self.inputs and not (i == 0 and 0 in self.inputs):
                Climaf_Operator_Error(
                    "When defining %s : error in input sequence for rank %d" %
                    (name, i + 1))
        #
        # Check if command includes an argument allowing for
        # providing an output filename
        if command.find("${out") < 0:
            # Strings must be compared by value, not by identity
            if format != "txt":
                format = None
        #
        # Search in call arguments for keywords matching "<output_name>_var"
        # which may provide format string for 'computing' outputs variable
        # name from input variable name
        outvarnames = dict()
        pattern = r"^(.*)_var$"
        for p in kwargs:
            if re.match(pattern, p):
                outvarnames[re.findall(pattern, p)[0]] = kwargs[p]
        clogger.debug("outvarnames for script %s = %s" %
                      (name, repr(outvarnames)))
        #
        # Analyze outputs names , associated variable names
        # (or format strings), and store it in attribute dict 'outputs'
        self.outputs = dict()
        it = re.finditer(r"\${out(_(?P<outname>[\w-]*))?}", command)
        for occ in it:
            outname = occ.group("outname")
            if outname is not None:
                if outname in outvarnames:
                    self.outputs[outname] = outvarnames[outname]
                else:
                    self.outputs[outname] = "%s"  # outname
            else:
                # Main output : recorded both with key None and with key ''
                self.outputs[None] = outvarnames.get('', "%s")
                self.outputs[''] = outvarnames.get('', "%s")
        # clogger.debug("outputs = "+`self.outputs`)
        #
        # Derive the script capabilities from the keywords used in the command
        # (a keyword located at the very beginning of the command is not
        # detected, because of the '> 0' tests)
        canSelectVar = canSelectVar or (command.find("${var}") > 0)
        canAggregateTime = (command.find("${ins}") > 0
                            or command.find("${ins_1}") > 0)
        canAlias = (command.find("${alias}") > 0)
        canMissing = (command.find("${missing}") > 0)
        canSelectTime = False
        if command.find("${period}") > 0 or command.find("${period_1}") > 0:
            canSelectTime = True
        if command.find("${period_iso}") > 0 or command.find(
                "${period_iso_1}") > 0:
            canSelectTime = True
        canSelectDomain = (command.find("${domain}") > 0
                           or command.find("${domain_1}") > 0)
        #
        self.name = name
        self.command = command
        self.fixedfields = None
        self.flags = scriptFlags(canOpendap, canSelectVar, canSelectTime,
                                 canSelectDomain, canAggregateTime, canAlias,
                                 canMissing, commuteWithEnsemble,
                                 commuteWithTimeConcatenation,
                                 commuteWithSpaceConcatenation)
        if format in known_formats or format in graphic_formats or format in none_formats:
            self.outputFormat = format
        else:
            raise Climaf_Operator_Error(
                "Allowed formats yet are : 'object', 'nc', 'txt', %s" %
                ', '.join([repr(x) for x in graphic_formats]))
        scripts[name] = self

        # Init doc string for the operator
        doc = "CliMAF wrapper for command : %s" % self.command
        # try to get a better doc string from colocated doc/directory
        docfilename = os.path.dirname(
            __file__) + "/../doc/scripts/" + name + ".rst"
        # print "docfilen= "+docfilename
        try:
            with open(docfilename) as docfile:
                doc = docfile.read()
        except (IOError, OSError):
            # No dedicated doc file : keep the default doc string
            pass
        #
        # creates a function named as requested, which will invoke
        # capply with that name and same arguments
        defs = 'def %s(*args,**dic) :\n  """%s"""\n  return driver.capply("%s",*args,**dic)\n' \
               % (name, doc, name)
        # The function-call form of exec is accepted by both Python 2 and 3
        exec(defs, globals())
        exec("from climaf.operators import %s" % name,
             sys.modules['__main__'].__dict__)
        clogger.debug("CliMAF script %s has been declared" % name)
Пример #43
0
def macro(name, cobj, lobjects=[]):
    """
    Define a CliMAF macro from a CliMAF compound object.

    Transform a CliMAF object in a macro, replacing all datasets,
    and the objects of lobjects, by a dummy argument.  Register it in
    dict cmacros, if name is not None (and the transformation succeeded)

    Args:
     name (string) : the name you want to give to the macro; a Python
      function with the same name will be defined
     cobj (CliMAF object, or string) : any CliMAF object, usually
      the result of a series of operators, that you would like to
      repeat using other input datasets; alternatively, you can provide
      the macro formula as a string (when accustomed to the syntax);
      such a string is evaluated as Python code in the main namespace,
      so only trusted strings should be provided
     lobjects (list, optional):  for expert use- a list of objects,
      which are sub-objects of cobject, and which should become arguments
      of the macro; the list is only read, never modified

    Returns:
      a macro; the returned value is usually not used 'as is' : a
      python function is also defined in this module and in main
      namespace, and you may use it in the same way as a CliMAF
      operator. All the datasets involved in ``cobj`` become arguments
      of the macro, which allows you to re-do the same computations
      and easily define objects similar to ``cobj``.
      None is returned when ``cobj`` is None, when a string ``cobj``
      cannot be evaluated, or when the object type is not handled.

    Example::

     >>> # First use and combine CliMAF operators to get some interesting result using some dataset(s)
     >>> january_ta=ds(project='example',simulation='AMIPV6ALB2G',variable='ta',frequency='monthly',period='198001')
     >>> ta_europe=llbox(january_ta,latmin=40,latmax=60,lonmin=-15,lonmax=25)
     >>> ta_ezm=ccdo(ta_europe,operator='zonmean')
     >>> fig_ezm=plot(ta_ezm)
     >>> #
     >>> # Using this result as an example, define a macro named 'eu_cross_section',
     >>> # whose arguments will be the datasets involved in this result
     >>> cmacro('eu_cross_section',fig_ezm)
     >>> #
     >>> # You can of course apply a macro to another dataset(s) (even here to a 2D variable)
     >>> pr=ds(project='example',simulation='AMIPV6ALB2G', variable='pr', frequency='monthly', period='198001')
     >>> pr_ezm=eu_cross_section(pr)
     >>> #
     >>> # All macros are registered in dictionary climaf.cmacro.cmacros,
     >>> # which is imported by climaf.api; you can list it by :
     >>> cmacros

    Note : macros are automatically saved in file ~/.climaf.macros, and can be edited

    See also much more explanations in the example at :download:`macro.py <../examples/macro.py>`
    """
    main_namespace = sys.modules['__main__'].__dict__
    if isinstance(cobj, str):
        s = cobj
        # Define ARG as a cdummy instance in the main namespace; this is
        # used for interpreting macros's CRS
        exec("from climaf.cmacro import cdummy; ARG=cdummy()", main_namespace)
        try:
            # NOTE: evaluates arbitrary Python code; cobj must be trusted
            cobj = eval(cobj, main_namespace)
        except Exception:
            # usually case of a CRS which project is not currently defined
            clogger.error(
                "Cannot interpret %s with the projects currently define" % s)
            return None
    # An object matches lobjects either by equality or by identical CRS
    domatch = any(
        cobj == o or
        (isinstance(cobj, cobject) and cobj.buildcrs() == o.buildcrs())
        for o in lobjects)
    if isinstance(cobj, cdataset) or isinstance(cobj, cdummy) or domatch:
        return cdummy()
    elif isinstance(cobj, ctree):
        rep = ctree(cobj.operator, cobj.script, *cobj.operands,
                    **cobj.parameters)
        # lobjects must be forwarded, otherwise sub-objects listed in it
        # could never be matched below the top level
        rep.operands = [macro(None, o, lobjects) for o in rep.operands]
    elif isinstance(cobj, scriptChild):
        rep = scriptChild(macro(None, cobj.father, lobjects), cobj.varname)
    elif isinstance(cobj, cpage):
        rep = cpage(
            [[macro(None, fig, lobjects) for fig in line]
             for line in cobj.fig_lines],
            cobj.widths, cobj.heights)
    elif isinstance(cobj, cens):
        d = dict()
        for k, v in zip(cobj.keys(), cobj.values()):
            d[k] = macro(None, v, lobjects)
        rep = cens(d)
    elif cobj is None:
        return None
    else:
        clogger.error("Cannot yet handle object :%s", repr(cobj))
        rep = None
    if name and rep:
        cmacros[name] = rep
        doc = "A CliMAF macro, which text is " + repr(rep)
        # Create a function named as requested, which instantiates the
        # registered macro with its positional arguments
        defs='def %s(*args) :\n  """%s"""\n  return instantiate(cmacros["%s"],[ x for x in args])\n'\
              % (name,doc,name)
        exec(defs, globals())
        # ... and make it available in the main namespace too
        exec("from climaf.cmacro import %s" % name, main_namespace)
        clogger.debug("Macro %s has been declared" % name)

    return rep
Пример #44
0
def _find_vertical_axis(ncfile):
    """
    Return the name of the variable of NCFILE which looks like a vertical
    axis, or None if there is none.

    A variable qualifies when its lower-cased name is one of a fixed list
    of usual level/depth names, or when it includes 'plev'. If several
    variables qualify, the last one met when iterating on
    ``ncfile.variables`` is returned.
    """
    usual_names = ['level', 'levels', 'lev', 'levs', 'depth', 'deptht',
                   'olevel']
    found = None
    for varname in ncfile.variables:
        lowered = varname.lower()
        if lowered in usual_names or 'plev' in lowered:
            found = varname
    return found


def _convert_plev_to_Pa(dat, levname, cachedir):
    """
    Declare (or re-declare) the CliMAF operator 'convert_plev_hPa_to_Pa'
    for a vertical axis named LEVNAME, and return the result of applying
    it to DAT. The script multiplies the axis by 100 and sets its units
    to Pa, using a temporary file in CACHEDIR.
    """
    cscript('convert_plev_hPa_to_Pa',
            'ncap2 -As "' + levname + '=' + levname + '*100" ${in} ' + cachedir +
            '/convert_to_Pa_tmp.nc ; ncatted -O -a units,' + levname + ',o,c,Pa ' + cachedir +
            '/convert_to_Pa_tmp.nc ; mv ' + cachedir + '/convert_to_Pa_tmp.nc ${out}')
    return climaf.operators.convert_plev_hPa_to_Pa(dat)


def lonlatvert_interpolation(dat1, dat2=None, vertical_levels=None, cdo_horizontal_grid='r1x90',
                             horizontal_regridding=True):
    """
    Interpolates a lon/lat/pres field dat1 via two possible ways:
    - either by providing a target lon/lat/pres field dat2 => dat1 is regridded both horizontally and vertically on dat2
    - or by providing a list of vertical levels => dat1 is regridded horizontally on the cdo_horizontal_grid
    (default='r1x90'), and vertically on the list of vertical levels
    The user can provide the vertical levels (in Pa) like this:
    vertical_levels=[100000,85000,50000,20000,...] # or
    vertical_levels='100000,85000,50000,20000'
    (a tuple is accepted as well as a list)
    Before the computations, the function checks the unit of the vertical axis;
    if it is expressed in hPa (or an equivalent name), a conversion to Pa is applied
    to dat1 (and dat2).

    If horizontal_regridding is False, only the vertical interpolation is done.

    Returns the interpolated CliMAF object, or None (after logging an error) if
    neither dat2 nor vertical_levels is provided.

       >>> dat = ds(project='CMIP5',model='IPSL-CM5A-LR',variable='ua',period='1980-1985',
                    experiment='historical',table='Amon')
       >>> ref = ds(project='ref_pcmdi',variable='ua',product='ERAINT')

       >>> zonmean_dat = zonmean(time_average(dat))
       >>> zonmean_ref = zonmean(time_average(ref))

       >>> dat_interpolated_on_ref = lonlatvert_interpolation(zonmean_dat,zonmean_ref)
       >>> dat_interpolated_on_list_of_levels = lonlatvert_interpolation(zonmean_dat,vertical_levels='100000,85000,50000,20000,10000,5000,2000,1000')

    """

    from climaf.anynetcdf import ncf
    from climaf import cachedir

    hpa_unit_names = ['hpa', 'millibar', 'mbar', 'hectopascal']

    file1 = cfile(dat1)
    clogger.debug('file1 = %s' % file1)
    ncfile1 = ncf(file1)

    # -- First, we check the unit of the vertical dimension of file1
    levname1 = _find_vertical_axis(ncfile1)
    if not levname1:
        # NOTE: the lookup on next line fails in that case
        clogger.debug('Name of the vertical axis not found for dat1')
    levunits1 = ncfile1.variables[levname1].units
    if levunits1.lower() in hpa_unit_names:
        # -- Multiply by 100
        dat1 = _convert_plev_to_Pa(dat1, levname1, cachedir)
    # -> The vertical axis of dat1 is now set to Pa
    #
    # -- Second, we check the unit of the vertical dimension of file2
    if dat2:
        file2 = cfile(dat2)
        clogger.debug('file2 = %s' % file2)
        ncfile2 = ncf(file2)

        levname2 = _find_vertical_axis(ncfile2)
        clogger.debug('levname2 = %s' % levname2)
        if not levname2:
            # NOTE: the lookup on next line fails in that case
            clogger.debug('Name of the vertical axis not found for dat2')
        levunits2 = ncfile2.variables[levname2].units
        clogger.debug('ncfile2 = %s' % ncfile2)
        # The way to read the axis values depends on the object returned by
        # ncf : try the known accessors in turn
        try:
            levValues2 = ncfile2.variables[levname2].getValue()
        except Exception:
            try:
                levValues2 = ncfile2.variables[levname2].data
            except Exception:
                levValues2 = ncfile2[levname2][0:len(ncfile2[levname2])]
        if levunits2.lower() in hpa_unit_names:
            # -- Multiply by 100
            dat2 = _convert_plev_to_Pa(dat2, levname2, cachedir)
            # -> The vertical axis of dat2 is now set to Pa
            scale = 100.0
        else:
            scale = 1.0
        #
        # --> We get the values of the vertical levels of dat2 (from the original file, that's why we apply a scale)
        levels = ''.join(',' + str(lev * scale) for lev in levValues2)
        #
        # --> We can now interpolate dat1 on dat2 vertically and horizontally
        if horizontal_regridding:
            return ccdo(regrid(dat1, dat2, option='remapdis'), operator='intlevel' + levels)
        return ccdo(dat1, operator='intlevel' + levels)

    if not vertical_levels:
        # Nothing to interpolate on : report it and return None rather than
        # failing on an unbound result variable
        clogger.error('--> Provide a list of vertical levels with vertical_levels')
        return None

    if isinstance(vertical_levels, (list, tuple)):
        levels = ''.join(',' + str(lev) for lev in vertical_levels)
    else:
        levels = ',' + vertical_levels
    if horizontal_regridding:
        return ccdo(regridn(dat1, cdogrid=cdo_horizontal_grid), operator='intlevel' + levels)
    return ccdo(dat1, operator='intlevel' + levels)
Пример #45
0
def register(filename, crs, outfilename=None):
    """
    Adds in FILE a metadata named 'CRS_def' and with value CRS, and a
    metadata 'CLiMAF' with CliMAF version and ref URL

    Records this FILE in dict crs2filename

    If OUTFILENAME is not None, FILENAME is a temporary file and
    it's OUTFILENAME which is recorded in dict crs2filename

    If module variable 'stamping' is false, the file is recorded without
    being stamped (and without checking that it exists).

    Supported file types for stamping are those with suffix .nc, .png,
    .pdf and .eps; external commands ncatted, convert, pdftk and exiv2
    are used respectively, through a shell.

    Returns True when the file has been recorded. Returns None, after
    logging an error, when the file does not exist (after waiting up to
    about 5 seconds for it) or when its type is not supported.
    Calls exit() when the stamping or the move command fails.
    """
    # First read index from file if it is yet empty - No : done at startup
    global dropped_crs
    if not stamping:
        clogger.debug('No stamping')
        crs2filename[crs] = filename
        return True
    # It appears that we have to let some time to the file system for
    # updating its inode tables : poll at most 50 times, every 0.1 s
    waited = 0
    while waited < 50 and not os.path.exists(filename):
        time.sleep(0.1)
        waited += 1
    if not os.path.exists(filename):
        clogger.error("file %s does not exist (for crs %s)" % (filename, crs))
        return None
    #
    # Build the shell command which stamps the file, according to its type
    # NOTE: crs and filenames are inserted unquoted in a shell command line;
    # they are assumed to be generated by CliMAF itself
    if filename.endswith(".nc"):
        command = "ncatted -h -a CRS_def,global,o,c,\"%s\" -a CliMAF,global,o,c,\"CLImate Model Assessment " \
                  "Framework version %s (http://climaf.rtfd.org)\" %s" % (crs, version, filename)
    elif filename.endswith(".png"):
        # '%' characters of the CRS are backslash-escaped for 'convert'
        crs2 = crs.replace("%", "\\%")
        command = "convert -set \"CRS_def\" \"%s\" -set \"CliMAF\" \"CLImate Model Assessment Framework version " \
                  "%s (http://climaf.rtfd.org)\" %s %s.png && mv -f %s.png %s" % \
                  (crs2, version, filename, filename, filename, filename)
    elif filename.endswith(".pdf"):
        # The PDF info is dumped in a uniquely named scratch file, to which
        # the CRS is appended as 'Keywords', before updating the PDF with it
        tmpfile = str(uuid.uuid4())
        command = "pdftk %s dump_data output %s && echo -e \"InfoBegin\nInfoKey: Keywords\nInfoValue: %s\" >> %s " \
                  "&& pdftk %s update_info %s output %s.pdf && mv -f %s.pdf %s && rm -f %s" % \
                  (filename, tmpfile, crs, tmpfile, filename, tmpfile, filename, filename, filename, tmpfile)
    elif filename.endswith(".eps"):
        command = 'exiv2 -M"add Xmp.dc.CliMAF CLImate Model Assessment Framework version %s ' \
                  '(http://climaf.rtfd.org)" -M"add Xmp.dc.CRS_def %s" %s' % \
                  (version, crs, filename)
    else:
        # No stamping command is known for this file type
        clogger.error("cannot stamp file %s : unsupported file type (for crs %s)" % (filename, crs))
        return None
    #
    clogger.debug("trying stamping by %s" % command)
    if os.system(command) != 0:
        clogger.critical("cannot stamp by %s" % command)
        exit()
        return None
    #
    registered = filename
    if outfilename:
        cmd = 'mv -f %s %s ' % (filename, outfilename)
        if os.system(cmd) != 0:
            clogger.critical("cannot move by %s" % cmd)
            exit()
            return None
        clogger.info("move %s as %s " % (filename, outfilename))
        registered = outfilename
    clogger.info("%s registered as %s" % (crs, registered))
    crs2filename[crs] = registered
    # A CRS which is registered again is no more a dropped one
    if crs in dropped_crs:
        dropped_crs.remove(crs)
    return True
Пример #46
0
def selectGenericFiles(urls, **kwargs):
    """
    Allow to describe a ``generic`` file organization : the list of files returned
    by this function is composed of files which :

    - match the patterns in ``urls`` once these patterns are instantiated by
      the values in kwargs, and

    - contain the ``variable`` provided in kwargs

    - match the ``period`` provided in kwargs

    The patterns are tried in turn, and the search stops at the first
    pattern which provides at least one file.

    Args:
      urls (list of strings) : the patterns; a pattern which includes a
        ':' is handled as remote data, the part before the last ':'
        being the remote prefix
      kwargs : the facets; ``period`` (a string or a period object) and
        ``variable`` are mandatory (a KeyError occurs otherwise);
        ``filenameVar`` and ``frequency`` are used if present

    Returns:
      a list of filenames (with the remote prefix for remote data)

    Raises:
      Climaf_Data_Error : if the dates cannot be extracted from a filename
        which was found using a pattern with date keywords, or if
        a remote file cannot be selected on variable (see below)

    In the pattern strings, no keyword is mandatory. However, for remote files,
    the filename pattern must include ${variable}, which is instantiated by variable
    name or ``filenameVar`` (given via :py:func:`~climaf.classes.calias()`); this is
    for the sake of efficiency (please complain if inadequate)

    Example :

    >>> selectGenericFiles(project='my_projet',model='my_model', simulation='lastexp', variable='tas', period='1980', urls=['~/DATA/${project}/${model}/*${variable}*YYYY*.nc'])
    /home/stephane/DATA/my_project/my_model/somefilewith_tas_Y1980.nc

    In the pattern strings, the keywords that can be used in addition to the argument
    names (e.g. ${model}) are:

    - ${variable} : use it if the files are split by variable and
      filenames do include the variable name, as this speeds up the search

    - YYYY, YYYYMM, YYYYMMDD, YYYYMMDDHH : use it for indicating the start date of
      the period covered by each file, if this is applicable in the
      file naming; use a second time for end date, if applicable
      (otherwise the assumption is that the whole year -resp. month or
      day- is included in the file)

    - wildcards '?' and '*' for matching respectively one and any number of characters


    """
    rep = []
    period = kwargs['period']
    if type(period) is str: period = init_period(period)
    variable = kwargs['variable']
    # altvar is the alternate variable name to look for; defaults to variable
    altvar = kwargs.get('filenameVar', variable)
    # a dict and an ordered list of date globbing patterns
    dt = dict(YYYY="????",
              YYYYMM="??????",
              YYYYMMDD="????????",
              YYYYMMDDHH="??????????")
    # Reverse alphabetical order puts the longest keyword first, so that
    # e.g. YYYYMMDD is handled before its own prefix YYYY
    # (in-place sort relies on keys() returning a list, i.e. on Python 2)
    lkeys = dt.keys()
    lkeys.sort(reverse=True)
    # a dict and an ordered list for matching dates
    dr = dict(YYYY="([0-9]{4})",
              YYYYMM="([0-9]{6})",
              YYYYMMDD="([0-9]{8})",
              YYYYMMDDHH="([0-9]{10})")
    rkeys = dr.keys()
    rkeys.sort(reverse=True)
    #
    for l in urls:
        # Instantiate keywords in pattern with attributes values
        # (safe_substitute leaves unknown ${...} keywords unchanged)
        if re.findall(".*:.*", l):  # remote data
            remote_prefix = ':'.join(l.split(":")[0:-1]) + ':'
            template = Template(l.split(":")[-1]).safe_substitute(**kwargs)
        else:  # local data
            remote_prefix = ""
            template = Template(l).safe_substitute(**kwargs)
        #
        # Construct a pattern for globbing dates
        temp2 = template
        for k in lkeys:
            temp2 = temp2.replace(k, dt[k])
        if remote_prefix:
            lfiles = sorted(glob_remote_data(remote_prefix, temp2))
            clogger.debug("Remote globbing %d files for varname on %s : " %
                          (len(lfiles), remote_prefix + temp2))
        else:  # local data
            lfiles = sorted(glob.glob(temp2))
            clogger.debug("Globbing %d files for varname on %s : " %
                          (len(lfiles), temp2))
        #
        # If unsuccessful using varname, try with filenameVar
        if len(lfiles
               ) == 0 and "filenameVar" in kwargs and kwargs['filenameVar']:
            # Change value of facet 'variable'
            # Note : kwargs is not restored afterwards, so this value is also
            # used for instantiating the next patterns of urls, if any
            kwargs['variable'] = kwargs['filenameVar']
            if remote_prefix:  # remote data
                template = Template(l.split(":")[-1]).safe_substitute(**kwargs)
            else:  # local data
                template = Template(l).safe_substitute(**kwargs)
            temp2 = template
            for k in lkeys:
                temp2 = temp2.replace(k, dt[k])
            #
            if remote_prefix:  #
                lfiles = sorted(glob_remote_data(remote_prefix, temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), remote_prefix + temp2))
            else:  # local data
                lfiles = sorted(glob.glob(temp2))
                clogger.debug("Globbing %d files for filenamevar on %s: " %
                              (len(lfiles), temp2))
        #
        # Construct regexp for extracting dates from filename : only the
        # first date keyword found in template (longest keywords being
        # searched first) is considered; its first occurrence becomes
        # capture group 1 (start date) and its second occurrence, if any,
        # capture group 2 (end date). regexp stays None if the template
        # has no date keyword
        regexp = None
        for key in rkeys:
            start = template.find(key)
            if (start >= 0):
                regexp = template.replace(key, dr[key], 1)
                hasEnd = False
                start = regexp.find(key)
                if (start >= 0):
                    hasEnd = True
                    regexp = regexp.replace(key, dr[key], 1)
                break
        #
        for f in lfiles:
            #
            # Analyze file time period
            fperiod = None
            if regexp:
                # Translate globbing wildcards to their regexp equivalent
                regexp0 = regexp.replace("*", ".*").replace("?", r".")
                # re.sub returns f unchanged when regexp0 does not match
                start = re.sub(regexp0, r'\1', f)
                if start == f:
                    raise Climaf_Data_Error("Start period not found")  #?
                if hasEnd:
                    end = re.sub(regexp0, r'\2', f)
                    fperiod = init_period("%s-%s" % (start, end))
                else:
                    fperiod = init_period(start)
            else:
                # No date in the filename : time filtering is not possible
                if ( 'frequency' in kwargs and ((kwargs['frequency']=="fx") or \
                    kwargs['frequency']=="seasonnal" or kwargs['frequency']=="annual_cycle" )) :
                    # local data
                    # NOTE(review): this test reads 'remote_prefix' although
                    # the branch is labelled 'local data' and appends f
                    # without the prefix, while the similar test further
                    # below reads 'not remote_prefix'; it also makes the
                    # 'elif' branch unreachable whenever its own condition
                    # on ${variable}, '*' or altvar holds - TODO confirm
                    # which condition is intended
                    if remote_prefix and \
                       ( (l.find("${variable}")>=0) or variable=='*' or \
                         fileHasVar(f,variable) or (variable != altvar and fileHasVar(f,altvar)) ) :
                        clogger.debug("adding fixed field :" + f)
                        rep.append(f)
                    # remote data
                    # NOTE(review): 'is not ""' is an identity test, not an
                    # equality test - TODO confirm it behaves as intended
                    elif remote_prefix is not "":
                        if (l.split(":")[-1].find("${variable}")>=0) or variable=='*' or \
                           (variable != altvar and (f.find(altvar)>=0) ):
                            clogger.debug("adding fixed field :" +
                                          remote_prefix + f)
                            rep.append(remote_prefix + f)
                        else:
                            raise Climaf_Data_Error(
                                "For remote files, filename pattern (%s) should include ${varname} (which is instanciated by variable name or filenameVar)"
                                % f)
                else:
                    clogger.info(
                        "Cannot yet filter files re. time using only file content."
                    )
                    # The file is kept here without any test on variable
                    rep.append(f)

            # Filter file time period against required period; files which
            # have no date in their name (regexp is None) also go through
            # this variable filtering stage
            # NOTE(review): a file already appended in the stage above may
            # be appended once more just below when the pattern includes
            # ${variable}, as that test comes before the 'f not in rep'
            # one - TODO confirm whether duplicates are expected
            if (fperiod and period.intersects(fperiod)) or not regexp:
                clogger.debug(
                    'Period is OK - Considering variable filtering on %s and %s for %s'
                    % (variable, altvar, f))
                # Filter against variable
                if (l.find("${variable}") >= 0):
                    # The filename was selected using the variable name :
                    # no need to look into the file
                    clogger.debug(
                        'appending %s based on variable in filename' % f)
                    rep.append(remote_prefix + f)
                    continue
                if (f not in rep):
                    # local data
                    # (fileHasVar presumably tells if the file includes the
                    # variable - verify against its definition)
                    if not remote_prefix and \
                        (variable=='*' or "," in variable or fileHasVar(f,variable) or \
                        (altvar != variable and fileHasVar(f,altvar))) :
                        # Should check time period in the file if not regexp
                        clogger.debug(
                            'appending %s based on multi-var or var exists in file '
                            % f)
                        rep.append(f)
                        continue
                    # remote data : selection is based on the filename only
                    elif remote_prefix:
                        if variable=='*' or "," in variable or \
                            (variable != altvar and (f.find(altvar)>=0) ):
                            # Should check time period in the file if not regexp
                            clogger.debug(
                                'appending %s based on multi-var or altvar ' %
                                (remote_prefix + f))
                            rep.append(remote_prefix + f)
                            continue
                        else:
                            mess = "For remote files, filename pattern (%s) should include" % (
                                remote_prefix + f)
                            mess += " ${varname} (which is instanciated by variable name or filenameVar)"
                            raise Climaf_Data_Error(mess)
            else:
                # Only log the reason why the file is discarded
                if not fperiod:
                    clogger.debug('not appending %s because period is None ' %
                                  f)
                else:
                    if not period.intersects(fperiod):
                        clogger.debug(
                            'not appending %s because period doesn t intersect %s'
                            % (f, period))

        # Break on first url with any matching data
        if len(rep) > 0:
            clogger.debug('url %s does match for ' % l + ` kwargs `)
            break
    return rep