Exemplo n.º 1
0
def combine_meta (invars, outvar):
# {{{
  """Copy metadata shared by all *invars* onto *outvar*."""
  from pygeode.tools import common_dict
  from pygeode import Axis
  # Propagate 'name' and 'units' only when every input agrees on the value.
  for intrinsic in ('name', 'units'):
    candidates = set(getattr(v, intrinsic) for v in invars)
    if len(candidates) == 1:
      setattr(outvar, intrinsic, candidates.pop())
  # Assign fresh dictionaries rather than updating the existing ones: this may
  # run from a Var subclass __init__, before the dictionaries are properly
  # created - the current 'atts'/'plotatts' may be shared objects.
  # Axis objects are excluded from the plotatts merge (see issue 53).
  outvar.atts = common_dict([v.atts for v in invars])
  outvar.plotatts = common_dict([v.plotatts for v in invars
                                 if not isinstance(v, Axis)])
Exemplo n.º 2
0
def combine_meta(invars, outvar):
    # {{{
    """Transfer metadata common to all *invars* onto *outvar*."""
    from pygeode.tools import common_dict
    from pygeode import Axis
    # Intrinsic attributes: copy only if unanimous across the inputs.
    for key in ('name', 'units'):
        seen = {getattr(v, key) for v in invars}
        if len(seen) == 1:
            setattr(outvar, key, seen.pop())
    # *Set* these attributes instead of updating them in place: when called
    # from a Var subclass __init__ the existing 'atts'/'plotatts' may still
    # be shared dictionaries.  Axis objects are skipped for plotatts
    # (see issue 53).
    non_axis = [v for v in invars if not isinstance(v, Axis)]
    outvar.atts = common_dict([v.atts for v in invars])
    outvar.plotatts = common_dict([v.plotatts for v in non_axis])
Exemplo n.º 3
0
def _get_var_info(manifest,opener):
  """Build per-variable attribute and location tables from a file manifest.

  Returns (atts, table): atts maps each variable name to its merged
  attribute dict; table maps each variable name to a list of
  (filename, opener, axes) tuples recording where it can be read from.
  """
  from pygeode.tools import common_dict
  attlists = {}
  table = {}
  for filename, entries in manifest.items():
    for varname, axes, varatts in entries:
      seen = attlists.setdefault(varname, [])
      # Record each distinct attribute dict only once per variable.
      if varatts not in seen:
        seen.append(varatts)
      table.setdefault(varname, []).append((filename, opener, axes))
  atts = {name: common_dict(lst) for name, lst in attlists.items()}
  return atts, table
Exemplo n.º 4
0
 def add (self, *vars):
   '''Return a new Dataset with the given Vars/Datasets appended to this one.'''
   from pygeode.var import Var
   from pygeode.tools import common_dict
   # Collect global attributes (from any Datasets passed to us)
   atts = [self.atts] + [d.atts for d in vars if isinstance(d,Dataset)]
   atts = common_dict(*atts)
   for v in vars:
     assert isinstance(v,(Var,Dataset)), "'%s' is not a Var"%repr(v)
   # Expand all Datasets to Vars
   vars = [v for v in vars if isinstance(v,Var)] + sum([
          d.vars for d in vars if isinstance(d,Dataset)],[])
   vars = list(self.vars) + list(vars)
   # BUG FIX: pass the merged attributes computed above.  Previously
   # 'atts=self.atts' was used, silently discarding the common_dict result.
   d = Dataset(vars, atts=atts)
   return d
Exemplo n.º 5
0
def ensemble(*varlists):
    """
    Create an ensemble out of a set of similar variables.

    Corresponding variables must share the same axes and the same name.
    If Vars are passed in, a single ensemble Var is returned.  If Datasets
    are passed in, a single Dataset of ensemble Vars is returned; each
    input dataset must have matching variables.  Otherwise a plain list
    of ensemble Vars is returned.
    """
    from pygeode.var import Var
    from pygeode.dataset import Dataset, asdataset
    from pygeode.tools import common_dict

    datasets = [asdataset(v) for v in varlists]
    varnames = [v.name for v in datasets[0].vars]
    expected = set(varnames)

    # Every dataset must expose exactly the same variable names.
    for ds in datasets:
        assert set(ds.vardict.keys()) == expected, \
            "inconsistent variable names between datasets"

    # Rebuild each dataset with its variables in a common order.
    datasets = [Dataset([ds[name] for name in varnames], atts=ds.atts)
                for ds in datasets]

    # Corresponding variables must carry identical axes.
    for name in varnames:
        reference = datasets[0][name]
        for ds in datasets:
            assert ds[name].axes == reference.axes, \
                "inconsistent axes for %s" % name

    # One EnsembleVar per variable name, gathered across all datasets.
    ensembles = [EnsembleVar([ds[name] for ds in datasets])
                 for name in varnames]

    # Global attributes shared by every input.
    atts = common_dict(ds.atts for ds in datasets)
    if isinstance(varlists[0], Dataset):
        return Dataset(ensembles, atts=atts)
    if isinstance(varlists[0], Var):
        assert len(ensembles) == 1
        return ensembles[0]

    return ensembles
Exemplo n.º 6
0
def ensemble (*varlists):
  """
  Build an ensemble out of a collection of similar variables.

  All inputs must contain the same variable names with identical axes.
  Passing Vars yields a single ensemble Var; passing Datasets yields a
  single Dataset of ensemble Vars (each input dataset must have matching
  variables); anything else yields a plain list of ensembles.
  """
  from pygeode.var import Var
  from pygeode.dataset import Dataset, asdataset
  from pygeode.tools import common_dict

  datasets = [asdataset(v) for v in varlists]
  names = [v.name for v in datasets[0].vars]
  name_set = set(names)

  # Each dataset must carry exactly the same variable names.
  for ds in datasets:
    assert set(ds.vardict.keys()) == name_set, "inconsistent variable names between datasets"

  # Normalize the ordering of variables within each dataset.
  for idx in range(len(datasets)):
    ds = datasets[idx]
    datasets[idx] = Dataset([ds[n] for n in names], atts=ds.atts)

  # Axes of corresponding variables must match exactly.
  for n in names:
    first = datasets[0][n]
    for ds in datasets:
      assert ds[n].axes == first.axes, "inconsistent axes for %s" % n

  # One ensemble per variable.
  ensembles = [EnsembleVar([ds[n] for ds in datasets]) for n in names]

  # Keep only the global attributes common to every dataset.
  atts = common_dict(ds.atts for ds in datasets)
  if isinstance(varlists[0], Dataset): return Dataset(ensembles, atts=atts)
  if isinstance(varlists[0], Var):
    assert len(ensembles) == 1
    return ensembles[0]

  return ensembles
Exemplo n.º 7
0
    def concat(cls, axes):
        # {{{
        """Concatenate several axes of the same type into a single axis."""
        from numpy import concatenate
        from pygeode.tools import common_dict
        # Every piece must be an instance of this axis class.
        for axis in axes:
            assert isinstance(axis, cls), 'axes must be the same type'

        values = concatenate([axis.values for axis in axes])

        # Attributes shared consistently by all pieces.
        atts = common_dict([axis.atts for axis in axes])

        aux = {}

        # Propagate auxiliary attributes defined on every piece, but only
        # when all pieces agree on the value.
        shared = set(axes[0].auxatts.keys())
        for axis in axes[1:]:
            shared = shared.intersection(list(axis.auxatts.keys()))
        for key in shared:
            candidates = [axis.auxatts[key] for axis in axes]
            # Silently drop aux atts whose values disagree between pieces.
            if all(c == candidates[0] for c in candidates):
                aux[key] = axes[0].auxatts[key]

        # Auxiliary arrays present on every piece get concatenated.
        shared = set(axes[0].auxarrays.keys())
        for axis in axes[1:]:
            shared = shared.intersection(list(axis.auxarrays.keys()))
        for key in shared:
            aux[key] = concatenate([axis.auxarrays[key] for axis in axes])

        name = axes[0].name  # TODO: check all names?

        return cls(values, name=name, atts=atts, **aux)
Exemplo n.º 8
0
  def concat (cls, axes):
  # {{{
    """Join multiple axes of identical type into one concatenated axis."""
    from numpy import concatenate
    from pygeode.tools import common_dict
    # Reject mixed axis types up front.
    assert all(isinstance(a, cls) for a in axes), 'axes must be the same type'

    values = concatenate([a.values for a in axes])

    # Merge the attributes common to every piece.
    atts = common_dict([a.atts for a in axes])

    aux = {}

    # Keep auxiliary attributes only when every piece defines them and
    # agrees on the value; inconsistent ones are dropped.
    keys = set(axes[0].auxatts.keys())
    for a in axes[1:]:
      keys = keys.intersection(list(a.auxatts.keys()))
    for key in keys:
      vals = [a.auxatts[key] for a in axes]
      if all(v == vals[0] for v in vals):
        aux[key] = axes[0].auxatts[key]

    # Auxiliary arrays shared by all pieces are concatenated in order.
    keys = set(axes[0].auxarrays.keys())
    for a in axes[1:]:
      keys = keys.intersection(list(a.auxarrays.keys()))
    for key in keys:
      aux[key] = concatenate([a.auxarrays[key] for a in axes])

    name = axes[0].name  # TODO: check all names?

    return cls(values, name=name, atts=atts, **aux)
Exemplo n.º 9
0
def concat(*datasets):
    # {{{
    """Merge several datasets, concatenating variables that share a name."""
    from pygeode.concat import concat
    from pygeode.tools import common_dict, islist

    # Accept either *args or a single pre-built list of datasets.
    if len(datasets) == 1 and islist(datasets[0]):
        datasets = list(datasets[0])

    # A single dataset needs no merging.
    if len(datasets) == 1:
        return datasets[0]

    # Group variable segments by name, preserving first-seen order.
    order = []
    segments = {}
    for ds in datasets:
        for v in ds.vars:
            if v.name not in segments:
                segments[v.name] = []
                order.append(v.name)
            segments[v.name].append(v)

    # Concatenate multi-segment variables; single segments pass through.
    merged = []
    for name in order:
        pieces = segments[name]
        merged.append(concat(pieces) if len(pieces) > 1 else pieces[0])

    d = Dataset(merged)

    # Attach whatever global attributes are common to all inputs.
    atts = common_dict(*[ds.atts for ds in datasets])
    if len(atts) > 0:
        d.atts = atts
    return d
Exemplo n.º 10
0
def concat(*datasets):
  """Merge datasets, concatenating same-named variables across them."""
  from pygeode.concat import concat
  from pygeode.tools import common_dict, islist

  # A single list argument means the caller already bundled the datasets.
  if len(datasets) == 1 and islist(datasets[0]): datasets = list(datasets[0])

  # Only one dataset: nothing to merge.
  if len(datasets) == 1: return datasets[0]

  # Map each variable name to its segments, keeping first-seen name order.
  names = []
  by_name = {}
  for ds in datasets:
    for v in ds.vars:
      if v.name not in by_name:
        by_name[v.name] = []
        names.append(v.name)
      by_name[v.name].append(v)

  # A lone segment is used directly; multiple segments are concatenated.
  vars = [concat(by_name[n]) if len(by_name[n]) > 1 else by_name[n][0] for n in names]

  d = Dataset(vars)

  # Retain only the global attributes shared by every input dataset.
  atts = common_dict(*[ds.atts for ds in datasets])
  if len(atts) > 0:
    d.atts = atts
  return d
Exemplo n.º 11
0
    def add(self, *vars):
        # {{{
        '''Adds variables to the dataset.

    Parameters
    ----------
    *vars : :class:`Var` objects
      The variables to add

    Returns
    -------
    A new :class:`Dataset` with the variables added. 

    Notes
    -----
    The same naming rules are applied in case of name collisions as in
    :meth:`Dataset.__init__`. The addition operator is also overloaded to do
    the same thing; in that case provide a list of the variables to add.

		See Also
		--------
		
    
    Examples
    --------
    >>> from pygeode.tutorial import t1, t2
    >>> print t2.add(t1.Temp.rename('Temp2'))
    <Dataset>:
    Vars:
      Temp  (time,pres,lat,lon)  (3650,20,31,60)
      U     (time,pres,lat,lon)  (3650,20,31,60)
      Temp2 (lat,lon)            (31,60)
    Axes:
      time <ModelTime365>:  Jan 1, 2011 00:00:00 to Dec 31, 2020 00:00:00 (3650 values)
      pres <Pres>    :  1000 hPa to 50 hPa (20 values)
      lat <Lat>      :  90 S to 90 N (31 values)
      lon <Lon>      :  0 E to 354 E (60 values)
    Global Attributes:
      {'history': 'Synthetic Temperature and Wind data generated by pygeode'}
    >>> print t2 + t1.Temp.rename('Temp2')
    <Dataset>:
    Vars:
      Temp  (time,pres,lat,lon)  (3650,20,31,60)
      U     (time,pres,lat,lon)  (3650,20,31,60)
      Temp2 (lat,lon)            (31,60)
    Axes:
      time <ModelTime365>:  Jan 1, 2011 00:00:00 to Dec 31, 2020 00:00:00 (3650 values)
      pres <Pres>    :  1000 hPa to 50 hPa (20 values)
      lat <Lat>      :  90 S to 90 N (31 values)
      lon <Lon>      :  0 E to 354 E (60 values)
    Global Attributes:
      {'history': 'Synthetic Temperature and Wind data generated by pygeode'}
    '''
        from pygeode.var import Var
        from pygeode.tools import common_dict
        # Collect global attributes (from any Datasets passsed to us)
        atts = [self.atts] + [d.atts for d in vars if isinstance(d, Dataset)]
        atts = common_dict(*atts)
        for v in vars:
            assert isinstance(v, (Var, Dataset)), "'%s' is not a Var" % repr(v)
        # Expand all Datasets to Vars
        vars = [v for v in vars if isinstance(v, Var)] + sum(
            [d.vars for d in vars if isinstance(d, Dataset)], [])
        vars = list(self.vars) + list(vars)
        d = Dataset(vars, atts=self.atts)
        return d
Exemplo n.º 12
0
def open_multi (files, format=None, opener=None, pattern=None, file2date=None, **kwargs):
# {{{
  ''' Returns a :class:`Dataset` containing variables merged across many files.

  Parameters
  ==========
  files : string, list, or tuple
    Either a single filename or a list of filenames. Wildcards are supported, :func:`glob.iglob` is
    used to expand these into an explicit list of files.

  format : string, optional
    String specifying format of file to open. If none is given the format will be automatically
    detected from the first filename (see :func:`autodetectformat`)

  opener : function, optional
    Function to open individual files. If none is provided, uses the
    format-specific version of :func:`open`. The datasets returned by this
    function are then concatenated and returned. See Notes.

  pattern : string, optional
    A regex pattern to extract date stamps from the filename; used by default file2date.
    Matching patterns must be named <year>, <month>, <day>, <hour> or <minute>.
    Abbreviations are available for the above; $Y matches a four digit year, $m, $d, $H,
    and $M match a two-digit month, day, hour and minute, respectively.

  file2date : function, optional
    Function which returns a date dictionary given a filename. By default this is produced
    by applying the regex pattern ``pattern`` to the filename.

  sorted : boolean, optional
    If True, the filenames are sorted (by alpha) prior to opening each file, and
    the axes on the returned dataset are sorted by calling :meth:`Dataset.sorted`.

  **kwargs : keyword arguments
    These are passed on to the function ``opener``;

  Returns
  =======
  dataset
    A dataset containing the variables concatenated across all specified files.
    The variable data itself is not loaded into memory. 

  Notes
  =====
  This is intended to provide access to large datasets whose files are
  separated by timestep.  To avoid opening every file individually, the time
  axis is constructed by opening the first and the last file in the list of
  files provided. This is done to provide a template of what variables and what
  times are stored in each file - it is assumed that the number of timesteps
  (and their offsets) is the same accross the whole dataset. The time axis is
  then constructed from the filenames themselves, using the function
  ``file2date`` to generate a date from each filename. As a result only two files
  need to be opened, which makes this a very efficient way to work with very large
  datasets.

  However, no explicit check is made of the integrity of the files - if there
  are corrupt or missing data within individual files, this will not become
  clear until that data is actually accessed. This can be done explicitly with
  :func:`check_dataset`, which explicitly attempts to access all the data and
  returns a list of any problems encountered; this can take a long time, but is
  a useful check (and is more likely to provide helpful error messages). 

  The function ``opener`` must take a single positional argument - the filename
  of the file to open - and keyword arguments that are passed through from this
  function. It must return a :class:`Dataset` object with the loaded variables.
  By default the standard :func:`open` is used, but providing a custom opener
  can be useful for any reshaping of the variables that must be done prior to
  concatenating the whole dataset. 

  See Also
  ========
  open
  openall
  '''

  from pygeode.timeaxis import Time, StandardTime
  from pygeode.timeutils import reltime, delta
  from pygeode.dataset import Dataset
  from pygeode.tools import common_dict
  from pygeode.formats import open, autodetectformat
  import numpy as np

  files = expand_file_list(files)
  nfiles = len(files)
  assert nfiles > 0

  # Resolve the format module and use its open() when no custom opener given.
  if opener is None: 
    if format is None: format = autodetectformat(files[0])

    if not hasattr(format, 'open'): 
      try:
        format = __import__("pygeode.formats.%s" % format, fromlist=["pygeode.formats"])
      except ImportError:
        raise ValueError('Unrecognized format module %s.' % format)

    opener = format.open

  # Apply keyword arguments
  if len(kwargs) > 0:
    old_opener = opener
    opener = lambda f: old_opener (f, **kwargs)


  # Degenerate case: only one file was given
  if nfiles == 1: return opener(files[0])


  # We'll need a function to translate filenames to dates
  # (if we don't have one, use the supplied pattern to make one)
  if file2date is None:
    import re
    assert pattern is not None, "I don't know how to get the dates from the filenames"
    regex = pattern
    regex = regex.replace('$Y', '(?P<year>[0-9]{4})')
    regex = regex.replace('$m', '(?P<month>[0-9]{2})')
    regex = regex.replace('$d', '(?P<day>[0-9]{2})')
    regex = regex.replace('$H', '(?P<hour>[0-9]{2})')
    regex = regex.replace('$M', '(?P<minute>[0-9]{2})')
    regex = re.compile(regex)
    def file2date (f):
      d = regex.search(f)
      assert d is not None, "can't use the pattern on the filenames?"
      d = d.groupdict()
      # BUG FIX: dict.iteritems() is Python-2-only; use items().  Also skip
      # optional regex groups that did not participate in the match (None),
      # matching the behaviour of the other open_multi implementation.
      d = dict([k,int(v)] for k,v in d.items() if v is not None)
      # Apply default values (i.e. for minutes, seconds if they're not in the file format?)
      d = dict({'hour':0, 'minute':0,'second':0}, **d)
      return d


  # Get the starting date of each file
  dates = [file2date(f) for f in files]
  dates = dict((k,[d[k] for d in dates]) for k in dates[0].keys())

  # Open a file to get a time axis
  file = opener(files[0])
  T = None
  for v in file.vars:
    if v.hasaxis(Time):
      T = type(v.getaxis(Time))
      break
  if T is None: T = StandardTime
#  T = [v.getaxis(Time) for v in file.vars if v.hasaxis(Time)]
#  T = type(T[0]) if len(T) > 0 else StandardTime
  del file

  # Generate a lower-resolution time axis (the start of *each* file)
  faxis = T(units='days',**dates)

  # Re-sort the files, if they weren't in order
  S = faxis.argsort()
  faxis = faxis.slice[S]
  files = [files[s] for s in S]
  # Re-init the faxis to force the proper start date
  faxis = type(faxis)(units=faxis.units, **faxis.auxarrays)

  # Open the first and last file, so we know what the variables & timesteps are
  first = opener(files[0])
  last  = opener(files[-1])
  names = [v.name for v in first.vars]
  for n in names: assert n in last, "inconsistent vars"
  # Get global attributes
  global_atts = common_dict (first.atts, last.atts)

  #---
  # Map time-axis name -> reconstructed full time axis (None = per-file axis)
  timedict = {None:faxis}
  for v1 in first:
    if not v1.hasaxis(Time): continue
    t1 = v1.getaxis(Time)
    if t1.name in timedict: continue  # already handled this one
    t2 = last[v1.name].getaxis(Time)
    # Construct a full time axis from these pieces

    # One timestep per file? (check for an offset for the var time compared
    #  to the file time)
    if max(len(t1),len(t2)) == 1:
      offset = reltime(t1, startdate=faxis.startdate, units=faxis.units)[0]
      taxis = faxis.withnewvalues(faxis.values + offset)
    # At least one of first/last files has multiple timesteps?
    else:
      assert t1.units == t2.units
      dt = max(delta(t1),delta(t2))
      assert dt > 0
      val1 = t1.values[0]
      val2 = reltime(t2, startdate=t1.startdate)[-1]
      # Infer the total number of timesteps spanning first..last files.
      nt = (val2-val1)/dt + 1
      assert round(nt) == nt
      nt = int(round(nt))
      assert nt > 0
      taxis = t1.withnewvalues(np.arange(nt)*dt + val1)

    timedict[t1.name] = taxis

  #---

  # Create the multifile version of the vars
  vars = [Multifile_Var(v1, opener, files, faxis, timedict) for v1 in first]


  return Dataset(vars,atts=global_atts)
Exemplo n.º 13
0
def open_multi (files, format=None, opener=None, pattern=None, file2date=None, **kwargs):
# {{{
  ''' Returns a :class:`Dataset` containing variables merged across many files.

  Parameters
  ==========
  files : string, list, or tuple
    Either a single filename or a list of filenames. Wildcards are supported, :func:`glob.iglob` is
    used to expand these into an explicit list of files.

  format : string, optional
    String specifying format of file to open. If none is given the format will be automatically
    detected from the first filename (see :func:`autodetectformat`)

  opener : function, optional
    Function to open individual files. If none is provided, uses the
    format-specific version of :func:`open`. The datasets returned by this
    function are then concatenated and returned. See Notes.

  pattern : string, optional
    A regex pattern to extract date stamps from the filename; used by default file2date.
    Matching patterns must be named <year>, <month>, <day>, <hour> or <minute>.
    Abbreviations are available for the above; $Y matches a four digit year, $m, $d, $H,
    and $M match a two-digit month, day, hour and minute, respectively.

  file2date : function, optional
    Function which returns a date dictionary given a filename. By default this is produced
    by applying the regex pattern ``pattern`` to the filename.

  sorted : boolean, optional
    If True, the filenames are sorted (by alpha) prior to opening each file, and
    the axes on the returned dataset are sorted by calling :meth:`Dataset.sorted`.

  **kwargs : keyword arguments
    These are passed on to the function ``opener``;

  Returns
  =======
  dataset
    A dataset containing the variables concatenated across all specified files.
    The variable data itself is not loaded into memory. 

  Notes
  =====
  This is intended to provide access to large datasets whose files are
  separated by timestep.  To avoid opening every file individually, the time
  axis is constructed by opening the first and the last file in the list of
  files provided. This is done to provide a template of what variables and what
  times are stored in each file - it is assumed that the number of timesteps
  (and their offsets) is the same accross the whole dataset. The time axis is
  then constructed from the filenames themselves, using the function
  ``file2date`` to generate a date from each filename. As a result only two files
  need to be opened, which makes this a very efficient way to work with very large
  datasets.

  However, no explicit check is made of the integrity of the files - if there
  are corrupt or missing data within individual files, this will not become
  clear until that data is actually accessed. This can be done explicitly with
  :func:`check_multi`, which explicitly attempts to access all the data and
  returns a list of any problems encountered; this can take a long time, but is
  a useful check (and is more likely to provide helpful error messages). 

  The function ``opener`` must take a single positional argument - the filename
  of the file to open - and keyword arguments that are passed through from this
  function. It must return a :class:`Dataset` object with the loaded variables.
  By default the standard :func:`open` is used, but providing a custom opener
  can be useful for any reshaping of the variables that must be done prior to
  concatenating the whole dataset. 

  See Also
  ========
  open
  openall
  '''

  from pygeode.timeaxis import Time, StandardTime
  from pygeode.timeutils import reltime, delta
  from pygeode.dataset import Dataset
  from pygeode.tools import common_dict
  from pygeode.formats import open, autodetectformat
  import numpy as np

  files = expand_file_list(files)
  nfiles = len(files)
  assert nfiles > 0

  # No custom opener: resolve the format (by name or autodetection) and use
  # that format module's open() function.
  if opener is None: 
    if format is None: format = autodetectformat(files[0])

    if not hasattr(format, 'open'): 
      try:
        format = __import__("pygeode.formats.%s" % format, fromlist=["pygeode.formats"])
      except ImportError:
        raise ValueError('Unrecognized format module %s.' % format)

    opener = format.open

  # Apply keyword arguments
  if len(kwargs) > 0:
    old_opener = opener
    opener = lambda f: old_opener (f, **kwargs)


  # Degenerate case: only one file was given
  if nfiles == 1: return opener(files[0])


  # We'll need a function to translate filenames to dates
  # (if we don't have one, use the supplied pattern to make one)
  if file2date is None:
    import re
    assert pattern is not None, "I don't know how to get the dates from the filenames"
    regex = pattern
    regex = regex.replace('$Y', '(?P<year>[0-9]{4})')
    regex = regex.replace('$m', '(?P<month>[0-9]{2})')
    regex = regex.replace('$d', '(?P<day>[0-9]{2})')
    regex = regex.replace('$H', '(?P<hour>[0-9]{2})')
    regex = regex.replace('$M', '(?P<minute>[0-9]{2})')
    regex = re.compile(regex)
    def file2date (f):
      # Extract named date fields from the filename; optional groups that
      # did not match are skipped (v is None).
      d = regex.search(f)
      assert d is not None, "can't use the pattern on the filenames?"
      d = d.groupdict()
      d = dict([k,int(v)] for k,v in d.items() if v is not None)
      # Apply default values (i.e. for minutes, seconds if they're not in the file format?)
      d = dict({'hour':0, 'minute':0,'second':0}, **d)
      return d


  # Get the starting date of each file
  dates = [file2date(f) for f in files]
  # Transpose: field name -> list of per-file values (the shape T() expects).
  dates = dict((k,[d[k] for d in dates]) for k in list(dates[0].keys()))

  # Open a file to get a time axis
  file = opener(files[0])
  T = None
  for v in file.vars:
    if v.hasaxis(Time):
      T = type(v.getaxis(Time))
      break
  if T is None: T = StandardTime
#  T = [v.getaxis(Time) for v in file.vars if v.hasaxis(Time)]
#  T = type(T[0]) if len(T) > 0 else StandardTime
  del file

  # Generate a lower-resolution time axis (the start of *each* file)
  faxis = T(units='days',**dates)

  # Re-sort the files, if they weren't in order
  S = faxis.argsort()
  faxis = faxis.slice[S]
  files = [files[s] for s in S]
  # Re-init the faxis to force the proper start date
  faxis = type(faxis)(units=faxis.units, **faxis.auxarrays)

  # Open the first and last file, so we know what the variables & timesteps are
  first = opener(files[0])
  last  = opener(files[-1])
  names = [v.name for v in first.vars]
  for n in names: assert n in last, "inconsistent vars"
  # Get global attributes
  global_atts = common_dict (first.atts, last.atts)

  #---
  # Maps time-axis name -> full reconstructed time axis.
  # The None key holds the per-file (start-of-file) axis.
  timedict = {None:faxis}
  for v1 in first:
    if not v1.hasaxis(Time): continue
    t1 = v1.getaxis(Time)
    if t1.name in timedict: continue  # already handled this one
    t2 = last[v1.name].getaxis(Time)
    # Construct a full time axis from these pieces

    # One timestep per file? (check for an offset for the var time compared
    #  to the file time)
    if max(len(t1),len(t2)) == 1:
      offset = reltime(t1, startdate=faxis.startdate, units=faxis.units)[0]
      taxis = faxis.withnewvalues(faxis.values + offset)
    # At least one of first/last files has multiple timesteps?
    else:
      assert t1.units == t2.units
      dt = max(delta(t1),delta(t2))
      assert dt > 0
      val1 = t1.values[0]
      val2 = reltime(t2, startdate=t1.startdate)[-1]
      # Infer total timestep count spanning the first..last files; it must
      # come out to a whole number or the files are inconsistent.
      nt = (val2-val1)/dt + 1
      assert round(nt) == nt
      nt = int(round(nt))
      assert nt > 0
      taxis = t1.withnewvalues(np.arange(nt)*dt + val1)

    timedict[t1.name] = taxis

  #---

  # Create the multifile version of the vars
  vars = [Multifile_Var(v1, opener, files, faxis, timedict) for v1 in first]


  return Dataset(vars,atts=global_atts)