def __call__(self, load_list=None, lproduct='outer', inner_list=None, outer_list=None,
             lensemble=None, ens_name=None, ens_title=None, **kwargs):
    ''' wrap original function: expand argument list, execute load_fct over argument list,
        and return a list or Ensemble of datasets
        load_list/inner_list/outer_list: argument names to expand (passed to expandArgumentList)
        lproduct: how to combine expanded lists ('outer' product by default)
        lensemble: return an Ensemble instead of a plain list; defaults to True iff ens_name is given
        ens_name/ens_title: name/title for the returned Ensemble
        **kwargs: arguments passed through to the wrapped load function '''
    # decide, what to do
    if load_list is None and inner_list is None and outer_list is None:
        # normal operation: no expansion
        datasets = self.load_fct(**kwargs)
    else:
        # expansion required
        lensemble = ens_name is not None if lensemble is None else lensemble
        # figure out arguments
        kwargs_list = expandArgumentList(expand_list=load_list, lproduct=lproduct,
                                        inner_list=inner_list, outer_list=outer_list, **kwargs)
        # load datasets
        # N.B.: use a distinct loop variable so the expanded argument dicts do not
        #       shadow this function's own **kwargs parameter
        datasets = []
        for load_kwargs in kwargs_list:
            # load dataset
            datasets.append(self.load_fct(**load_kwargs))
        # construct ensemble
        if lensemble:
            datasets = Ensemble(members=datasets, name=ens_name, title=ens_title, basetype='Dataset')
    # return list or ensemble of datasets
    return datasets
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None,
                   prov=None, slices=None, obsslices=None, years=None, reduction=None, shape=None,
                   station=None, constraints=None, filetypes=None, domain=None, ldataset=False,
                   lcheckVar=False, lwrite=False, ltrimT=True, name_tags=None, dataset_mode='time-series',
                   lminmax=False, master=None, lall=True, ensemble_list=None, ensemble_product='inner',
                   lensembleAxis=False, WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
    ''' a convenience function to load an ensemble of time-series, based on certain criteria; works
        with either stations or regions; seasonal/climatological aggregation is also supported
        names: dataset name(s) to load; expanded via expandArgumentList when ensemble_list is used
        ldataset: if True, load and return a single Dataset instead of an Ensemble
        reduction: dict mapping axis name -> method name (string) or coordinate value (number)
        returns: an Ensemble of Datasets, or a single Dataset if ldataset=True '''
    # prepare ensemble
    if varlist is not None:
        varlist = list(varlist)[:] # copy list
        if station:
            for var in stn_params: # necessary to select stations
                if var not in varlist: varlist.append(var)
        if shape:
            for var in shp_params: # necessary to select shapes
                if var not in varlist: varlist.append(var)
    # perpare ensemble and arguments
    if ldataset and ensemble_list: raise ArgumentError() # a single dataset cannot be expanded
    elif not ldataset: ensemble = Ensemble(name=name, title=title, basetype='Dataset')
    # expand argument list
    if ensemble_list is None: ensemble_list = ['names'] if not ldataset else None
    loadargs = expandArgumentList(names=names, station=station, prov=prov, shape=shape, varlist=varlist,
                                  mode=dataset_mode, filetypes=filetypes, domains=domain, lwrite=lwrite,
                                  slices=slices, obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                                  years=years, expand_list=ensemble_list, lproduct=ensemble_product,
                                  lensembleAxis=lensembleAxis)
    for loadarg in loadargs:
        # clean up argumetns
        name = loadarg.pop('names',None); name_tag = loadarg.pop('name_tags',None)
        slcs = loadarg.pop('slices',None); obsslcs = loadarg.pop('obsslices',None)
        # load individual dataset
        dataset = loadDataset(name=name, WRF_exps=WRF_exps, CESM_exps=CESM_exps,
                              WRF_ens=WRF_ens, CESM_ens=CESM_ens, **loadarg)
        if name_tag is not None:
            # a leading underscore appends the tag; otherwise the tag replaces the name
            if name_tag[0] == '_': dataset.name += name_tag
            else: dataset.name = name_tag
        # apply slicing
        # (obs datasets are identified by an 'obs'/'Obs' prefix or an all-uppercase name)
        if obsslcs and ( dataset.name[:3].lower() == 'obs' or dataset.name.isupper() ):
            # copy before updating, so the shared 'slices' dict is not mutated across members
            slcs = dict() if slcs is None else slcs.copy()
            slcs.update(**obsslcs) # add special slices for obs
            # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
        if slcs: dataset = dataset(lminmax=lminmax, **slcs) # slice immediately
        if not ldataset: ensemble += dataset.load() # load data and add to ensemble
    # if input was not a list, just return dataset
    if ldataset: ensemble = dataset.load() # load data
    # select specific stations (if applicable)
    if not ldataset and station and constraints:
        from datasets.EC import selectStations
        ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False,
                                  lall=lall, lcheckVar=lcheckVar, **constraints)
        # make sure all have cluster meta data
        # NOTE(review): this loop is assumed to belong to the station-selection branch, since the
        #               cluster meta data originates from the EC station datasets — confirm nesting
        for varname in stn_params + shp_params:
            # find valid instance
            var = None
            for ds in ensemble:
                if varname in ds: var = ds[varname]; break
            # give to those who have not
            if var is not None:
                var.load() # load data and add as regular variable (not VarNC)
                for ds in ensemble:
                    if varname not in ds: ds.addVariable(var.copy())
    # apply general reduction operations
    if reduction is not None:
        for ax,op in reduction.iteritems(): # Python 2 API
            # a string selects a reduction method (e.g. 'mean'); a number slices at a coordinate
            if isinstance(op, basestring): ensemble = getattr(ensemble,op)(axis=ax)
            elif isinstance(op, (int,np.integer,float,np.inexact)): ensemble = ensemble(**{ax:op})
    # extract seasonal/climatological values/extrema
    if (ldataset and len(ensemble)==0): raise EmptyDatasetError(varlist)
    if not ldataset and any([len(ds)==0 for ds in ensemble]): raise EmptyDatasetError(ensemble)
    # N.B.: the operations below should work with Ensembles as well as Datasets
    if aggregation:
        # method name is composed, e.g. 'climMean' or 'seasonalMean' (all-caps aggregations kept as-is)
        method = aggregation if aggregation.isupper() else aggregation.title()
        if season is None: ensemble = getattr(ensemble,'clim'+method)(taxis='time', **kwargs)
        else: ensemble = getattr(ensemble,'seasonal'+method)(season=season, taxis='time', **kwargs)
    elif season: # but not aggregation
        ensemble = ensemble.seasonalSample(season=season)
    # return dataset
    return ensemble
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None,
                   prov=None, slices=None, obsslices=None, years=None, reduction=None, shape=None,
                   station=None, constraints=None, filetypes=None, domain=None, ldataset=False,
                   lcheckVar=False, lwrite=False, ltrimT=True, name_tags=None, dataset_mode='time-series',
                   lminmax=False, master=None, lall=True, ensemble_list=None, ensemble_product='inner',
                   lensembleAxis=False, WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
    ''' a convenience function to load an ensemble of time-series, based on certain criteria; works
        with either stations or regions; seasonal/climatological aggregation is also supported
        names: dataset name(s) to load; expanded via expandArgumentList when ensemble_list is used
        ldataset: if True, load and return a single Dataset instead of an Ensemble
        reduction: dict mapping axis name -> method name (string) or coordinate value (number)
        returns: an Ensemble of Datasets, or a single Dataset if ldataset=True '''
    # prepare ensemble
    if varlist is not None:
        varlist = list(varlist)[:] # copy list
        if station:
            for var in stn_params: # necessary to select stations
                if var not in varlist: varlist.append(var)
        if shape:
            for var in shp_params: # necessary to select shapes
                if var not in varlist: varlist.append(var)
    # perpare ensemble and arguments
    if ldataset and ensemble_list: raise ArgumentError() # a single dataset cannot be expanded
    elif not ldataset: ensemble = Ensemble(name=name, title=title, basetype='Dataset')
    # expand argument list
    if ensemble_list is None: ensemble_list = ['names'] if not ldataset else None
    loadargs = expandArgumentList(names=names, station=station, prov=prov, shape=shape, varlist=varlist,
                                  mode=dataset_mode, filetypes=filetypes, domains=domain, lwrite=lwrite,
                                  slices=slices, obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                                  years=years, expand_list=ensemble_list, lproduct=ensemble_product,
                                  lensembleAxis=lensembleAxis)
    for loadarg in loadargs:
        # clean up argumetns
        name = loadarg.pop('names',None); name_tag = loadarg.pop('name_tags',None)
        slcs = loadarg.pop('slices',None); obsslcs = loadarg.pop('obsslices',None)
        # load individual dataset
        dataset = loadDataset(name=name, WRF_exps=WRF_exps, CESM_exps=CESM_exps,
                              WRF_ens=WRF_ens, CESM_ens=CESM_ens, **loadarg)
        if name_tag is not None:
            # a leading underscore appends the tag; otherwise the tag replaces the name
            if name_tag[0] == '_': dataset.name += name_tag
            else: dataset.name = name_tag
        # apply slicing
        # (obs datasets are identified by an 'obs'/'Obs' prefix or an all-uppercase name)
        if obsslcs and ( dataset.name[:3].lower() == 'obs' or dataset.name.isupper() ):
            # BUGFIX: copy before updating; previously this aliased/mutated the shared
            #         'slices'/'obsslices' dicts, leaking obs-slices into other ensemble members
            slcs = dict() if slcs is None else slcs.copy()
            slcs.update(**obsslcs) # add special slices for obs
            # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
        if slcs: dataset = dataset(lminmax=lminmax, **slcs) # slice immediately
        if not ldataset: ensemble += dataset.load() # load data and add to ensemble
    # if input was not a list, just return dataset
    if ldataset: ensemble = dataset.load() # load data
    # select specific stations (if applicable)
    if not ldataset and station and constraints:
        from datasets.EC import selectStations
        ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False,
                                  lall=lall, lcheckVar=lcheckVar, **constraints)
        # make sure all have cluster meta data (cluster info originates from the EC station data)
        for varname in stn_params + shp_params:
            # find valid instance
            var = None
            for ds in ensemble:
                if varname in ds: var = ds[varname]; break
            # give to those who have not
            if var is not None:
                var.load() # load data and add as regular variable (not VarNC)
                for ds in ensemble:
                    if varname not in ds: ds.addVariable(var.copy())
    # apply general reduction operations
    if reduction is not None:
        for ax,op in reduction.iteritems(): # Python 2 API
            # a string selects a reduction method (e.g. 'mean'); a number slices at a coordinate
            if isinstance(op, basestring): ensemble = getattr(ensemble,op)(axis=ax)
            elif isinstance(op, (int,np.integer,float,np.inexact)): ensemble = ensemble(**{ax:op})
    # extract seasonal/climatological values/extrema
    # (raise in call form — the old 'raise X, arg' statement is Python-2-only syntax)
    if (ldataset and len(ensemble)==0): raise EmptyDatasetError(varlist)
    if not ldataset and any([len(ds)==0 for ds in ensemble]): raise EmptyDatasetError(ensemble)
    # N.B.: the operations below should work with Ensembles as well as Datasets
    if aggregation:
        # method name is composed, e.g. 'climMean' or 'seasonalMean' (all-caps aggregations kept as-is)
        method = aggregation if aggregation.isupper() else aggregation.title()
        if season is None: ensemble = getattr(ensemble,'clim'+method)(taxis='time', **kwargs)
        else: ensemble = getattr(ensemble,'seasonal'+method)(season=season, taxis='time', **kwargs)
    elif season: # but not aggregation
        ensemble = ensemble.seasonalSample(season=season)
    # return dataset
    return ensemble
def selectElements(datasets, axis, testFct=None, master=None, linplace=False, lall=False):
    ''' Extract common points that meet a specific criterion from a list of datasets.
        The test function has to accept the following input: index, dataset, axis
        datasets: list/tuple/Ensemble of Datasets sharing the named axis
        master: index or dataset name whose axis defines the candidate coordinates
                (default: the dataset with the shortest axis, for efficiency)
        lall: apply the test to every dataset (mutually exclusive with master)
        returns: list (or Ensemble, mirroring the input) of sliced datasets '''
    # N.B.: all 'raise' statements use call form; the old 'raise X, arg' syntax is Python-2-only
    if linplace: raise NotImplementedError("Option 'linplace' does not work currently.")
    # check input
    if not isinstance(datasets, (list,tuple,Ensemble)): raise TypeError
    if not all(isinstance(dataset,Dataset) for dataset in datasets): raise TypeError
    if not isCallable(testFct) and testFct is not None: raise TypeError
    if isinstance(axis, Axis): axis = axis.name
    if not isinstance(axis, basestring): raise TypeError
    # BUGFIX: the message previously referred to a nonexistent option 'imaster'
    if lall and master is not None: raise ArgumentError("The options 'lall' and 'master' are mutually exclusive!")
    # save some ensemble parameters for later
    lnotest = testFct is None
    lens = isinstance(datasets,Ensemble)
    if lens:
        enskwargs = dict(basetype=datasets.basetype, idkey=datasets.idkey,
                         name=datasets.name, title=datasets.title)
    # use dataset with shortest axis as master sample (more efficient)
    axes = [dataset.getAxis(axis) for dataset in datasets]
    if master is None:
        imaster = np.argmin([len(ax) for ax in axes]) # find shortest axis
    elif isinstance(master,basestring):
        # translate name of dataset into index
        imaster = None
        for i,dataset in enumerate(datasets):
            if dataset.name == master:
                imaster = i; break
        if imaster is None: raise ArgumentError("Master '{:s}' not found in datasets".format(master))
    else:
        imaster = master
        if not imaster is None and not isinstance(imaster,(int,np.integer)): raise TypeError(imaster)
        elif imaster >= len(datasets) or imaster < 0: raise ValueError
    maxis = axes.pop(imaster) # extraxt shortest axis for loop
    if lall:
        # move the master dataset to the front, so indices line up with the index tuples below
        tmpds = tuple(datasets)
        if imaster != 0: tmpds = (tmpds[imaster],)+tmpds[:imaster]+tmpds[imaster+1:]
        test_fct = lambda i,ds: testFct(i, ds, axis) # prepare test function arguments
    else:
        test_fct = lambda i: testFct(i, datasets[imaster], axis)
    # loop over coordinate axis
    itpls = [] # list of valid index tuple
    for i,x in enumerate(maxis.coord):
        # check other axes
        if all([x in ax.coord for ax in axes]): # only the other axes
            # no condition
            if lnotest:
                # just find and add indices
                itpls.append((i,)+tuple(ax.coord.searchsorted(x) for ax in axes))
            # check condition using shortest dataset
            elif lall:
                # check test condition on all datasets (slower)
                tmpidx = (i,)+tuple(ax.coord.searchsorted(x) for ax in axes)
                if all(test_fct(ii,ds) for ii,ds in zip(tmpidx,tmpds)):
                    # add corresponding indices in each dataset to list
                    itpls.append(tmpidx)
            else:
                # check test condition on only one dataset (faster, default)
                if test_fct(i):
                    # add corresponding indices in each dataset to list
                    itpls.append((i,)+tuple(ax.coord.searchsorted(x) for ax in axes))
                # N.B.: since we can expect exact matches, plain searchsorted is fastest (side='left')
    # check if there is anything left...
    if len(itpls) == 0: raise DatasetError("Aborting: no data points match all criteria!")
    # construct axis indices for each dataset (need to remember to move shortest axis back in line)
    idxs = [[] for ds in datasets] # create unique empty lists
    for itpl in itpls:
        for i,idx in enumerate(itpl): idxs[i].append(idx)
    idxs.insert(imaster,idxs.pop(0)) # move first element back in line (where shortest axis was)
    idxs = [np.asarray(idxlst, dtype='int') for idxlst in idxs]
    # slice datasets using only positive results
    datasets = [ds(lidx=True, linplace=linplace, **{axis:idx}) for ds,idx in zip(datasets,idxs)]
    if lens: datasets = Ensemble(*datasets, **enskwargs)
    # return datasets
    return datasets
def generateStatistics(varname, ens, fit, scl=None, reference=None, mode='Ratio', plot_labels=None,
                       nsamples=None, bootstrap_axis='bootstrap', lflatten=False, sample_axis='time',
                       lcrossval=True):
    ''' Perform K-S test and compute ratio of means; return results in formatted string.
        varname: variable to extract from each dataset in 'ens'/'fit'/'scl'
        ens: Ensemble of Datasets with the raw samples
        fit: Ensemble of Datasets with fitted distributions (VarRV); required (else NotImplementedError)
        scl: optional Ensemble with rescaled distributions (adds a second, rescaled table section)
        reference: index, name, or list of names defining the reference member(s) for comparisons
        mode: 'Ratio' (mean ratios) or 'Shift' (mean differences) in the last table column
        returns: a formatted, multi-line table as a single string '''
    # some average diagnosics
    idkey = 'dataset_name' if ens.basetype is Dataset else 'name'
    varlist = Ensemble(*[ds[varname] for ds in ens if ds is not None and varname in ds], idkey=idkey)
    # pad members with fewer dimensions to the axes of the highest-dimensional member
    if not all(varlist[0].ndim==ndim for ndim in varlist.ndim):
        new_axes = varlist[np.argmax(varlist.ndim)].axes
        varlist = varlist.insertAxes(new_axes=new_axes, lcheckAxis=False)
    mvars = varlist.mean()
    # growth rate
    lratio = mode.lower() == 'ratio'
    lshift = mode.lower() == 'shift'
    if plot_labels is None: plot_labels = dict()
    # figure out fillValue
    # NOTE(review): np.NaN is an alias removed in NumPy 2.0 — consider np.nan when upgrading
    if np.issubdtype(varlist[0].dtype, np.floating): fillValue = np.NaN
    elif np.issubdtype(varlist[0].dtype, np.integer): fillValue = 0
    else: raise TypeError(varlist[0].dtype)
    # define reference
    if isinstance(reference,(list,tuple)):
        # a list of references: the first is the initial reference; later names switch the
        # reference as they are encountered in the member loop
        reflist0 = list(reference); reference = reference[0]
    else: reflist0 = [] # dummy list
    if reference is None: iref0 = 0
    elif isinstance(reference,(int,np.integer)): iref0 = reference
    elif isinstance(reference,str): iref0 = varlist.idkeys.index(reference)
    else: raise ArgumentError
    # goodness of fit, reported on plot panels
    if fit:
        fitlist = Ensemble(*[ds[varname] for ds in fit if ds is not None and varname in ds], idkey=idkey)
        # collapse a bootstrap axis, if present (use the first/original sample)
        if any(fitlist.hasAxis(bootstrap_axis)):
            fitlist = fitlist(**{bootstrap_axis:0, 'lcheckAxis':False})
        if not all(fitlist[0].ndim==ndim for ndim in fitlist.ndim):
            new_axes = fitlist[np.argmax(fitlist.ndim)].axes
            fitlist = fitlist.insertAxes(new_axes=new_axes, lcheckAxis=False)
        # for var in fitlist:
        #   print [ax.name for ax in var.axes], var.shape
        # assert np.all(fitlist[0][1,:] == fitlist[0][2,:])
        assert not isinstance(reference,str) or iref0 == fitlist.idkeys.index(reference), reference
        if any([isinstance(dist,VarRV) for dist in fitlist]) or not scl:
            # build the table header
            names = [plot_labels.get(getattr(dist,idkey),getattr(dist,idkey)) for dist in fitlist]
            lnames = max([len(name) for name in names]) # allocate line space
            headline = 'Sample'; lhead = len(headline) # sample/exp header
            headline += ' '*max(lnames-lhead,0) # 'Exp.'+' '*max(lnames-4,0) if lnames < 8 else 'Experiment'
            string = '{:s} Fit {:s}\n'.format(headline,mode.title())
            namestr = '{{:>{:d}s}} {{:s}} '.format(max(lhead,lnames))
            iref = iref0; reflist = reflist0[:] # copy list
            for i,dist,var,name,mvar in zip(range(len(fitlist)),fitlist,varlist,names,mvars):
                if isinstance(dist,VarRV) or not scl:
                    if isinstance(dist,VarRV):
                        # goodness-of-fit p-value of the fitted distribution vs. the sample
                        pval = dist.fittest(var, nsamples=nsamples, asVar=False, lcrossval=lcrossval) #lflatten=lflatten, axis_idx=var.axisIndex(sample_axis, lcheck=False))
                        # print var.name, pval, pval.mean().__class__.__name__, '{:s}'.format(pval.mean())
                        # pval = '{:3.2f}'.format(float(pval.mean())) # mean is only necessary to convert to scalar
                        pval = '{:3.2f}'.format(float(np.median(pval))) # mean is only necessary to convert to scalar
                        # for some reason masked array scalars appear string-type, rather than numbers...
                    else: pval = ' - '
                    if len(reflist) > 0 and name == reflist[0]:
                        # assign new reference
                        iref = i; del reflist[0] # pop element
                    if isinstance(mvar,np.ma.core.MaskedConstant) or isinstance(mvars[iref],np.ma.core.MaskedConstant):
                        string += namestr.format(name,' N/A\n')
                    elif lratio: string += (namestr+'{:3.2f}\n').format(name,pval,(mvar/mvars[iref]).mean())
                    elif lshift: string += (namestr+'{:+2.1f}\n').format(name,pval,(mvar-mvars[iref]).mean())
        else: string = ''
        # NOTE(review): on this branch 'lhead'/'namestr' remain undefined; the 'scl' section below
        #               would raise a NameError if reached with len(varlist) > 1 — confirm intent
    else: raise NotImplementedError
    if scl:
        scllist = Ensemble(*[ds[varname] for ds in scl if ds is not None and varname in ds], idkey=idkey)
        bs_axes = scllist.axisIndex(bootstrap_axis, lcheck=False) # return None, if not present
        if bs_axes is None: bs_axes = [None]*len(scllist)
        scllist = scllist(**{bootstrap_axis:0, 'lcheckAxis':False})
        if not all(scllist[0].ndim==ndim for ndim in scllist.ndim):
            new_axes = scllist[np.argmax(scllist.ndim)].axes
            scllist = scllist.insertAxes(new_axes=new_axes, lcheckAxis=False)
        assert not isinstance(reference,str) or iref0 == scllist.idkeys.index(reference), reference
        if len(scllist) != len(varlist): raise AxisError(scllist)
        # compute means
        mvars = []
        for svr,var in zip(scllist,varlist):
            if isinstance(svr,VarRV):
                mvar = svr.stats(moments='mv', asVar=False)[...,0] # only first moment
            else:
                # plain variable: rescale the sample mean with the stored location factor
                mvar = var.mean()*svr.atts.get('loc_factor',1.)
            mvars.append(mvar)
        # figure out label width and prepare header
        if len(varlist) > 1: # otherwise no comparison...
            names = [plot_labels.get(getattr(dist,idkey),getattr(dist,idkey)) for dist in scllist]
            lnames = max([len(name) for name in names]) # allocate line space
            namestr = '{{:>{:d}s}} {{:s}} '.format(max(lhead,lnames))
            tmphead = 'Fit to {:s}:' if scl == fit else 'Rescaled to {:s}:' # new heading
            tmphead += ' '*(max(lnames-len(names[iref0]),0)+5)+'\n'
            string += tmphead.format(names[iref0])
            # prepare first reference sample for K-S test
            scale,shape = scllist[iref0].atts.get('scale_factor', 1),scllist[iref0].atts.get('shape_factor', 1)
            if not (scale is None or scale == 1) and not (shape is None or shape == 1):
                raise NotImplementedError("Cannot rescale scale/variance and shape parameters of reference sample!")
            refsmpl = varlist[iref0].getArray(unmask=True, fillValue=fillValue) # only once
            loc0 = scllist[iref0].atts.get('loc_factor', 1)
            refsmpl = _rescaleSample(refsmpl, loc0, bs_axis=bs_axes[iref0]) # apply rescaling (varies, dependign on loc-type)
            # print varlist[iref0].dataset_name, [ax.name for ax in varlist[iref0].axes], refsmpl.shape,
            # start loop
            iref = iref0; reflist = reflist0[:] # copy list
            for i,dist,varsmpl,mvar,bs_axis in zip(range(len(varlist)),scllist,varlist,mvars,bs_axes):
                name = getattr(dist,idkey)
                if len(reflist) > 0 and name == reflist[0]:
                    # assign new reference
                    iref = i; del reflist[0] # pop element
                    # prepare subsequent reference sample for K-S test
                    scale,shape = dist.atts.get('scale_factor', 1),dist.atts.get('shape_factor', 1)
                    if not (scale is None or scale == 1) and not (shape is None or shape == 1):
                        raise NotImplementedError("Cannot rescale scale/variance and shape parameters of reference sample!")
                    refsmpl = varsmpl.getArray(unmask=True, fillValue=fillValue) # only once
                    if not varsmpl.atts.get('rescaled',False):
                        refsmpl = _rescaleSample(refsmpl, dist.atts.get('loc_factor', 1), bs_axis=bs_axis) # apply rescaling (varies, dependign on loc-type)
                elif i != iref:
                    scale,shape = dist.atts.get('scale_factor', 1),dist.atts.get('shape_factor', 1)
                # perform K-S test
                if (scale is None or scale == 1) and (shape is None or shape == 1):
                    # K-S test between actual samples is more realistic, and rescaling of mean is simple
                    smpl = varsmpl.getArray(unmask=True, fillValue=fillValue) # only once
                    if not varsmpl.atts.get('rescaled',False):
                        smpl = _rescaleSample(smpl, dist.atts.get('loc_factor', 1), bs_axis=bs_axis) # apply rescaling (varies, dependign on loc-type)
                    # print varsmpl.dataset_name, [ax.name for ax in varsmpl.axes], smpl.shape
                    # print smpl.shape, np.nanmean(smpl), refsmpl.shape, np.nanmean(refsmpl)
                    # print lflatten, sample_axis
                    pval = ks_2samp(refsmpl, smpl, asVar=False, lflatten=lflatten,
                                    axis_idx=varsmpl.axisIndex(sample_axis, lcheck=False))
                    # print dist.name, pval
                    # pval = '{:3.2f}'.format(float(pval.mean()))
                    pval = '{:3.2f}'.format(float(np.median(pval)))
                else:
                    # no straight-forward way to rescale samples, so have to compare distribution with
                    # reference sample, which means more noise (since the distribution will be randomly sampled)
                    if isinstance(dist,VarRV):
                        pval = '{:3.2f}'.format(float(dist.kstest(refsmpl).mean()))
                    else: pval = ' - '
                # add column with ratio/difference of means after rescaling
                if name in plot_labels: name = plot_labels[name]
                if isinstance(mvar,np.ma.core.MaskedConstant) or isinstance(mvars[iref],np.ma.core.MaskedConstant):
                    string += namestr.format(name,' N/A\n')
                elif lratio: string += (namestr+'{:3.2f}\n').format(name,pval,(mvar/mvars[iref]).mean())
                elif lshift: string += (namestr+'{:+2.1f}\n').format(name,pval,(mvar-mvars[iref]).mean())
    # return formatted table in string
    return string
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None, varlist=None,
                          slices=None, aggregation='mean', shapetype=None, period=None,
                          variable_list=None, **kwargs):
    ''' convenience function to load shape observations; the main function is to select sensible
        defaults based on 'varlist', if no 'obs' are specified
        obs: observational dataset name(s); 'Observations'/'WSC' triggers heuristic defaults
        varlist: variable names or keys into 'variable_list' (expanded to their .vars)
        returns: an Ensemble of observational Datasets (possibly empty) '''
    # prepare arguments
    if shapetype is None: shapetype = 'shpavg' # really only one in use
    # resolve variable list (no need to maintain order)
    if isinstance(varlist, basestring): varlist = [varlist]
    if varlist is None: varlist = [] # BUGFIX: default varlist=None used to crash the loop below
    variables = set(shp_params)
    for name in varlist:
        # BUGFIX: guard against variable_list=None (default) before membership test
        if variable_list is not None and name in variable_list:
            variables.update(variable_list[name].vars)
        else: variables.add(name)
    variables = list(variables)
    # figure out default datasets
    if obs is None: obs = 'Observations'
    lUnity = lCRU = lWSC = False # N.B.: lCRU is never enabled here; branch kept for compatibility
    if obs[:3].lower() in ('obs', 'wsc'):
        if any(var in CRU_vars for var in variables):
            if aggregation == 'mean' and seasons is None:
                lUnity = True; obs = []
        if basins and any([var in WSC_vars for var in variables]):
            if aggregation.lower() in ('mean', 'std', 'sem', 'min', 'max') and seasons is None:
                lWSC = True; obs = []
    if not isinstance(obs, (list, tuple)): obs = (obs, )
    # configure slicing (extract basin/province/shape and period)
    slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, period=period)
    if slices is not None:
        noyears = slices.copy()
        noyears.pop('years', None) # slices for climatologies
    # prepare and load ensemble of observations
    obsens = Ensemble(name='obs', title='Observations', basetype=Dataset)
    if len(obs) > 0:
        # regular operations with user-defined dataset
        try:
            ensemble = loadEnsembleTS(names=obs, season=seasons, aggregation=aggregation, slices=slices,
                                      varlist=variables, shape=shapetype, ldataset=False, **kwargs)
            for ens in ensemble: obsens += ens
        except EmptyDatasetError: pass # no valid observations -> return what we have
    if lUnity:
        # load Unity data instead of averaging CRU data
        if period is None: period = (1979, 1994)
        dataset = loadDataset(name='Unity', varlist=variables, mode='climatology',
                              period=period, shape=shapetype)
        if slices is not None: dataset = dataset(**noyears) # slice immediately
        obsens += dataset.load()
    if lCRU:
        # this is basically regular operations with CRU as default
        obsens += loadEnsembleTS(names='CRU', season=seasons, aggregation=aggregation, slices=slices,
                                 varlist=variables, shape=shapetype, ldataset=True, **kwargs)
    if lWSC:
        # another special case: river hydrographs
        # from datasets.WSC import loadGageStation, GageStationError
        try:
            dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation,
                                      mode='climatology', filetype='monthly')
            if slices is not None: dataset = dataset(**noyears) # slice immediately
            obsens += dataset.load()
        except GageStationError: pass # just ignore, if gage station data is missing
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    return obsens
def rescaleDistributions(datasets, reference=None, target=None, lscale=False, suffixes=None, lglobal=False):
    ''' Rescale datasets, so that the mean of each variable matches the corresponding variable in
        the reference dataset; if a target is specified, the target scale factors are applied to
        all datasets, if target is None, each dataset is rescaled individually.
        target: None (individual), 'auto' (reuse a parent's factors via name-suffix heuristic),
                a dataset name (if 'datasets' is an Ensemble), or a Dataset
        lscale: also rescale the scale/variance parameter (returns (loc, scale/loc) factors)
        returns: list or Ensemble (mirroring the input) of rescaled datasets '''
    # N.B.: all 'raise' statements use call form; the old 'raise X, arg' syntax is Python-2-only
    if not isinstance(datasets, (list, tuple, Ensemble)): raise TypeError
    if isinstance(datasets, Ensemble) and isinstance(reference, basestring):
        reference = datasets[reference]
    elif not isinstance(reference, Dataset): raise TypeError
    if target is None or target == 'auto':
        pass # every dataset is scaled individually or based on suffixes
    elif isinstance(datasets, Ensemble) and isinstance(target, basestring):
        target = datasets[target]
    elif not isinstance(target, Dataset): raise TypeError(target)
    if suffixes is None:
        # NOTE(review): '2100' lacks the leading dash that '-2050' has — confirm this is intentional
        suffixes = ('-2050', '2100') # suffixes for scaling heuristic
    # determine scale factor
    def scaleFactor(reference, target, lscale=False, lglobal=False):
        ''' internal function to compute rescaling factors for common variables '''
        scalefactors = dict() # return dict with scalefactors for all applicable variables
        for varname, refvar in reference.variables.iteritems():
            if varname in target and isinstance(refvar, VarRV):
                # only varaibles that appear in both sets
                tgtvar = target.variables[varname]
                iloc = 1 if refvar.shape[-1] == 3 else 0 # 3-parameter dists carry shape first
                # insert dummy ensemble axis, if necessary
                refvar = refvar.insertAxes(new_axes=tgtvar.axes, lcopy=True, asVar=True, linplace=False)
                if refvar.axes[-1].name.startswith('params'):
                    refdata = refvar.data_array.take(iloc, axis=-1)
                else: raise AxisError(refvar.axes[-1])
                if refvar.ndim < tgtvar.ndim:
                    # N.B.: this is necessary, because WRF (target) can have an extra ensemble dimension that obs
                    # typically don't have; then we just replicate the obs for each ensemble element
                    from warnings import warn
                    if lglobal:
                        warn("Scalefactors are being averaged over extra target dimensions (e.g. 'ensemble' axis)")
                    dimdiff = tgtvar.ndim - refvar.ndim
                    if refvar.shape != tgtvar.shape[dimdiff:]:
                        raise AxisError("{:s} != {:s}".format(tgtvar, refvar))
                    refdata = refdata.reshape((1, ) * dimdiff + refvar.shape[:-1])
                elif refvar.shape != tgtvar.shape:
                    raise AxisError("{:s} != {:s}".format(tgtvar, refvar))
                tgtdata = tgtvar.data_array.take(iloc, axis=-1)
                # location factor: reference/target (globally averaged or elementwise)
                if lglobal: loc = np.mean(refdata) / np.mean(tgtdata)
                else: loc = refdata / tgtdata
                if lscale:
                    iscale = 2 if refvar.shape[-1] == 3 else 1
                    if lglobal:
                        scale = np.mean(refvar.data_array.take(iscale, axis=-1)) / \
                                np.mean(tgtvar.data_array.take(iscale, axis=-1))
                    else:
                        scale = refvar.data_array.take(iscale, axis=-1) / tgtvar.data_array.take(iscale, axis=-1)
                    scalefactors[varname] = loc, (scale / loc)
                else: scalefactors[varname] = loc
        return scalefactors # return dict with scale factors for variables
    # compute general scalefactors
    if target == 'auto':
        scalefactor_collection = dict()
    elif target is not None:
        scalefactors = scaleFactor(reference, target, lscale=lscale, lglobal=lglobal)
    # loop over datasets
    rescaled_datasets = []
    for dataset in datasets:
        if dataset == reference:
            # determine variables that can be scaled (VarRV's)
            varlist = [varname for varname, var in dataset.variables.iteritems() if isinstance(var, VarRV)]
            rescaled_dataset = dataset.copy(varlist=varlist)
            # add mock scale factors for consistency
            for var in rescaled_dataset.variables.itervalues():
                var.atts['loc_factor'] = 1
                var.atts['scale_factor'] = 1
                var.atts['shape_factor'] = 1
        else:
            # generate new dataset (without variables, and in-memory)
            if isinstance(dataset, DatasetNetCDF):
                rescaled_dataset = dataset.copy(varlist=[], asNC=False)
            else:
                rescaled_dataset = dataset.copy(varlist=[])
            # individual scaling
            if target is None or target == 'auto':
                parent = None
                if target == 'auto' and dataset.name.endswith(suffixes):
                    for suffix in suffixes:
                        if dataset.name.endswith(suffix):
                            # check, which suffix, and remove it
                            parent = dataset.name[:-(len(suffix) + 1)]
                            break
                    if parent and '-' not in parent: parent += '-1' # convention for WRF names
                if parent and parent in scalefactor_collection:
                    scalefactors = scalefactor_collection[parent] # use scale factors from parent
                else:
                    # scale individually
                    scalefactors = scaleFactor(reference, dataset, lscale=lscale, lglobal=lglobal)
                if target == 'auto':
                    scalefactor_collection[dataset.name] = scalefactors # for later use
            # loop over variables
            for varname, scalefactor in scalefactors.iteritems():
                if varname in dataset:
                    # rescale and add variable to new dataset
                    var = dataset.variables[varname]
                    if lscale:
                        rsvar = var.rescale(loc=scalefactor[0], scale=scalefactor[1])
                    else:
                        rsvar = var.rescale(loc=scalefactor)
                    rescaled_dataset.addVariable(rsvar)
        # add dataset to list
        rescaled_datasets.append(rescaled_dataset)
    # put everythign into Ensemble, if input was Ensemble
    if isinstance(datasets, Ensemble):
        rescaled_datasets = Ensemble(*rescaled_datasets, name=datasets.ens_name, title=datasets.ens_title)
    # return datasets/ensemble
    return rescaled_datasets
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None, stations=None,
                          varlist=None, slices=None, aggregation='mean', dataset_mode='time-series',
                          lWSC=True, WSC_period=None, shapetype=None, variable_list=None, basin_list=None,
                          lforceList=True, obs_ts=None, obs_clim=None, name=None, title=None, obs_list=None,
                          ensemble_list=None, ensemble_product='inner', **kwargs):
    ''' convenience function to load shape observations based on 'aggregation' and 'varlist'
        (mainly add WSC gage data); returns an Ensemble of observational datasets '''
    if obs_list is None: obs_list = observational_datasets
    if name is None: name = 'obs'
    if title is None: title = 'Observations'
    # variables for which ensemble expansion is not supported
    not_supported = ('season','seasons','varlist','mode','dataset_mode','provs','basins','shapes',)
    # resolve variable list (no need to maintain order)
    if varlist is None: varlist = [] # robustness: no extra variables requested
    if isinstance(varlist,str): varlist = [varlist]
    variables = set(shp_params)
    # N.B.: use a dedicated loop variable here, so that the 'name' argument
    #       (used for the Ensemble fallback below) is not clobbered by the loop
    for vname in varlist:
        if vname in variable_list: variables.update(variable_list[vname].vars)
        elif lforceList: raise VariableError("Variable list '{}' does not exist.".format(vname))
        else: variables.add(vname)
    variables = list(variables)
    # determine if we need the gage dataset
    lWSC = isinstance(basins,str) and any([var in WSC_vars for var in variables]) and lWSC # doesn't work if multiple basins are loaded
    # default obs list
    if obs is None: obs = ['Observations',]
    elif isinstance(obs,str): obs = [obs]
    elif isinstance(obs,tuple): obs = list(obs)
    elif not isinstance(obs,list): raise TypeError(obs)
    # configure slicing (extract basin/province/shape and period)
    expand_vars = ('basins','stations','provs','shapes','slices') # variables that need to be added to slices (and expanded first)
    if ensemble_list: expand_list = [varname for varname in expand_vars if varname in ensemble_list]
    if ensemble_list and expand_list:
        # expand the slicing variables together with the ensemble expansion
        local_vars = locals(); exp_args = dict()
        for varname in expand_vars: # copy variables to expand right away
            exp_args[varname] = local_vars[varname]
        for varname in expand_list: # remove entries from ensemble expansion
            if varname != 'slices': ensemble_list.remove(varname) # only 'slices' will continue to be expanded
        if 'slices' not in ensemble_list: ensemble_list.append('slices')
        slices = [_configSlices(**arg_dict) for arg_dict in expandArgumentList(expand_list=expand_list,
                                                                              lproduct=ensemble_product, **exp_args)]
    else:
        slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, stations=stations, period=None)
    # substitute default observational dataset and separate aggregation methods
    iobs = None; clim_ens = None
    # N.B.: iterate in reverse order, so that deleting items does not interfere with the indexing
    for i,obs_name in reverse_enumerate(obs):
        if obs_name in obs_aliases or obs_name not in timeseries_datasets:
            if iobs is not None: raise ArgumentError("Can only resolve one default dataset: {}".format(obs))
            if aggregation == 'mean' and seasons is None and obs_clim is not None:
                # remove dataset entry from list (and all the arguments)
                del obs[i]; iobs = i # remember position of default obs in ensemble
                clim_args = kwargs.copy(); slc = slices; shp = shapetype
                # clean up variables for ensemble expansion, if necessary
                if ensemble_list and ensemble_product.lower() == 'inner':
                    if 'names' in ensemble_list:
                        obs_names = [obs_clim]
                        for arg in ensemble_list:
                            if arg in ('slices','shape'): pass # dealt with separately below
                            elif arg in not_supported:
                                raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                            elif arg in kwargs:
                                clim_args[arg] = kwargs[arg][iobs]; del kwargs[arg][iobs]
                            else:
                                raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                        if 'slices' in ensemble_list: slc = slices[iobs]; del slices[iobs]
                        if 'shape' in ensemble_list: shp = shapetype[iobs]; del shapetype[iobs]
                        clim_len = 1 # expect length of climatology ensemble
                    else:
                        obs_names = obs_clim # no name expansion
                        clim_len = None # expected length of climatology ensemble (determined below)
                        for arg in ensemble_list:
                            if arg in not_supported:
                                raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                            # NOTE(review): these used to test "'slices' in ensemble_list" / "'shape' in ensemble_list",
                            #               which is constant inside this loop and defeats the per-argument length check;
                            #               testing the loop variable 'arg' matches the sibling branch above
                            elif arg == 'slices': l = len(slc)
                            elif arg == 'shape': l = len(shp)
                            elif arg in clim_args: l = len(clim_args[arg])
                            else:
                                raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                            if clim_len is None: clim_len = l # first ensemble keyword sets the length
                            elif l != clim_len: raise ArgumentError(arg,l,clim_len) # inconsistent expansion lengths
                elif ensemble_list and ensemble_product.lower() == 'outer':
                    clim_len = 1
                    for arg in ensemble_list:
                        if arg != 'names':
                            assert isinstance(clim_args[arg],(list,tuple)), clim_args[arg]
                            clim_len *= len(clim_args[arg]) # outer product multiplies lengths
                    obs_names = [obs_clim] if 'names' in ensemble_list else obs_clim
                else:
                    obs_names = [obs_clim]; clim_len = 1
                # now load climatology instead of time-series and skip aggregation
                try:
                    clim_ens = loadEnsemble(names=obs_names, season=seasons, aggregation=None, slices=slc,
                                            varlist=variables, ldataset=False, dataset_mode='climatology', shape=shp,
                                            ensemble_list=ensemble_list, ensemble_product=ensemble_product,
                                            obs_list=obs_list, basin_list=basin_list, **clim_args)
                    assert len(clim_ens) == clim_len, clim_ens
                except EmptyDatasetError: pass
            else:
                obs[i] = obs_ts # trivial: just substitute default name and load time-series
    # prepare and load ensemble of observations
    if len(obs) > 0:
        if len(obs) == 1 and ensemble_list and 'names' not in ensemble_list: obs = obs[0]
        try:
            obsens = loadEnsemble(names=obs, season=seasons, aggregation=aggregation, slices=slices,
                                  varlist=variables, ldataset=False, dataset_mode=dataset_mode, shape=shapetype,
                                  obs_list=obs_list, basin_list=basin_list, ensemble_list=ensemble_list,
                                  ensemble_product=ensemble_product, **kwargs)
        except EmptyDatasetError:
            obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
    else:
        obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
    # add default obs back in, if they were removed earlier
    if clim_ens is not None:
        for clim_ds in clim_ens[::-1]: # add observations in correct order: adding backwards allows successive insertion ...
            obsens.insertMember(iobs,clim_ds) # ... at the point where the name block starts
    # load stream gage data from WSC; should not interfere with anything else; append to ensemble
    if lWSC: # another special case: river hydrographs
        from datasets.WSC import GageStationError, loadGageStation
        try:
            if aggregation is not None and seasons is None: dataset_mode = 'climatology' # handled differently with gage data
            if WSC_period is None: WSC_period = kwargs.get('obs_period',kwargs.get('period',None))
            # always load runoff/discharge
            dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation, period=WSC_period,
                                      mode=dataset_mode, filetype='monthly', basin_list=basin_list, lfill=True, lexpand=True)
            if seasons:
                method = aggregation if aggregation.isupper() else aggregation.title()
                if aggregation: dataset = getattr(dataset,'seasonal'+method)(season=seasons, taxis='time')
                else: dataset = dataset.seasonalSample(season=seasons)
            if slices is not None: dataset = dataset(**slices) # slice immediately
            obsens += dataset.load()
        except GageStationError:
            pass # just ignore, if gage station data is missing
    # return ensemble (will be wrapped in a list, if BatchLoad is used)
    return obsens
def loadShapeEnsemble(names=None, seasons=None, basins=None, provs=None, shapes=None, varlist=None,
                      aggregation='mean', slices=None, shapetype=None, filetypes=None, period=None,
                      obs_period=None, WSC_period=None, name=None, title=None, variable_list=None,
                      WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, basin_list=None,
                      lforceList=True, obs_list=None, obs_ts=None, obs_clim=None,
                      ensemble_list=None, ensemble_product='inner', **kwargs):
    ''' convenience function to load shape ensembles (in Ensemble container) or observations;
        kwargs are passed to loadEnsembleTS '''
    names = list(names) # make a new list (copy)
    # separate observations from simulations
    if obs_list is None: obs_list = observational_datasets
    obs_names = []; iobs = []; ens_names = []; iens = []
    # N.B.: use a dedicated loop variable here, so that the 'name' argument
    #       (passed on to the loader functions below) is not clobbered by the loop
    for i,dsname in enumerate(names):
        if dsname in obs_list or dsname in obs_aliases:
            obs_names.append(dsname); iobs.append(i) # observational dataset
        else:
            ens_names.append(dsname); iens.append(i) # simulation dataset
    assert len(iens) == len(ens_names) and len(iobs) == len(obs_names)
    if len(obs_names) > 0:
        # assemble arguments
        obs_args = dict(obs=obs_names, seasons=seasons, basins=basins, provs=provs, shapes=shapes,
                        varlist=varlist, slices=slices, aggregation=aggregation, shapetype=shapetype,
                        period=period, obs_period=obs_period, obs_ts=obs_ts, obs_clim=obs_clim,
                        variable_list=variable_list, basin_list=basin_list, WSC_period=WSC_period,
                        ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
        # check if we have to modify arguments to preserve ensemble_list expansion
        if ensemble_list and ensemble_product == 'inner' and 'names' in ensemble_list and len(ensemble_list) > 1:
            for key in ensemble_list:
                if key != 'names':
                    ens_list = obs_args[key]
                    obs_args[key] = [ens_list[i] for i in iobs] # only keep entries for observations
        # observations for basins require special treatment to merge basin averages with gage values
        # load observations by redirecting to appropriate loader function
        obsens = loadShapeObservations(name=name, title=title, obs_list=obs_list, **obs_args)
    else:
        obsens = [] # no observations requested
    if len(ens_names) > 0: # has to be a list
        # prepare arguments
        variables, filetypes = _resolveVarlist(varlist=varlist, filetypes=filetypes,
                                               params=shp_params, variable_list=variable_list, lforceList=lforceList)
        # configure slicing (extract basin/province/shape and period)
        slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, period=period)
        # assemble arguments
        ens_args = dict(names=ens_names, season=seasons, slices=slices, varlist=variables, shape=shapetype,
                        aggregation=aggregation, period=period, obs_period=obs_period,
                        WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens,
                        filetypes=filetypes, ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
        # check if we have to remove obs datasets to preserve ensemble_list expansion
        if ensemble_list and ensemble_product == 'inner' and 'names' in ensemble_list and len(ensemble_list) > 1:
            for key in ensemble_list:
                if key != 'names':
                    ens_list = ens_args[key]
                    ens_args[key] = [ens_list[i] for i in iens] # only keep entries for simulations
        # load ensemble (no iteration here)
        shpens = loadEnsemble(name=name, title=title, obs_list=obs_list, **ens_args)
    else:
        shpens = Ensemble(name=name, title=title, basetype='Dataset')
    # get resolution tag (will be added below)
    res = None
    for member in shpens:
        if 'resstr' in member.atts:
            if res is None: res = member.atts['resstr'] # first member sets the resolution
            elif res != member.atts['resstr']:
                res = None; break # no common resolution
    # merge observations back into the ensemble at their original positions
    if len(obsens) > 0 and len(shpens) > 0:
        j = -1 # fallback only; obs_names is non-empty whenever obsens has members
        for dsname,j in zip(obs_names,iobs):
            shpens.insertMember(j,obsens[dsname]) # add known observations in correct order
            del obsens[dsname] # remove the ones we already know from list, so we can deal with the rest
        j += 1 # add remaining obs datasets after the last known one
        for i,obs in enumerate(obsens):
            shpens.insertMember(j+i,obs)
    elif len(obsens) > 0 and len(shpens) == 0:
        shpens = obsens
    shpens.resolution = res # add resolution tag now, to make sure it is there
    return shpens