Example #1
File: dataset.py Project: ocefpaf/paegan
    def nc_object(ncfile, tname='time'):

        if isinstance(ncfile, str):
            try:
                return netCDF4.Dataset(ncfile)
            except (IOError, RuntimeError, IndexError):
                # Are we a set of files?
                try:
                    return netCDF4.MFDataset(ncfile)
                except (IOError, RuntimeError, IndexError):
                    try:
                        return netCDF4.MFDataset(ncfile, aggdim=tname)
                    except (IOError, RuntimeError, IndexError):
                        try:
                            # Unicode isn't working sometimes?
                            return netCDF4.MFDataset(str(ncfile), aggdim=tname)
                        except Exception:
                            logger.exception("Can not open %s" % ncfile)
                            raise
                except ValueError:
                    # Probably a DAP endpoint
                    logger.exception("Can not open %s" % ncfile)
                    raise
            except Exception:
                logger.exception("Can not open %s" % ncfile)
                raise
        elif isinstance(ncfile, Dataset):
            # Passed in paegan Dataset object
            return ncfile.nc
        elif isinstance(ncfile, netCDF4.Dataset) or isinstance(ncfile, netCDF4.MFDataset):
            # Passed in a netCDF4 Dataset object
            return ncfile
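A brief usage sketch for the helper above (file names are hypothetical; it assumes netCDF4, logger and the paegan Dataset class are available as in the snippet):

    nc1 = nc_object("model_output.nc")                   # single file -> netCDF4.Dataset
    nc2 = nc_object("model_output_*.nc", tname="time")   # falls through to netCDF4.MFDataset
    nc3 = nc_object(nc1)                                  # an open netCDF4 object is returned unchanged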
Example #2
def get_measurements(data_dir, quantity, lat, lon, start_time, end_time):
    """return data for a given location"""

    data_dir = pathlib.Path(data_dir)
    v_urls = list(sorted(data_dir.glob('vwnd.10m.gauss.*.nc')))
    u_urls = list(sorted(data_dir.glob('uwnd.10m.gauss.*.nc')))
    logger.info("reading data in %s, found urls: %s and %s", data_dir, u_urls,
                v_urls)
    # get all data required to find correct dataset
    data = {}
    with netCDF4.MFDataset(u_urls, aggdim='time') as ds_u:
        # lookup variables in both files
        t_u = ds_u.variables['time'][:]
        lon_u = ds_u.variables['lon'][:]
        lat_u = ds_u.variables['lat'][:]

    # don't use num2date from netcdf4, too slow
    t0 = np.datetime64('1800-01-01', 'm')

    # use variables from u
    data['t'] = t0 + t_u.astype('timedelta64[h]')
    data['lon'] = lon_u
    data['lat'] = lat_u
    lat_idx = np.argmin(np.abs(data['lat'] - lat))
    lon_idx = np.argmin(np.abs(data['lon'] - lon))
    t_range = np.asarray([start_time, end_time], 'datetime64[m]')
    t_start_idx, t_end_idx = np.searchsorted(data['t'], t_range)

    # slice
    s = np.s_[t_start_idx:t_end_idx, lat_idx, lon_idx]

    names = {}
    units = {}
    with netCDF4.MFDataset(u_urls, aggdim='time') as ds_u:
        data['u'] = ds_u.variables['uwnd'][s]
        names['u'] = ds_u.variables['uwnd'].long_name
        units['u'] = ds_u.variables['uwnd'].units
    with netCDF4.MFDataset(v_urls, aggdim='time') as ds_v:
        data['v'] = ds_v.variables['vwnd'][s]
        names['v'] = ds_v.variables['vwnd'].long_name
        units['v'] = ds_v.variables['vwnd'].units

    series = pd.DataFrame(data=dict(
        dateTime=data['t'][t_start_idx:t_end_idx], u=data['u'], v=data['v']))
    # make sure we serialize to json
    series = json.loads(json.dumps(series, cls=CustomEncoder))
    response = {"series": series}
    return response
Example #3
 def MFDataset(ncfile):
     """Return an MFnetCDF4 object given a string or list.  A string is expanded
        with wildcards using glob.  A netCDF4 or MFnetCDF4 object returns itself."""
     if isinstance(ncfile, str):
         ncfiles = glob(ncfile)
         return netCDF.MFDataset(sorted(ncfiles))
     elif isinstance(ncfile, list) or isinstance(ncfile, tuple):
         return netCDF.MFDataset(sorted(ncfile))
     elif hasattr(ncfile, 'variables'):  # accept any object with a variables attribute
         assert isinstance(ncfile.variables, dict), \
                'variables attribute must be a dictionary'
         return ncfile
     else:
         raise TypeError('type %s not supported' % type(ncfile))
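A hedged usage sketch for this wrapper (file names are placeholders; assumes "from glob import glob" and "import netCDF4 as netCDF" as the snippet implies):

    nc = MFDataset('ocean_his_*.nc')                            # wildcard string, expanded with glob
    nc = MFDataset(['ocean_his_0001.nc', 'ocean_his_0002.nc'])  # explicit list of files
    nc = MFDataset(nc)                                           # an object with a .variables dict passes through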
Example #4
def get_grid_info(data_dir):
    info = {}
    data_dir = pathlib.Path(data_dir)
    v_urls = list(sorted(data_dir.glob('vwnd.10m.gauss.*.nc')))
    u_urls = list(sorted(data_dir.glob('uwnd.10m.gauss.*.nc')))
    info['urls'] = u_urls + v_urls
    with netCDF4.MFDataset(u_urls, aggdim='time') as ds_u:
        attrs = ds_u.ncattrs()
        for attr in attrs:
            info[attr] = getattr(ds_u, attr)
    with netCDF4.MFDataset(v_urls, aggdim='time') as ds_v:
        attrs = ds_v.ncattrs()
        for attr in attrs:
            info[attr] = getattr(ds_v, attr)
    return info
Example #5
def _get_dataset(filename):
    df = None
    if isinstance(filename, basestring):
        df = nc4.Dataset(filename)
    else:
        df = nc4.MFDataset(filename)
    return df
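Note that basestring only exists on Python 2. A minimal Python 3 sketch of the same dispatch (assuming "import netCDF4 as nc4") could be:

    def _get_dataset_py3(filename):
        # a single path opens a Dataset; anything else (e.g. a list of paths) opens an MFDataset
        if isinstance(filename, str):
            return nc4.Dataset(filename)
        return nc4.MFDataset(filename)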
Example #6
 def __init__(self, path_to_files, analysis_file="lfff00000000c.nc"):
     self._date_time_regex = re.compile(
         r"(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})")
     self._variables = None
     self._init_time = None
     self._history_interval = None
     self._timesteps = None
     self._lats = None
     self._lons = None
     self._rlats = None
     self._rlons = None
     self._xshape = None
     self._yshape = None
     self._last_time = None
     self._grid_north_pole_lat = None
     self._grid_north_pole_lon = None
     self._rotated_grid = None
     self._analysis_file = analysis_file
     self._cosmo_file_path = os.path.join(path_to_files, '')
     self._files_in_path = list(
         set(glob.glob(self._cosmo_file_path + "lfff*.nc")) -
         set(glob.glob(self._cosmo_file_path + self._analysis_file)))
     self._files_in_path.sort()
     self._num_of_files = len(self._files_in_path)
     if self._num_of_files < 1:
         raise ValueError(
             "COSMOPython Lib: No COSMO netCDF dataset found. Check the path"
         )
     try:
         self._cosmo_multifile = netCDF4.MFDataset(self._files_in_path)
     except:
         raise ValueError(
             "COSMOPythonLib: netCDF File(s) could not be opened. Corrupt file(s)?"
         )
     self.__create_meta_data()
Example #7
def land_sea_mask():
    """ builds the boolean land sea mask """
    with netCDF4.MFDataset("%s/*.sp.nc" %
                           wam2layers_config.data_dir) as dataset:
        latitude = dataset.variables['latitude'][:]
        longitude = wrap_lon180(dataset.variables['longitude'][:])

    gridsize = latitude[0] - latitude[1]
    xgrd, ygrd = np.meshgrid(longitude, latitude)

    filename = '%s/lsm.%s.npy' % (wam2layers_config.data_dir, gridsize)

    if os.path.isfile(filename):
        mask = np.unpackbits(np.load(filename)).astype('bool')[:xgrd.size]
    else:
        shp = cartopy.io.shapereader.natural_earth(resolution='110m',
                                                   category='physical',
                                                   name='land')
        # load the shapefile to use as mask and build the polygons
        shp = cartopy.io.shapereader.Reader(shp)
        geoms = shp.geometries()
        polygon = shapely.ops.cascaded_union(list(geoms))
        mask = inpolygon(polygon, zip(xgrd.ravel(), ygrd.ravel()))
        np.save(filename, np.packbits(mask))

    return xgrd, ygrd, mask.reshape(xgrd.shape)
Example #8
def split_to_monthly_vars(src_root: str, dst_root: str, year: int, domain_num: int,
                          filetype: str, month: int, included_vars=None, included_plev_nums=None):
    """Split and convert a netCDF Classic (3/4) file to CMOR specs"""
    global current_file
    infiles = os.path.join(src_root, tabs.src_file_pattern(filetype, domain_num, year, month))
    with netCDF4.MFDataset(infiles) as src:
        print('CMORize split:', src_root, '-->', dst_root)
        print('inputs:', os.path.basename(infiles))
        time_values = None
        # loop source variables
        for src_vname, src_var in src.variables.items():
            if src_vname not in tabs.varmap:
                continue
            vname = tabs.varmap[src_vname]
            if included_vars and vname not in included_vars:
                continue
            print('', src_vname, src_var.dimensions, end='', flush=True)
            values = None
            # Loop through possible plevels
            start, finish = get_plevels_range(vname)
            if start > 0: print('')
            for plev_num in range(start, finish):
                if plev_num and included_plev_nums and (plev_num not in included_plev_nums):
                    continue
                outfile = os.path.join(dst_root, tabs.dst_file_month(vname, domain_num, year, month, plev_num))
                outfinal = os.path.join(dst_root, tabs.dst_file_year(vname, domain_num, year, plev_num))
                if os.path.isfile(outfile):
                    print('')
                    print('skipping existing:', outfile)
                    continue
                if os.path.isfile(outfinal):
                    print('')
                    print('skipping existing:', outfinal)
                    continue
                if values is None:
                    values = src_var[:]
                current_file = outfile
                with netCDF4.Dataset(outfile, 'w', format='NETCDF4_CLASSIC') as dst:
                    # Create missing dimensions required by variable and return destination dimensions
                    dst_dims = process_dimensions(src_var, src, dst)
                    
                    # Filter the main variable values
                    dst_values = process_values(vname, plev_num, values, dst)
                    
                    vname_full = tabs.full_vname(vname, plev_num)
                    print(' -->', vname_full, tabs.constants['domains'][domain_num], year, month, dst_dims, dst_values.dtype, end='', flush=True)

                    # Add the main variable
                    var_out = dst.createVariable(vname_full, dst_values.dtype, dst_dims, fill_value=tabs.constants['missing_value'],
                                                 zlib=(tabs.compress > 0), complevel=tabs.compress)
                    var_out[:] = dst_values

                    # Add time variable
                    if time_values is None:
                        time_values = process_values(tabs.constants['dst_timevar'], 0, src.variables[tabs.constants['src_timevar']][:], None)
                    var_out = dst.createVariable(tabs.constants['dst_timevar'], time_values.dtype, (tabs.constants['dst_timedim'],),
                                                 zlib=(tabs.compress > 0), complevel=tabs.compress)
                    var_out[:] = time_values
                    print('')
                current_file = None
Example #9
def extract_season(inroot, infiles, outroot, outfile, month, y_min, y_max):
    season = {0: 'full', 3: 's1', 6: 's2', 9: 's3', 12: 's4'}
    opr = MERGED
    basefile = outfile + '_%s_%s.nc' % (season[month], opr)
    outfile_full = os.path.join(outroot, season[month], opr, basefile)
    print('Output:')
    print(os.path.dirname(outfile_full))
    print('   ', os.path.basename(outfile_full))
    if os.path.isfile(outfile_full):
        print('    ...exists')
        return outfile_full

    tmp = os.path.join(outroot, str(uuid.uuid4()) + '.nc')
    infiles_full = [os.path.join(inroot, f) for f in infiles]

    with nc4.Dataset(infiles_full[0]) as src1, nc4.MFDataset(
            infiles_full) as src, nc4.Dataset(tmp, "w") as dst:
        write_nc4_season(src1, src, dst, y_min, y_max, month)

    odir = os.path.dirname(outfile_full)
    if not os.path.isdir(odir):
        os.makedirs(odir)
        print('Created dir:', odir)
    os.rename(tmp, outfile_full)
    return outfile_full
Example #10
File: nc.py Project: moghimis/ocgis
    def _open_(uri, mode='r', **kwargs):
        """
        :rtype: object
        """
        kwargs = kwargs.copy()
        group_indexing = kwargs.pop('group_indexing', None)
        lvm = kwargs.pop('vm', vm)

        if isinstance(uri, six.string_types):
            # Open the dataset in parallel if we want to use the netCDF MPI capability. It may not be available even in
            # parallel.
            if mode == 'w' and lvm.size > 1:
                if kwargs.get('format', 'NETCDF4') == 'NETCDF4':
                    if kwargs.get('parallel') is None and env.USE_NETCDF4_MPI:
                        kwargs['parallel'] = True
                    if kwargs.get('parallel') and kwargs.get('comm') is None:
                        kwargs['comm'] = lvm.comm
            ret = nc.Dataset(uri, mode=mode, **kwargs)
            # tdk:FIX: this should be enabled for MFDataset as well. see https://github.com/Unidata/netcdf4-python/issues/809#issuecomment-435144221
            # netcdf4 >= 1.4.0 always returns masked arrays. This is inefficient and is turned off by default by ocgis.
            if hasattr(ret, 'set_always_mask'):
                ret.set_always_mask(False)
        else:
            ret = nc.MFDataset(uri, **kwargs)

        if group_indexing is not None:
            for group_name in get_iter(group_indexing):
                ret = ret.groups[group_name]

        return ret
Example #11
def load_model(model_path, start_date, end_date, field, nowcast_flag=False):
    """Loads model grid_T data in date range defined by start_date and end_date
    Only considers daily averaged model fields.
    Returns model depths, variable defined by field, and dates associated with
    variable
    """

    files = analyze.get_filenames(start_date, end_date, '1d', 'grid_T',
                                  model_path)
    if nowcast_flag:
        var, dates = analyze.combine_files(files, field, np.arange(0, 40),
                                           np.arange(0, 898),
                                           np.arange(0, 398))
        tmp = nc.Dataset(files[0])
        depth = tmp.variables['deptht'][:]
    else:
        tracers = nc.MFDataset(files)
        time = tracers.variables['time_counter']
        # convert date
        dates = []
        start = datetime.datetime.strptime(time.time_origin,
                                           ' %Y-%b-%d %H:%M:%S')
        for t in time[:]:
            d = start + datetime.timedelta(seconds=t)
            dates.append(d)
        depth = tracers.variables['deptht'][:]
        var = tracers.variables[field][:]

    return depth, var, dates
Example #12
File: lib.py Project: ivicajan/seapy
def netcdf(file, aggdim=None):
    """
    Wrapper around netCDF4 to open a file as either a Dataset or an
    MFDataset.

    Parameters
    ----------
    file : string or list,
        Filename(s) to open. If the string has wildcards or is a list,
        this attempts to open an MFDataset
    aggdim : string,
        Name of dimension to concatenate along if loading a set of files.
        A value of None (default) uses the unlimited dimension.

    Returns
    -------
    netCDF4 Dataset or MFDataset
    """
    import netCDF4
    try:
        nc = netCDF4.Dataset(file)
    except (OSError, RuntimeError):
        try:
            nc = netCDF4.MFDataset(file, aggdim=aggdim)
        except IndexError:
            raise FileNotFoundError("{:s} cannot be found.".format(file))
    return nc
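A hedged usage sketch for the wrapper above (file names and the aggregation dimension are placeholders):

    nc = netcdf('ocean_avg_0001.nc')                    # single file -> netCDF4.Dataset
    nc = netcdf('ocean_avg_*.nc', aggdim='ocean_time')  # wildcards -> netCDF4.MFDataset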
Example #13
def read_ncdf(name,path='/work/gg0877/KST/tide_gaughes/ncdf',origin='2012-01-01 00:00:00'):
  ncname = '_'.join(word[0].upper()+word[1:] for word in name.split('_'))
  if path[-6:]=='hawaii':
    nc = netCDF4.MFDataset(path+'/'+ncname+'*.nc')
  else:
    nc = netCDF4.Dataset(path+'/'+ncname+'.nc')
  ncv = nc.variables
  ot = utime('seconds since '+origin)
  ut = utime(ncv['time'].units)
  time = ot.date2num(ut.num2date(ncv['time'][:]))
  print(ncv.keys())
  scale = {'cm':100.,'m':1.0,'millimeters':1000.,'mm':1000.}

  if 'elev' in ncv:
    if 'units' in ncv['elev'].ncattrs():
      scale_factor = scale[ncv['elev'].units]
    else:
      if ncv['elev'][:].std()>100.:
        scale_factor=1000.
      elif ncv['elev'][:].std()>10.:
        scale_factor=100.
      else:
        scale_factor=1.0
    elev = ncv['elev'][:].squeeze()/scale_factor
  elif 'sea_surface_height_above_reference_level' in ncv:
    if 'units' in ncv['sea_surface_height_above_reference_level'].ncattrs():
      scale_factor = scale[ncv['sea_surface_height_above_reference_level'].units]
    else:
      scale_factor = 1000.
    elev = ncv['sea_surface_height_above_reference_level'][:].squeeze()/scale_factor
  nc.close()
  return time,elev
Example #14
def ncopen(f, mode='r'):
    if not isstr(f) or any([i in f for i in '*?']):
        nc = netCDF4.MFDataset(f)
    else:
        nc = netCDF4.Dataset(f, mode)

    return nc
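Usage follows directly from the wildcard test above (a sketch with placeholder paths; it assumes the project's isstr helper simply checks for a string):

    nc = ncopen('output_0001.nc')   # plain string without wildcards -> Dataset
    nc = ncopen('output_????.nc')   # '?' or '*' in the string -> MFDataset
    nc = ncopen(['a.nc', 'b.nc'])   # non-string input also goes through MFDataset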
Example #15
File: nc.py Project: wk1984/ocgis
    def _open_(uri, mode='r', **kwargs):
        """
        :rtype: object
        """
        kwargs = kwargs.copy()
        group_indexing = kwargs.pop('group_indexing', None)
        lvm = kwargs.pop('vm', vm)

        if isinstance(uri, six.string_types):
            # Open the dataset in parallel if we want to use the netCDF MPI capability. It may not be available even in
            # parallel.
            if mode == 'w' and lvm.size > 1:
                if kwargs.get('format', 'NETCDF4') == 'NETCDF4':
                    if kwargs.get('parallel') is None and env.USE_NETCDF4_MPI:
                        kwargs['parallel'] = True
                    if kwargs.get('parallel') and kwargs.get('comm') is None:
                        kwargs['comm'] = lvm.comm
            ret = nc.Dataset(uri, mode=mode, **kwargs)
        else:
            ret = nc.MFDataset(uri, **kwargs)

        if group_indexing is not None:
            for group_name in get_iter(group_indexing):
                ret = ret.groups[group_name]

        return ret
Example #16
def open_files(ncfiles, return_dsvar=False):
    """Open netCDF files, either with xray or netCDF4"""
    try:
        if _ncmodule == 'xray':
            # open files with xray
            try:
                ds = xray.open_mfdataset(ncfiles)
            except ValueError:
                ds = xray.open_mfdataset(ncfiles, decode_times=False)
                print('Warning: Using decode_times=False')
            dsvar = ds
        else:
            # open files with netCDF4
            if len(ncfiles) > 1:
                ds = netCDF4.MFDataset(ncfiles)
            else:
                ds = netCDF4.Dataset(ncfiles[0])
            dsvar = ds.variables
    except RuntimeError as err:
        traceback.print_exc(err)
        print('Warning: File(s) could not be opened: {}'.format(ncfiles))
        dsvar = None
    if return_dsvar:
        return ds, dsvar
    else:
        return ds
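A brief usage sketch (the file list and variable name are hypothetical; whether xray or netCDF4 is used depends on the module-level _ncmodule flag in the original code):

    files = ['air.2000.nc', 'air.2001.nc']
    ds, dsvar = open_files(files, return_dsvar=True)
    # dsvar is either the xray dataset itself or the netCDF4 .variables mapping
    air = dsvar['air'][:]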
Example #17
def lsm_demo():
    """ plots a masked layer of time-averaged surface pressure """

    import netCDF4
    import cartopy.crs
    import matplotlib.pyplot as plt
    import wam2layers_config
    import numpy.ma as ma


    xgrd, ygrd, mask = wam2layers.land_sea_mask()

    with netCDF4.MFDataset("%s/*.sp.nc" % wam2layers_config.data_dir) as dataset:
        pres = np.average(dataset.variables['sp'][:], axis=0)

    projection = cartopy.crs.PlateCarree()

    # plot mask
    fig, axis = plt.subplots(subplot_kw=dict(projection=projection))
    axis.plot(xgrd[mask], ygrd[mask], 'k.', alpha=0.25)

    # plot masked data
    fig, axis = plt.subplots(subplot_kw=dict(projection=projection))
    #cs = ax.pcolormesh(longitude, latitude, ma.masked_array(pressure, ~m))
    plt.contourf(ma.masked_array(xgrd, mask),
                 ma.masked_array(ygrd, mask),
                 ma.masked_array(pres, ~mask))
    axis.coastlines(resolution='50m')
    axis.set_extent([-90, -30, -40, +20])
Example #18
File: dataset.py Project: imclab/ocgis
 def _ds(self):
     if self.__ds is None:
         try:
             self.__ds = nc.Dataset(self.request_dataset.uri,'r')
         ## likely multiple uris...
         except TypeError:
             self.__ds = nc.MFDataset(self.request_dataset.uri)
     return(self.__ds)
Example #19
def nc_cherche_calendrier(l_f):
    """trouve le calendrier d'une liste
    de fichier netCDF
    """
    ds = nc.MFDataset(l_f)
    calendrier = ds.variables['time'].calendar
    ds.close()
    return calendrier
Example #20
def main(cmdLineArgs,stream=False):
  numpy.seterr(divide='ignore', invalid='ignore', over='ignore') # To avoid warnings

  if not os.path.exists(cmdLineArgs.gridspec): raise ValueError('Specified gridspec directory/tar file does not exist.')
  if os.path.isdir(cmdLineArgs.gridspec):
    x = netCDF4.Dataset(cmdLineArgs.gridspec+'/ocean_hgrid.nc').variables['x'][::2,::2]
    xcenter = netCDF4.Dataset(cmdLineArgs.gridspec+'/ocean_hgrid.nc').variables['x'][1::2,1::2]
    y = netCDF4.Dataset(cmdLineArgs.gridspec+'/ocean_hgrid.nc').variables['y'][::2,::2]
    ycenter = netCDF4.Dataset(cmdLineArgs.gridspec+'/ocean_hgrid.nc').variables['y'][1::2,1::2]
    msk = netCDF4.Dataset(cmdLineArgs.gridspec+'/ocean_mask.nc').variables['mask'][:]
    area = msk*netCDF4.Dataset(cmdLineArgs.gridspec+'/ocean_hgrid.nc').variables['area'][:,:].reshape([msk.shape[0], 2, msk.shape[1], 2]).sum(axis=-3).sum(axis=-1)
    depth = netCDF4.Dataset(cmdLineArgs.gridspec+'/ocean_topog.nc').variables['depth'][:]
  elif os.path.isfile(cmdLineArgs.gridspec):
    x = m6toolbox.readNCFromTar(cmdLineArgs.gridspec,'ocean_hgrid.nc','x')[::2,::2]
    xcenter = m6toolbox.readNCFromTar(cmdLineArgs.gridspec,'ocean_hgrid.nc','x')[1::2,1::2]
    y = m6toolbox.readNCFromTar(cmdLineArgs.gridspec,'ocean_hgrid.nc','y')[::2,::2]
    ycenter = m6toolbox.readNCFromTar(cmdLineArgs.gridspec,'ocean_hgrid.nc','y')[1::2,1::2]
    msk = m6toolbox.readNCFromTar(cmdLineArgs.gridspec,'ocean_mask.nc','mask')[:]
    area = msk*m6toolbox.readNCFromTar(cmdLineArgs.gridspec,'ocean_hgrid.nc','area')[:,:].reshape([msk.shape[0], 2, msk.shape[1], 2]).sum(axis=-3).sum(axis=-1)
    depth = m6toolbox.readNCFromTar(cmdLineArgs.gridspec,'ocean_topog.nc','depth')[:]
  else:
    raise ValueError('Unable to extract grid information from gridspec directory/tar file.') 
  
  
  Sobs = netCDF4.Dataset( cmdLineArgs.woa ).variables['salt']
  if len(Sobs.shape)==3: Sobs = Sobs[0]
  else: Sobs = Sobs[:,0].mean(axis=0)

  rootGroup = netCDF4.MFDataset( cmdLineArgs.infile )
  if 'salt' in rootGroup.variables: varName = 'salt'
  elif 'so' in rootGroup.variables: varName = 'so'
  else: raise Exception('Could not find "salt" or "so" in file "%s"'%(cmdLineArgs.infile))
  if rootGroup.variables[varName].shape[0]>1: Smod = rootGroup.variables[varName][:,0].mean(axis=0)
  else: Smod = rootGroup.variables[varName][0,0]
  
  if cmdLineArgs.suptitle != '':  suptitle = cmdLineArgs.suptitle + ' ' + cmdLineArgs.label
  else: suptitle = rootGroup.title + ' ' + cmdLineArgs.label

  imgbufs = []
  ci=m6plot.pmCI(0.125,2.25,.25)
  if stream is True: img = io.BytesIO()
  else: img = cmdLineArgs.outdir+'/SSS_bias_WOA05.png'
  m6plot.xyplot( Smod - Sobs , x, y, area=area,
      suptitle=suptitle, title='SSS bias (w.r.t. WOA\'05) [ppt]',
      clim=ci, colormap='dunnePM', centerlabels=True, extend='both',
      save=img)
  if stream is True: imgbufs.append(img)
  
  m6plot.xycompare( Smod, Sobs , x, y, area=area,
      suptitle=suptitle,
      title1='SSS [ppt]',
      title2='WOA\'05 SSS [ppt]',
      clim=m6plot.linCI(20,30,10, 31,39,.5), colormap='dunneRainbow', extend='both',
      dlim=ci, dcolormap='dunnePM', dextend='both', centerdlabels=True,
      save=cmdLineArgs.outdir+'/SSS_bias_WOA05.3_panel.png')

  if stream is True:
    return imgbufs
Example #21
def read_list(files_list, var):
    from joblib import Parallel, delayed
    ctime_read = checkpoint(0)

    print '  -->  READING FILES '
    wrfvar = (getwrfname(var)[0]).split('-')

    if len(files_list) <= 15:
        method = 'MFDataset'
    else:
        method = 'Dataset'

    # ---------------------
    if method == 'MFDataset':
        print files_list
        fin = nc.MFDataset(files_list)  # Read all files
        print '   -->   EXTRACTING VARIABLE Time'
        time = fin.variables['Times'][:]  # Get time variable

        print '   -->   EXTRACTING VARIABLE ', var
        varvals = get_wrfvars(wrfvar, fin)
        fin.close()
    # ---------------------

    # ---------------------
    if method == 'Dataset':
        varvals = {}

        njobs = 10
        nlen = len(files_list) / njobs  #time step block length
        a = len(files_list) - njobs * nlen
        nt_v = np.zeros(njobs)
        nt_v[:] = nlen
        nt_v[njobs - 1] = nlen + a  #block length for each job
        nt_v = nt_v.cumsum()

        files_in = [(files_list[0:int(nt_v[0])], wrfvar)]
        for tt in np.arange(1, njobs):
            files_in.append(
                (files_list[int(nt_v[tt - 1]):int(nt_v[tt])], wrfvar))

        var_v = Parallel(n_jobs=njobs)(delayed(read_block)(*files_in[i])
                                       for i in xrange(len(files_in)))

        for i in np.arange(0, njobs):
            if i == 0:
                time = var_v[i][0]
                for ii, wrfv in enumerate(wrfvar):
                    varvals[wrfv] = var_v[i][1][wrfv]
            else:
                time = np.concatenate((time, var_v[i][0]))
                for ii, wrfv in enumerate(wrfvar):
                    varvals[wrfv] = np.concatenate(
                        (varvals[wrfv], var_v[i][1][wrfv]))
    # ---------------------

    ctime = checkpoint(ctime_read)
    return np.asarray(time), varvals
Example #22
def main(cmdLineArgs,stream=None):
  rootGroupT = netCDF4.MFDataset( cmdLineArgs.annual_directory + '/*.thetao_xyave.nc' )
  rootGroupS = netCDF4.MFDataset( cmdLineArgs.annual_directory + '/*.so_xyave.nc' )
  if 'thetao_xyave' not in rootGroupT.variables: raise Exception('Could not find "thetao_xyave" files "%s"'%(cmdLineArgs.annual_directory))
  if 'so_xyave' not in rootGroupS.variables: raise Exception('Could not find "so_xyave" files "%s"'%(cmdLineArgs.annual_directory))

  zt = rootGroupT.variables['zt'][::-1] * -1
  timeT = rootGroupT.variables['time']
  timeS = rootGroupS.variables['time']
  timeT = numpy.array([int(x.year) for x in netCDF4.num2date(timeT[:],timeT.units,calendar=timeT.calendar)])
  timeS = numpy.array([int(x.year) for x in netCDF4.num2date(timeS[:],timeS.units,calendar=timeS.calendar)])

  if cmdLineArgs.trange != None:
    start = list(timeT).index(cmdLineArgs.trange[0])
    end = list(timeT).index(cmdLineArgs.trange[1])
  else:
    start = 0
    end = -1

  variable = rootGroupT.variables['thetao_xyave']
  T = variable[start:end] - variable[start]
  T = T[:,::-1]
  timeT = timeT[start:end]

  variable = rootGroupS.variables['so_xyave']
  S = variable[start:end] - variable[start]
  S = S[:,::-1]
  timeS = timeS[start:end]

  if cmdLineArgs.suptitle != '':  suptitle = cmdLineArgs.suptitle + ' ' + cmdLineArgs.label
  else: suptitle = rootGroupT.title + ' ' + cmdLineArgs.label

  if stream != None: objOut = stream[0]
  else: objOut = cmdLineArgs.outdir+'/T_drift.png'
  m6plot.ztplot( T, timeT, zt, splitscale=[0., -1000., -6500.],
      suptitle=suptitle, title='Potential Temperature [C]',
      extend='both', colormap='dunnePM', autocenter=True,
      clim=cmdLineArgs.climT,save=objOut)

  if stream != None: objOut = stream[1]
  else: objOut = cmdLineArgs.outdir+'/S_drift.png'
  m6plot.ztplot( S, timeS, zt, splitscale=[0., -1000., -6500.],
      suptitle=suptitle, title='Salinity [psu]',
      extend='both', colormap='dunnePM', autocenter=True,
      clim=cmdLineArgs.climS,save=objOut)
Example #23
def _get_dataset(filename, dataset=None):
    if dataset is not None:
        return dataset
    df = None
    if isinstance(filename, basestring):
        df = nc4.Dataset(filename)
    else:
        df = nc4.MFDataset(filename)
    return df
Example #24
 def __init__(self,
              cmdLineArgs,
              section,
              var,
              label=None,
              ylim=None,
              mks2Sv=True):
     if not isinstance(section, list):
         section = [section]
     if not isinstance(var, list):
         var = [var]
     self.section = section[0]
     self.var = var
     if label != None: self.label = label
     else: self.label = section[0]
     self.ylim = ylim
     for k in range(0, len(section)):
         try:
             rootGroup = netCDF4.MFDataset(cmdLineArgs.ts_directory +
                                           section[k] +
                                           '/ts/120hr/20yr/*.' +
                                           var[k] + '.nc')
         except:
             rootGroup = netCDF4.MFDataset(cmdLineArgs.ts_directory +
                                           section[k] +
                                           '/ts/120hr/5yr/*.' + var[k] +
                                           '.nc')
         if k == 0:
             total = numpy.ones(
                 rootGroup.variables[var[k]][:].shape[0]) * 0.0
         trans = rootGroup.variables[var[k]][:].sum(
             axis=1)  # Depth summation
         if var[k] == 'umo': total = total + trans.sum(axis=1).squeeze()
         elif var[k] == 'vmo':
             total = total + trans.sum(axis=2).squeeze()
         else:
             raise ValueError('Unknown variable name')
     if mks2Sv == True: total = total * 1.e-9
     self.data = total
     self.time = rootGroup.variables['time'][:] * (1 / 365.0)
     if cmdLineArgs.suptitle != '':
         self.suptitle = cmdLineArgs.suptitle + ' ' + cmdLineArgs.label
     else:
         self.suptitle = rootGroup.title + ' ' + cmdLineArgs.label
Example #25
    def inventory(self):
        """
        Report on data available in the directory: time slice, variables, area
        """
        print("\n\n\n")
        print("=== INVENTORY FOR GLOBSIM ERA-INTERIM DATA === \n")
        print("Download parameter file: \n" + self.pfile + "\n")
        # loop over filetypes, read, report
        file_type = [
            'erai_pl_*.nc', 'erai_sa_*.nc', 'erai_sf_*.nc', 'erai_t*.nc'
        ]
        for ft in file_type:
            infile = path.join(self.directory, ft)
            nf = len(filter(listdir(self.directory), ft))
            print(str(nf) + " FILE(S): " + infile)

            if nf > 0:
                # open dataset
                ncf = nc.MFDataset(infile, 'r')

                # list variables
                keylist = [str_encode(x) for x in ncf.variables.keys()]

                print("    VARIABLES:")
                print("        " + str(len(keylist)) +
                      " variables, inclusing dimensions")
                for key in keylist:
                    print("        " + ncf.variables[key].long_name)

                # time slice
                time = ncf.variables['time']
                tmin = nc.num2date(min(time[:]),
                                   time.units,
                                   calendar=time.calendar).strftime('%Y/%m/%d')
                tmax = nc.num2date(max(time[:]),
                                   time.units,
                                   calendar=time.calendar).strftime('%Y/%m/%d')
                print("    TIME SLICE")
                print("        " + str(len(time[:])) + " time steps")
                print("        " + tmin + " to " + tmax)

                # area
                lon = ncf.variables['longitude']
                lat = ncf.variables['latitude']
                nlat = str(len(lat))
                nlon = str(len(lon))
                ncel = str(len(lat) * len(lon))
                print("    BOUNDING BOX / AREA")
                print("        " + ncel + " cells, " + nlon + " W-E and " +
                      nlat + " S-N")
                print("        N: " + str(max(lat)))
                print("        S: " + str(min(lat)))
                print("        W: " + str(min(lon)))
                print("        E: " + str(max(lon)))

                ncf.close()
Example #26
File: inout.py Project: guidov/tracpy
def setupROMSfiles(loc, date, ff, tout, time_units, tstride=1):
    """
    setupROMSfiles()
    Kristen Thyng, March 2013

    Figures out necessary files to read in for track times and what
    model output indices within those files to use.

    Args:
        loc: File location. loc can be a thredds server web address, a single
         string of a file location, a list of strings of multiple file
         locations to be searched through.
        date: datetime format start date
        ff: Time direction. ff=1 forward, ff=-1 backward
        tout: Number of model outputs to use
        time_units: To convert to datetime
        tstride: Stride in time, in case want to use less model output than
         is available. Default is 1, using all output.

    Returns:
        * nc - NetCDF object for relevant files
        * tinds - Indices of outputs to use from fname files
    """

    # For thredds server where all information is available in one place
    # or for a single file
    if 'http' in loc or type(loc) == str:
        nc = netCDF.Dataset(loc)

    # This is for the case when we have a bunch of files to sort through
    else:
        # the globbing should happen ahead of time so this case looks
        # different than the single file case
        # files in fname are in chronological order
        nc = netCDF.MFDataset(loc)

    # Convert date to number
    # dates = netCDF.num2date(nc.variables['ocean_time'][:], time_units)
    # The calendar definition extends dates to before the year 1582 for use
    # with idealized simulations without meaningful dates.
    dates = netCDF.num2date(nc.variables['ocean_time'][:],
                            time_units,
                            calendar='proleptic_gregorian')
    # time index with time value just below date (relative to file ifile)
    istart = find(dates <= date)[-1]

    # Select indices
    if ff == 1:
        # indices of model outputs desired
        tinds = range(istart, istart + tout, tstride)
    else:  # backward in time
        # have to shift istart since there are now new indices behind since
        # going backward
        tinds = range(istart, istart - tout, -tstride)

    return nc, tinds
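A hedged call sketch based on the docstring above (the file location, start date and time units are placeholders; assumes the datetime module is imported):

    start = datetime.datetime(2010, 6, 1)
    nc, tinds = setupROMSfiles('ocean_his_0001.nc', start, ff=1, tout=24,
                               time_units='seconds since 1970-01-01 00:00:00')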
Example #27
def extract_data(file):
	"""
	Purpose: To extract all of the data in a file(s) even the errors
	Inputs: File - the name of the file(s) to be read
	Outputs: 
		data - an array of mce data including raw data, header data, and on_off data.
			0 - A list of all times without errors [100 * number of files, 2]
			1 - mce0_raw_data without errors [100 * number of files, 33, 32, 100]
			2 - mce1_raw_data without errors [100 * number of files, 33, 32, 100]
			3 - mce0 on off data without errors [100 * number of files, 33, 32]
			4 - mce1 on off data without errors [100 * number of files, 33, 32]
			5 - head0 - mce0_header data without errors [100 * number of files, 1700, 1]
			6 - head1 - mce1 header data without errors [100 * number of files, 1700, 1]
			7 - status flags data without errors [100 * number of files, 1, 5]
		tel_data - an array of telescope data [100 * number of files, 20, 21]
		k_data - k mirror data
	"""
	f = nc.MFDataset(file)
	
	#flags to make sure we have returned data for mce and tel or not. will have to add kms flag
	mce_flag = False
	tel_flag = False 
	k_flag = False

	unix = f.variables['time'][:, 0]
	data = []
	counter = 0
	print('collecting K-mirror data')
	try:
		k_data = f.variables['kms'][:]
	except KeyError:
		k_flag = True
		print('WARNING missing k-mirror data')
	try:
		for var in f.variables:
			if 'mce' in var or var == 'status' or var == 'time':
				print('collecting %s data' % (var))
				data.append([])
				data[counter].append(f.variables[var][:])
				counter += 1
	except KeyError:
		mce_flag = True
		print('WARNING missing some or all MCE data')
	print('collecting tel_data')
	try:
		tel_data = f.variables['tel'][:]
	except KeyError:
		tel_flag = True
		print('WARNING missing telescope data')
	if tel_flag:
		tel_data = None
	if mce_flag:
		data = None
	if k_flag:
		k_data = None
	return data, tel_data, k_data
Example #28
def avg2csv(ncfile, csvname):

    print ncfile, csvname

    nc    = netCDF4.MFDataset(ncfile, 'r')
    names = nc.variables.keys()

    flux = {}

    for name in names:
        if nc.variables[name].ndim == 3:
            flux[name] = nc.variables[name][:,:,:].flatten()

    nt = len(nc.dimensions['ocean_time'])
    nx = len(nc.dimensions['xi_rho'])
    ny = len(nc.dimensions['eta_rho'])
    tunit = nc.variables['ocean_time'].units

    #

    time = np.zeros_like(nc.variables['zeta'][:,:,:])
    lon  = np.zeros_like(nc.variables['zeta'][:,:,:])
    lat  = np.zeros_like(nc.variables['zeta'][:,:,:])
    h    = np.zeros_like(nc.variables['zeta'][:,:,:])

    for t in xrange(nt):
        time[t,:,:] = nc.variables['ocean_time'][t]
        h[t,:,:]    = nc.variables['h'][:,:]
    for x in xrange(nx):
        lon[:,:,x] = nc.variables['lon_rho'][0,x]
    for y in xrange(ny):
        lat[:,y,:] = nc.variables['lat_rho'][y,0]
        
    flux['time'] = time.flatten()
    flux['lon']  = lon.flatten()
    flux['lat']  = lat.flatten()
    flux['h']    = h.flatten()

    #

    df = pd.DataFrame(flux)
    num2date = lambda num: netCDF4.num2date(num, tunit)
    df.time  = df.time.apply(num2date)
    df = df.set_index(['time','lon','lat','h'])
    df = df.dropna()

    mean = df.mean()

    mean.to_csv('{}_mean.csv'.format(csvname))

    df.describe().to_csv('{}_describe.csv'.format(csvname))

    df.sum().to_csv('{}_sum.csv'.format(csvname))

    df.to_csv('{}_all.csv'.format(csvname))
Example #29
def parsing_nc_file(hk_file):
    """
	Purpose : All this function does is take data out of the hk file
	Inputs : hk_file - the name of the file
	Outputs : data - the raw data from the hk file
	"""
    f = nc.MFDataset(hk_file)
    data = f.variables['hk_data'][:]
    f.close()
    return data
Example #30
def check(u_urls, v_urls):
    """check files for consistency and assumptions"""
    with netCDF4.MFDataset(u_urls, aggdim='time') as ds_u:
        # lookup variables in both files
        t_u = ds_u.variables['time'][:]
        lon_u = ds_u.variables['lon'][:]
        lat_u = ds_u.variables['lat'][:]
        # assert assumed units while the dataset is still open
        assert ds_u.variables['time'].units == 'hours since 1800-01-01 00:00:0.0'

    with netCDF4.MFDataset(v_urls, aggdim='time') as ds_v:
        # assert equality
        t_v = ds_v.variables['time'][:]
        lon_v = ds_v.variables['lon'][:]
        lat_v = ds_v.variables['lat'][:]
        assert (t_u == t_v).all()
        assert (lat_u == lat_v).all()
        assert (lon_u == lon_v).all()
        # assert assumed units while the dataset is still open
        assert ds_v.variables['time'].units == 'hours since 1800-01-01 00:00:0.0'