Exemplo n.º 1
0
    def __init__(self, path=None, xlim=None, ylim=None, tlim=None):
        """Initializes the NCEP reanalysis gridded dataset.

        The indices 'n', 'k', 'j' and 'i' refer to the temporal, height,
        meridional and zonal coordinates respectively. If one of these
        indices is set to 'None', then it is assumed infinite size, which
        is relevant for the 'time' coordinate.

        Parameters
        ----------
        path : str, optional
            Directory containing the `<var>.<year>.nc` data files.
            Defaults to a local NCEP reanalysis 2 gaussian grid directory.
        xlim, ylim : array like, optional
            Longitude and latitude limits for the grid.
        tlim : array like, optional
            Time limits; accepted for interface compatibility but not used
            directly by this initializer.

        """
        # Initializes the variables to default values.
        self.attributes = dict()
        self.dimensions = dict(n=0, k=0, j=0, i=0)
        self.coordinates = dict(n=None, k=None, j=None, i=None)
        self.variables = dict()
        self.params = dict()
        self.data = dict()
        self.stencil_coeffs = dict()
        self.stencil_params = dict()
        self.alias = dict()

        # Sets global parameters for grid. Identity comparison with `None`
        # per PEP 8.
        if path is None:
            path = ('/home/sebastian/academia/data/ncep.reanalysis2/'
                'gaussian_grid')
        self.params['path'] = path
        self.params['var_list'] = []
        self.params['year_list'] = []

        # Generates list of files, tries to match them to the pattern and to
        # extract the time. To help understanding the naming convention and
        # pattern, see the following example:
        #   uwnd.2015.nc
        # The dots are escaped so that they match a literal period instead
        # of any character.
        file_pattern = r'(.*)\.([0-9]{4})\.nc'
        flist = listdir(self.params['path'])
        flist, match = reglist(flist, file_pattern)
        self.params['file_list'] = flist

        # Gets list of variables from file match.
        _vars, _years = zip(*match)
        self.params['var_list'] = unique(_vars)
        self.params['year_list'] = unique(_years)

        # Loads data from first variable and loads longitude and latitude
        # data. We assume that all data is homogeneous throughout the
        # dataset. Then walks through each year and loads time vector.
        _var = self.params['var_list'][0]
        data = None
        for _i, _year in enumerate(self.params['year_list']):
            fname = '{}.{}.nc'.format(_var, _year)
            # Closes the previous year's file before opening the next one.
            # This replaces the former bare `try/except: pass`, which
            # silently swallowed every exception type.
            if data is not None:
                data.close()
            data = self._open_file(fname)
            #
            if _i == 0:
                lon = data.variables['lon'].data
                lat = data.variables['lat'].data
                time = data.variables['time'].data
            else:
                time = hstack([time, data.variables['time'].data])

        # Time in dataset is given in `hours since 1800-1-1 00:00:0.0` and we
        # convert it to matplotlib's date format.
        if data.variables['time'].units == 'hours since 1800-1-1 00:00:0.0':
            self.params['t0'] = dates.date2num(
                dates.datetime.datetime(1800, 1, 1, 0, 0))
            time = self.params['t0'] + time / 24.

        # If lon_0 is set, calculate how many indices have to be moved in
        # order for latitude array to start at lon_0.
        lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(lon,
            lat, xlim, ylim)

        self.params['xlim'], self.params['ylim'] = xlim, ylim
        self.params['lon_i'], self.params['lat_j'] = ii, jj

        # Initializes the grid attributes, dimensions, coordinates and
        # variables.
        self.name = 'ncep_reanalysis'
        self.description = ('NCEP Reanalysis project is analysis/forecast '
            'system to perform data assimilation using past data from 1979 '
            'onwards.')
        self.attributes['institution'] = data.institution
        self.dimensions = dict(n=time.size, k=0, j=lat.size, i=lon.size)
        self.coordinates = dict(n='time', k='height', j='latitude',
            i='longitude')
        self.variables = dict(
            time = atlantis.data.Variable(),
            height = atlantis.data.get_standard_variable('height'),
            latitude = atlantis.data.get_standard_variable('latitude'),
            longitude = atlantis.data.get_standard_variable('longitude'),
            xm = atlantis.data.Variable(),
            ym = atlantis.data.Variable(),
        )
        #
        self.variables['time'].data = time
        self.variables['time'].canonical_units = 'days since 0001-01-01 UTC'
        #
        self.variables['height'].data = 0.
        self.variables['latitude'].data = lat
        self.variables['longitude'].data = lon
        #
        self.variables['xm'].canonical_units = 'km'
        self.variables['xm'].description = 'Zonal distance.'
        self.variables['ym'].canonical_units = 'km'
        self.variables['ym'].description = 'Meridional distance.'
        self.variables['xm'].data, self.variables['ym'].data = (
            metergrid(self.variables['longitude'].data,
            self.variables['latitude'].data, units='km')
        )
        #
        data.close()

        # Walks through each variable file for the first year, reads their
        # attributes and adds to the dataset definition.
        self._message('\n')
        _year = self.params['year_list'][0]
        for _var in self.params['var_list']:
            fname = '{}.{}.nc'.format(_var, _year)
            data = self._open_file(fname)
            self._message('{}: '.format(_var))
            for _key in data.variables.keys():
                self._message('{} '.format(_key))
                if _key in ['time', 'time_bnds', 'level', 'level_bnds', 'lat',
                    'lon']:
                    continue
                try:
                    self.variables[_key] = atlantis.data.get_standard_variable(
                        data.variables[_key].standard_name,
                        units=data.variables[_key].units,
                        long_name=data.variables[_key].long_name,
                    )
                except Exception:
                    # Not a CF standard name: falls back to a generic
                    # variable. Narrowed from a bare `except` so that
                    # KeyboardInterrupt/SystemExit are not swallowed.
                    self._message('*  ')
                    self.variables[_key] = atlantis.data.Variable(
                        units=data.variables[_key].units,
                        standard_name=data.variables[_key].standard_name,
                        long_name=data.variables[_key].long_name,
                        description=data.variables[_key].var_desc,
                    )
                self.alias[_key] = _var
            #
            self._message('\n')
            data.close()
        #
        return
Exemplo n.º 2
0
 def __init__(self, path=None, mask_file=None, xlim=None, ylim=None,
     tlim=None, useqd=False):
     """Initializes the AVISO merged sea level anomaly dataset grid.

     The indices 'n', 'k', 'j' and 'i' refer to the temporal, height,
     meridional and zonal coordinates respectively. If one of these
     indices is set to 'None', then it is assumed infinite size, which
     is relevant for the 'time' coordinate.

     Parameters
     ----------
     path : str, optional
         Directory with the gzipped NetCDF files. Defaults to a local
         AVISO MSLA merged data directory.
     mask_file : str, optional
         Path to an optional mask file.
     xlim, ylim : array like, optional
         Longitude and latitude limits of the grid.
     tlim : list, optional
         Time limits, either as matplotlib date numbers or as date
         strings. NOTE: string entries are converted in place, mutating
         the caller's list.
     useqd : bool, optional
         If True, uses only the quick-delivery ('qd') sea surface height
         dataset.

     """
     # Initializes the variables to default values.
     self.attributes = dict()
     self.dimensions = dict(n=0, k=0, j=0, i=0)
     self.coordinates = dict(n=None, k=None, j=None, i=None)
     self.variables = dict()
     self.params = dict()
     self.stencil_coeffs = dict()
     self.stencil_params = dict()
     # Maps dataset ids to data variables and NetCDF grid names.
     if useqd:
         self.params['datasets'] = [dict(id='h', var='h_qd')]
         self.params['var_dict'] = dict(h_Grid_0001 = 'h_qd')
         self.params['var_tcid'] = dict(h_qd=['h', 'h_qd', 'Grid_0001'])
     else:
         self.params['datasets'] = [dict(id='h', var='h'),
             dict(id='uv', var='uv'), dict(id='err', var='err')]
         self.params['var_dict'] = dict(
             h_Grid_0001 = 'h',
             uv_Grid_0001 = 'u',
             uv_Grid_0002 = 'v',
             err_Grid_0001 = 'err'
         )
         self.params['var_tcid'] = dict(
             h = ['h', 'h', 'Grid_0001'],
             u = ['uv', 'uv', 'Grid_0001'],
             v = ['uv', 'uv', 'Grid_0002'],
             err = ['err', 'err', 'Grid_0001']
         )
     # Creates an universally unique identifier (UUID) for this instance.
     self.params['uuid'] = str(uuid())

     # Sets global parameters for grid.
     if path is None:
         path = ('/home/sebastian/academia/data/aviso/msla/merged')
     self.params['path'] = path
     self.params['mask_file'] = mask_file
     self.params['missing_value'] = -9999.

     # Generates list of files, tries to match them to the pattern and to
     # extract the time. Raw strings keep the `\d` escapes intact and the
     # dots in the extension are escaped to match literal periods.
     file_pattern = (r'dt_ref_global_merged_msla_(%s)_(\d*)_(\d*)_(\d*)'
         r'\.nc\.gz' % ('|'.join([item['var'] for item in
         self.params['datasets']])))
     flist = listdir('%s/%s' % (self.params['path'],
         self.params['datasets'][0]['id']))
     flist.sort()
     flist, match = reglist(flist, file_pattern)

     # Convert dates to matplotlib format, i.e. days since 0001-01-01 UTC.
     time_list = array(dates.datestr2num(['%4s-%2s-%2s 12:00' %
         (item[1][:4], item[1][4:6], item[1][6:]) for item in match]))

     # If tlim are set, calculate the time limits of the dataset and
     # corresponding files.
     if tlim is not None:
         for i, t in enumerate(tlim):
             # Accepts date strings and converts them to date numbers.
             if isinstance(t, str):
                 tlim[i] = dates.datestr2num(t)
         #
         t_sel = flatnonzero(((time_list >= tlim[0]) &
             (time_list <= tlim[1])))
         time_list = time_list[t_sel]
     else:
         t_sel = range(len(time_list))

     fdict = [dict(start=match[n][1], end=match[n][2],
         creation=match[n][3]) for n in t_sel]
     self.params['file_list'] = fdict
     if len(flist) == 0:
         return

     # Reads first file in dataset to determine array geometry and
     # dimensions (lon, lat).
     params = dict(path=self.params['path'],
         dataset=self.params['datasets'][0]['id'],
         datavar=self.params['datasets'][0]['var'],
         **self.params['file_list'][0])
     fname = self.create_filename(**params)
     data = self.read_file(fname)
     lat = data.variables['NbLatitudes'].data
     lon = data.variables['NbLongitudes'].data

     # If xlim and ylim are set, calculate how many indices have to be moved
     # in order for latitude array to start at xlim[0].
     lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(lon,
         lat, xlim, ylim)
     self.params['xlim'], self.params['ylim'] = xlim, ylim
     self.params['lon_i'], self.params['lat_j'] = ii, jj
     self.params['dlon'] = lon[1] - lon[0]
     self.params['dlat'] = lat[1] - lat[0]

     # Initializes the grid attributes, dimensions, coordinates and
     # variables. NOTE: `iteritems` keeps this module Python 2 only.
     self.name = 'sea_level_anomaly_geostrophic_velocities'
     for attr, attr_value in vars(data).iteritems():
         if attr in ['mode', 'filename']:
             continue
         if isinstance(attr_value, str):
             if attr in ['name']:
                 self.name = attr_value
             elif attr in ['description', 'summary', 'title']:
                 self.description = attr_value
             else:
                 self.attributes[attr.lower()] = attr_value
     self.dimensions = dict(n=time_list.size, k=1, j=lat.size, i=lon.size)
     self.coordinates = dict(n='time', k='height', j='latitude',
         i='longitude')
     #
     self.variables = dict(
         time = atlantis.data.variable(
             canonical_units='days since 0001-01-01 UTC',
             data=time_list,
         ),
         height = atlantis.data.get_standard_variable('height', data=[0.]),
         latitude = atlantis.data.get_standard_variable('latitude',
             data=lat),
         longitude = atlantis.data.get_standard_variable('longitude',
             data=lon),
         xm = atlantis.data.variable(
             canonical_units = 'km',
             description = 'Zonal distance.'
         ),
         ym = atlantis.data.variable(
             canonical_units = 'km',
             description = 'Meridional distance.'
         ),
     )
     #
     # NOTE(review): keyword is 'unit' here but 'units' elsewhere in this
     # file -- confirm against the metergrid signature.
     self.variables['xm'].data, self.variables['ym'].data = (
         metergrid(self.variables['longitude'].data, 
         self.variables['latitude'].data, unit='km')
     )
     # Walks through every dataset to read list of variables.
     self.params['var_list'] = list()
     for i, dataset in enumerate(self.params['datasets']):
         # The first dataset's file is already open from the geometry read.
         if i > 0:
             params = dict(path=self.params['path'],
                 dataset=dataset['id'], datavar=dataset['var'],
                 **self.params['file_list'][0])
             fname = self.create_filename(**params)
             data = self.read_file(fname)
         # Walks through every variable in NetCDF file.
         for var in data.variables.keys():
             if var in ['Grid_0001', 'Grid_0002']:
                 nvar = self.params['var_dict']['{0}_{1}'.format(
                     dataset['id'], var)]
                 attribs = dict(
                     missing_value = data.variables[var]._FillValue,
                     canonical_units = data.variables[var].units,
                     description = data.variables[var].long_name,
                     dataset = dataset,
                     variable = var
                 )
                 self.variables[nvar] = atlantis.data.variable(**attribs)
                 self.params['var_list'].append(nvar)
         # Closes the data access and removes temporary NetCDF file.
         self.close_file(data)

     return
Exemplo n.º 3
0
    def __init__(self, path=None, xlim=None, ylim=None, tlim=None):
        """Initializes the NCEP reanalysis gridded dataset.

        The indices 'n', 'k', 'j' and 'i' refer to the temporal, height,
        meridional and zonal coordinates respectively. If one of these
        indices is set to 'None', then it is assumed infinite size, which
        is relevant for the 'time' coordinate.

        Parameters
        ----------
        path : str, optional
            Directory containing the `<var>.<year>.nc` data files.
            Defaults to a local NCEP reanalysis 2 gaussian grid directory.
        xlim, ylim : array like, optional
            Longitude and latitude limits for the grid.
        tlim : array like, optional
            Time limits; accepted for interface compatibility but not used
            directly by this initializer.

        """
        # Initializes the variables to default values.
        self.attributes = dict()
        self.dimensions = dict(n=0, k=0, j=0, i=0)
        self.coordinates = dict(n=None, k=None, j=None, i=None)
        self.variables = dict()
        self.params = dict()
        self.data = dict()
        self.stencil_coeffs = dict()
        self.stencil_params = dict()
        self.alias = dict()

        # Sets global parameters for grid. Identity comparison with `None`
        # per PEP 8.
        if path is None:
            path = ('/home/sebastian/academia/data/ncep.reanalysis2/'
                    'gaussian_grid')
        self.params['path'] = path
        self.params['var_list'] = []
        self.params['year_list'] = []

        # Generates list of files, tries to match them to the pattern and to
        # extract the time. To help understanding the naming convention and
        # pattern, see the following example:
        #   uwnd.2015.nc
        # The dots are escaped so that they match a literal period instead
        # of any character.
        file_pattern = r'(.*)\.([0-9]{4})\.nc'
        flist = listdir(self.params['path'])
        flist, match = reglist(flist, file_pattern)
        self.params['file_list'] = flist

        # Gets list of variables from file match.
        _vars, _years = zip(*match)
        self.params['var_list'] = unique(_vars)
        self.params['year_list'] = unique(_years)

        # Loads data from first variable and loads longitude and latitude
        # data. We assume that all data is homogeneous throughout the
        # dataset. Then walks through each year and loads time vector.
        _var = self.params['var_list'][0]
        data = None
        for _i, _year in enumerate(self.params['year_list']):
            fname = '{}.{}.nc'.format(_var, _year)
            # Closes the previous year's file before opening the next one.
            # This replaces the former bare `try/except: pass`, which
            # silently swallowed every exception type.
            if data is not None:
                data.close()
            data = self._open_file(fname)
            #
            if _i == 0:
                lon = data.variables['lon'].data
                lat = data.variables['lat'].data
                time = data.variables['time'].data
            else:
                time = hstack([time, data.variables['time'].data])

        # Time in dataset is given in `hours since 1800-1-1 00:00:0.0` and we
        # convert it to matplotlib's date format.
        if data.variables['time'].units == 'hours since 1800-1-1 00:00:0.0':
            self.params['t0'] = dates.date2num(
                dates.datetime.datetime(1800, 1, 1, 0, 0))
            time = self.params['t0'] + time / 24.

        # If lon_0 is set, calculate how many indices have to be moved in
        # order for latitude array to start at lon_0.
        lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(
            lon, lat, xlim, ylim)

        self.params['xlim'], self.params['ylim'] = xlim, ylim
        self.params['lon_i'], self.params['lat_j'] = ii, jj

        # Initializes the grid attributes, dimensions, coordinates and
        # variables.
        self.name = 'ncep_reanalysis'
        self.description = (
            'NCEP Reanalysis project is analysis/forecast '
            'system to perform data assimilation using past data from 1979 '
            'onwards.')
        self.attributes['institution'] = data.institution
        self.dimensions = dict(n=time.size, k=0, j=lat.size, i=lon.size)
        self.coordinates = dict(n='time',
                                k='height',
                                j='latitude',
                                i='longitude')
        self.variables = dict(
            time=atlantis.data.Variable(),
            height=atlantis.data.get_standard_variable('height'),
            latitude=atlantis.data.get_standard_variable('latitude'),
            longitude=atlantis.data.get_standard_variable('longitude'),
            xm=atlantis.data.Variable(),
            ym=atlantis.data.Variable(),
        )
        #
        self.variables['time'].data = time
        self.variables['time'].canonical_units = 'days since 0001-01-01 UTC'
        #
        self.variables['height'].data = 0.
        self.variables['latitude'].data = lat
        self.variables['longitude'].data = lon
        #
        self.variables['xm'].canonical_units = 'km'
        self.variables['xm'].description = 'Zonal distance.'
        self.variables['ym'].canonical_units = 'km'
        self.variables['ym'].description = 'Meridional distance.'
        self.variables['xm'].data, self.variables['ym'].data = (metergrid(
            self.variables['longitude'].data,
            self.variables['latitude'].data,
            units='km'))
        #
        data.close()

        # Walks through each variable file for the first year, reads their
        # attributes and adds to the dataset definition.
        self._message('\n')
        _year = self.params['year_list'][0]
        for _var in self.params['var_list']:
            fname = '{}.{}.nc'.format(_var, _year)
            data = self._open_file(fname)
            self._message('{}: '.format(_var))
            for _key in data.variables.keys():
                self._message('{} '.format(_key))
                if _key in [
                        'time', 'time_bnds', 'level', 'level_bnds', 'lat',
                        'lon'
                ]:
                    continue
                try:
                    self.variables[_key] = atlantis.data.get_standard_variable(
                        data.variables[_key].standard_name,
                        units=data.variables[_key].units,
                        long_name=data.variables[_key].long_name,
                    )
                except Exception:
                    # Not a CF standard name: falls back to a generic
                    # variable. Narrowed from a bare `except` so that
                    # KeyboardInterrupt/SystemExit are not swallowed.
                    self._message('*  ')
                    self.variables[_key] = atlantis.data.Variable(
                        units=data.variables[_key].units,
                        standard_name=data.variables[_key].standard_name,
                        long_name=data.variables[_key].long_name,
                        description=data.variables[_key].var_desc,
                    )
                self.alias[_key] = _var
            #
            self._message('\n')
            data.close()
        #
        return
Exemplo n.º 4
0
    def __init__(self, path=None, mask_file=None, xlim=None, ylim=None):
        """Initializes the sea surface wind stress dataset grid.

        The indices 'n', 'k', 'j' and 'i' refer to the temporal, height,
        meridional and zonal coordinates respectively. If one of these
        indices is set to 'None', then it is assumed infinite size, which
        is relevant for the 'time' coordinate.

        Parameters
        ----------
        path : str, optional
            Directory with the daily `tauxyYYYYMMDD.nc` files. Defaults to
            a local NCDC/NOAA seawinds stress directory.
        mask_file : str, optional
            Path to an optional mask file.
        xlim, ylim : array like, optional
            Longitude and latitude limits of the grid.

        """
        # Initializes the variables to default values.
        self.attributes = dict()
        self.dimensions = dict(n=0, k=0, j=0, i=0)
        self.coordinates = dict(n=None, k=None, j=None, i=None)
        self.variables = dict()
        self.params = dict()
        self.stencil_coeffs = dict()
        self.stencil_params = dict()

        # Sets global parameters for grid.
        if path is None:
            path = ('/home/sebastian/academia/data/ncdc.noaa/seawinds/stress/'
                    'daily')
        self.params['path'] = path
        self.params['mask_file'] = mask_file
        self.params['missing_value'] = -9999.

        # Generates list of files, tries to match them to the pattern and to
        # extract the time. The dot is escaped to match a literal period.
        file_pattern = r'tauxy([0-9]{8})\.nc'
        flist = listdir(self.params['path'])
        flist, match = reglist(flist, file_pattern)
        self.params['file_list'] = flist
        if len(flist) == 0:
            return

        # Convert dates to matplotlib format, i.e. days since 0001-01-01 UTC.
        time_list = array([dates.datestr2num(item) for item in match])

        # Reads first file in dataset to determine array geometry and
        # dimensions (lon, lat).
        data = netcdf(
            '%s/%s' % (self.params['path'], self.params['file_list'][0]), 'r')
        for var in data.variables.keys():
            if var in ['latitude', 'lat']:
                lat = data.variables[var].data
            elif var in ['longitude', 'lon']:
                lon = data.variables[var].data

        # If xlim and ylim are set, calculate how many indices have to be
        # moved in order for latitude array to start at xlim[0]. Uses `or`
        # on the None checks instead of the former bitwise `|`.
        # NOTE(review): when neither limit is given, 'lon_i'/'lat_j' are
        # never stored in params -- confirm downstream readers tolerate the
        # missing keys.
        if (xlim is not None) or (ylim is not None):
            if xlim is None:
                xlim = (lon.min(), lon.max())
            if ylim is None:
                ylim = (lat.min(), lat.max())
            #
            LON = lon_n(lon, xlim[1])
            i = argsort(LON)
            selx = i[flatnonzero((LON[i] >= xlim[0]) & (LON[i] <= xlim[1]))]
            sely = flatnonzero((lat >= ylim[0]) & (lat <= ylim[1]))
            ii, jj = meshgrid(selx, sely)
            lon = LON[selx]
            lat = lat[sely]
            self.params['xlim'] = xlim
            self.params['ylim'] = ylim
            self.params['lon_i'] = ii
            self.params['lat_j'] = jj
        self.params['dlon'] = lon[1] - lon[0]
        self.params['dlat'] = lat[1] - lat[0]

        # Initializes the grid attributes, dimensions, coordinates and
        # variables. NOTE: `iteritems` keeps this module Python 2 only.
        self.name = 'sea_surface_wind_stress'
        for attr, attr_value in vars(data).iteritems():
            if attr in ['mode', 'filename']:
                continue
            if isinstance(attr_value, str):
                if attr in ['name']:
                    self.name = attr_value
                elif attr in ['description', 'summary']:
                    self.description = attr_value
                else:
                    self.attributes[attr.lower()] = attr_value
        self.dimensions = dict(n=time_list.size, k=1, j=lat.size, i=lon.size)
        self.coordinates = dict(n='time',
                                k='height',
                                j='latitude',
                                i='longitude')
        #
        self.variables = dict(
            time=atlantis.data.variable(
                canonical_units='days since 0001-01-01 UTC',
                data=time_list,
            ),
            height=atlantis.data.get_standard_variable('height', data=[0.]),
            latitude=atlantis.data.get_standard_variable('latitude', data=lat),
            longitude=atlantis.data.get_standard_variable('longitude',
                                                          data=lon),
            xm=atlantis.data.variable(canonical_units='km',
                                      description='Zonal distance.'),
            ym=atlantis.data.variable(canonical_units='km',
                                      description='Meridional distance.'),
        )
        #
        # NOTE(review): keyword is 'unit' here but 'units' elsewhere in this
        # file -- confirm against the metergrid signature.
        self.variables['xm'].data, self.variables['ym'].data = (metergrid(
            self.variables['longitude'].data,
            self.variables['latitude'].data,
            unit='km'))
        #
        # Registers the wind stress variables found in the sample file,
        # normalizing NetCDF attribute names to the local conventions.
        self.params['var_list'] = list()
        for var in data.variables.keys():
            if var in ['tau', 'taux', 'tauy', 'tau_div', 'tau_curl']:
                attribs = dict()
                for attr, attr_value in vars(data.variables[var]).iteritems():
                    if attr == '_FillValue':
                        attribs['missing_value'] = attr_value
                    elif attr == 'data':
                        continue
                    elif attr == 'long_name':
                        attribs['description'] = attr_value
                    elif attr == 'units':
                        # Rewrites the stress unit in CF style.
                        if attr_value == 'N/m**2':
                            a = 'N m-2'
                        else:
                            a = attr_value
                        attribs['canonical_units'] = a
                    else:
                        attribs[attr] = attr_value
                self.variables[var] = atlantis.data.variable(**attribs)
                self.params['var_list'].append(var)
                if self.variables[var].missing_value is None:
                    self.variables[var].missing_value = (
                        self.params['missing_value'])
        #
        data.close()
        return
Exemplo n.º 5
0
    def make_index(self, profile=True):
        """Builds the time index of every data file in the dataset.

        Walks through each mission/year/dataset directory, matches the
        file names against the expected pattern and records the mean
        observation time of each file.

        Parameters
        ----------
        profile : bool, optional
            If True, writes progress information to stdout.

        Returns
        -------
        time_mission : dict
            Per-mission arrays of observation times and file names.
        time_dataset : dict
            Maps each observation time to a list of descriptors
            (mission, dataset, file name, cycle, orbit).

        """
        # `t0` is the overall start time consumed by `profiler`; it was
        # previously referenced without ever being assigned, raising a
        # NameError whenever `profile` was True.
        t0 = t1 = time()
        if profile:
            s = '\rBuilding preliminary time array...'
            stdout.write(s)
            stdout.flush()
        
        time_mission = dict()
        time_dataset = dict()
        N = len(self.params['missions'])
        for i, mission in enumerate(self.params['missions']):
            t2 = time()
            tt1 = time()
            #
            mpath = '%s/%s' % (self.params['path'], mission)  # Mission path
            ylist = listdir(mpath)  # Year list in mission path
            Nyear = len(ylist)
            # Raw strings keep the `\d` escapes intact and the dots in the
            # extension are escaped to match literal periods.
            file_pattern = (r'%s_%s_%s_%s_%s_(\d*)_(\d*)_(\d*)_(\d*)_(\d*)_'
                r'(\d*)\.nc\.gz') % ('GW', self.params['level'].upper(),
                self._labels[self.params['product']], self._labels[mission],
                self.params['delay'].upper())
            # Initializes time mission dictionary
            time_mission[mission] = dict(data=[], file=[])
            for j, yr in enumerate(ylist):
                tt2 = time()
                # Lists all the directories in year
                dlist = listdir('%s/%s' % (mpath, yr))
                for dset in dlist:
                    # Lists all the data files in mission in a given year and 
                    # matches it with the file pattern.
                    cur_path = '%s/%s/%s' % (mpath, yr, dset)
                    flist = listdir(cur_path)
                    flist.sort()
                    flist, match = reglist(flist, file_pattern)
                    # Convert data and product dates to matplotlib format, i.e. 
                    # days since 0001-01-01 UTC and appends to the global
                    # mission and dataset time dictionaries.
                    for k, item in enumerate(match):
                        datetime_start = dates.datestr2num(
                            '%4s-%2s-%2s %2s:%2s:%2s' % (item[0][0:4],
                            item[0][4:6], item[0][6:8], item[1][0:2],
                            item[1][2:4], item[1][4:6])
                        )
                        datetime_end = dates.datestr2num(
                            '%4s-%2s-%2s %2s:%2s:%2s' % (item[2][0:4],
                            item[2][4:6], item[2][6:8], item[3][0:2],
                            item[3][2:4], item[3][4:6])
                        )
                        # File time is the midpoint of start and end times.
                        time_data = (datetime_start + datetime_end) / 2.
                        cycle = int(item[4])
                        orbit = int(item[5])
                        time_mission[mission]['data'].append(time_data)
                        #
                        fname = '%s/%s/%s' % (yr, dset, flist[k])
                        descriptor = (mission, dset, fname, cycle, orbit)
                        # `in` on the dict itself avoids building the key
                        # list on every iteration.
                        if time_data not in time_dataset:
                            time_dataset[time_data] = [descriptor]
                        else:
                            time_dataset[time_data].append(descriptor)
                        #
                        time_mission[mission]['file'].append(fname)
                #
                # Profiling
                if profile:
                    s = '\rBuilding preliminary time array for %s: %s ' % (
                        self._missions[mission], profiler(Nyear, j+1, t0, tt1,
                        tt2),
                    )
                    stdout.write(s)
                    stdout.flush()
            #
            time_mission[mission]['data'] = array(
                time_mission[mission]['data']
            )
            time_mission[mission]['file'] = array(
                time_mission[mission]['file']
            )
            # Profiling
            if profile:
                s = '\rBuilding preliminary time array... %s ' % (profiler(N,
                    i+1, t0, t1, t2),)
                stdout.write(s)
                stdout.flush()
        #
        if profile:
            stdout.write('\n')
            stdout.flush()

        return time_mission, time_dataset
Exemplo n.º 6
0
    def make_index(self, profile=True):
        """Builds the time index of every data file in the dataset.

        Walks through each mission/year/dataset directory, matches the
        file names against the expected pattern and records the mean
        observation time of each file.

        Parameters
        ----------
        profile : bool, optional
            If True, writes progress information to stdout.

        Returns
        -------
        time_mission : dict
            Per-mission arrays of observation times and file names.
        time_dataset : dict
            Maps each observation time to a list of descriptors
            (mission, dataset, file name, cycle, orbit).

        """
        # `t0` is the overall start time consumed by `profiler`; it was
        # previously referenced without ever being assigned, raising a
        # NameError whenever `profile` was True.
        t0 = t1 = time()
        if profile:
            s = '\rBuilding preliminary time array...'
            stdout.write(s)
            stdout.flush()

        time_mission = dict()
        time_dataset = dict()
        N = len(self.params['missions'])
        for i, mission in enumerate(self.params['missions']):
            t2 = time()
            tt1 = time()
            #
            mpath = '%s/%s' % (self.params['path'], mission)  # Mission path
            ylist = listdir(mpath)  # Year list in mission path
            Nyear = len(ylist)
            # Raw strings keep the `\d` escapes intact and the dots in the
            # extension are escaped to match literal periods.
            file_pattern = (r'%s_%s_%s_%s_%s_(\d*)_(\d*)_(\d*)_(\d*)_(\d*)_'
                            r'(\d*)\.nc\.gz') % (
                                'GW', self.params['level'].upper(),
                                self._labels[self.params['product']],
                                self._labels[mission],
                                self.params['delay'].upper())
            # Initializes time mission dictionary
            time_mission[mission] = dict(data=[], file=[])
            for j, yr in enumerate(ylist):
                tt2 = time()
                # Lists all the directories in year
                dlist = listdir('%s/%s' % (mpath, yr))
                for dset in dlist:
                    # Lists all the data files in mission in a given year and
                    # matches it with the file pattern.
                    cur_path = '%s/%s/%s' % (mpath, yr, dset)
                    flist = listdir(cur_path)
                    flist.sort()
                    flist, match = reglist(flist, file_pattern)
                    # Convert data and product dates to matplotlib format, i.e.
                    # days since 0001-01-01 UTC and appends to the global
                    # mission and dataset time dictionaries.
                    for k, item in enumerate(match):
                        datetime_start = dates.datestr2num(
                            '%4s-%2s-%2s %2s:%2s:%2s' %
                            (item[0][0:4], item[0][4:6], item[0][6:8],
                             item[1][0:2], item[1][2:4], item[1][4:6]))
                        datetime_end = dates.datestr2num(
                            '%4s-%2s-%2s %2s:%2s:%2s' %
                            (item[2][0:4], item[2][4:6], item[2][6:8],
                             item[3][0:2], item[3][2:4], item[3][4:6]))
                        # File time is the midpoint of start and end times.
                        time_data = (datetime_start + datetime_end) / 2.
                        cycle = int(item[4])
                        orbit = int(item[5])
                        time_mission[mission]['data'].append(time_data)
                        #
                        fname = '%s/%s/%s' % (yr, dset, flist[k])
                        descriptor = (mission, dset, fname, cycle, orbit)
                        # `in` on the dict itself avoids building the key
                        # list on every iteration.
                        if time_data not in time_dataset:
                            time_dataset[time_data] = [descriptor]
                        else:
                            time_dataset[time_data].append(descriptor)
                        #
                        time_mission[mission]['file'].append(fname)
                #
                # Profiling
                if profile:
                    s = '\rBuilding preliminary time array for %s: %s ' % (
                        self._missions[mission],
                        profiler(Nyear, j + 1, t0, tt1, tt2),
                    )
                    stdout.write(s)
                    stdout.flush()
            #
            time_mission[mission]['data'] = array(
                time_mission[mission]['data'])
            time_mission[mission]['file'] = array(
                time_mission[mission]['file'])
            # Profiling
            if profile:
                s = '\rBuilding preliminary time array... %s ' % (profiler(
                    N, i + 1, t0, t1, t2), )
                stdout.write(s)
                stdout.flush()
        #
        if profile:
            stdout.write('\n')
            stdout.flush()

        return time_mission, time_dataset
# Example no. 7
# 0
    def __init__(self, path=None, mask_file=None, xlim=None, ylim=None):
        """
        Initializes the sea surface wind stress dataset grid.

        PARAMETERS
            path (text, optional) :
                Path to the dataset files. Defaults to the local NCDC
                NOAA seawinds daily stress directory.
            mask_file (text, optional) :
                Path to an optional mask file.
            xlim, ylim (tuple, optional) :
                (min, max) limits for longitude and latitude used to
                subset the grid.
        """
        # Initializes the variables to default values. The indices 'n', 'k',
        # 'j' and 'i' refer to the temporal, height, meridional and zonal
        # coordinates respectively. If one of these indexes is set to 'None',
        # then it is assumed infinite size, which is relevant for the 'time'
        # coordinate.
        self.attributes = dict()
        self.dimensions = dict(n=0, k=0, j=0, i=0)
        self.coordinates = dict(n=None, k=None, j=None, i=None)
        self.variables = dict()
        self.params = dict()
        self.stencil_coeffs = dict()
        self.stencil_params = dict()

        # Sets global parameters for grid.
        if path is None:
            path = ('/home/sebastian/academia/data/ncdc.noaa/seawinds/stress/'
                'daily')
        self.params['path'] = path
        self.params['mask_file'] = mask_file
        self.params['missing_value'] = -9999.

        # Generates list of files, tries to match them to the pattern and to
        # extract the time (an eight-digit date, e.g. 'tauxy20150131.nc').
        file_pattern = 'tauxy([0-9]{8}).nc'
        flist = listdir(self.params['path'])
        flist, match = reglist(flist, file_pattern)
        self.params['file_list'] = flist
        if len(flist) == 0:
            return

        # Convert dates to matplotlib format, i.e. days since 0001-01-01 UTC.
        time_list = array([dates.datestr2num(item) for item in match])

        # Reads first file in dataset to determine array geometry and
        # dimensions (lon, lat).
        data = netcdf('%s/%s' % (self.params['path'],
            self.params['file_list'][0]), 'r')
        for var in data.variables.keys():
            if var in ['latitude', 'lat']:
                lat = data.variables[var].data
            elif var in ['longitude', 'lon']:
                lon = data.variables[var].data

        # If xlim and ylim are set, calculate how many indices have to be moved
        # in order for latitude array to start at xlim[0].
        if (xlim is not None) or (ylim is not None):
            if xlim is None:
                xlim = (lon.min(), lon.max())
            if ylim is None:
                ylim = (lat.min(), lat.max())
            # Wraps longitudes to the upper limit, sorts them and selects
            # the indices inside the requested longitude/latitude ranges.
            LON = lon_n(lon, xlim[1])
            i = argsort(LON)
            selx = i[flatnonzero((LON[i] >= xlim[0]) & (LON[i] <= xlim[1]))]
            sely = flatnonzero((lat >= ylim[0]) & (lat <= ylim[1]))
            ii, jj = meshgrid(selx, sely)
            lon = LON[selx]
            lat = lat[sely]
            self.params['xlim'] = xlim
            self.params['ylim'] = ylim
            self.params['lon_i'] = ii
            self.params['lat_j'] = jj
        # Grid spacing, assumed uniform throughout the dataset.
        self.params['dlon'] = lon[1] - lon[0]
        self.params['dlat'] = lat[1] - lat[0]

        # Initializes the grid attributes, dimensions, coordinates and
        # variables.
        self.name = 'sea_surface_wind_stress'
        for attr, attr_value in vars(data).items():
            if attr in ['mode', 'filename']:
                continue
            if isinstance(attr_value, str):
                if attr in ['name']:
                    self.name = attr_value
                elif attr in ['description', 'summary']:
                    self.description = attr_value
                else:
                    self.attributes[attr.lower()] = attr_value
        self.dimensions = dict(n=time_list.size, k=1, j=lat.size, i=lon.size)
        self.coordinates = dict(n='time', k='height', j='latitude',
            i='longitude')
        #
        self.variables = dict(
            time = atlantis.data.variable(
                canonical_units='days since 0001-01-01 UTC',
                data=time_list,
            ),
            height = atlantis.data.get_standard_variable('height', data=[0.]),
            latitude = atlantis.data.get_standard_variable('latitude',
                data=lat),
            longitude = atlantis.data.get_standard_variable('longitude',
                data=lon),
            xm = atlantis.data.variable(
                canonical_units = 'km',
                description = 'Zonal distance.'
            ),
            ym = atlantis.data.variable(
                canonical_units = 'km',
                description = 'Meridional distance.'
            ),
        )
        # Converts longitude/latitude to zonal/meridional distances in km.
        self.variables['xm'].data, self.variables['ym'].data = (
            metergrid(self.variables['longitude'].data,
            self.variables['latitude'].data, unit='km')
        )
        # Registers the stress variables available in the NetCDF file and
        # normalizes their attributes (missing value, units, description).
        self.params['var_list'] = list()
        for var in data.variables.keys():
            if var in ['tau', 'taux', 'tauy', 'tau_div', 'tau_curl']:
                attribs = dict()
                for attr, attr_value in vars(data.variables[var]).items():
                    if attr == '_FillValue':
                        attribs['missing_value'] = attr_value
                    elif attr == 'data':
                        continue
                    elif attr == 'long_name':
                        attribs['description'] = attr_value
                    elif attr == 'units':
                        # Normalizes stress units to CF-style notation.
                        if attr_value == 'N/m**2':
                            a = 'N m-2'
                        else:
                            a = attr_value
                        attribs['canonical_units'] = a
                    else:
                        attribs[attr] = attr_value
                self.variables[var] = atlantis.data.variable(**attribs)
                self.params['var_list'].append(var)
                if self.variables[var].missing_value is None:
                    self.variables[var].missing_value = (
                        self.params['missing_value'])
        #
        data.close()
        return
# Example no. 8
# 0
    def __init__(self,
                 path=None,
                 mask_file=None,
                 xlim=None,
                 ylim=None,
                 tlim=None,
                 useqd=False):
        """
        Initializes the AVISO merged sea level anomaly (MSLA) dataset
        grid with geostrophic velocities.

        PARAMETERS
            path (text, optional) :
                Path to the dataset files. Defaults to the local AVISO
                merged MSLA directory.
            mask_file (text, optional) :
                Path to an optional mask file.
            xlim, ylim (tuple, optional) :
                (min, max) limits for longitude and latitude used to
                subset the grid.
            tlim (array like, optional) :
                (start, end) time limits, either as matplotlib date
                numbers or as date strings.
            useqd (boolean, optional) :
                If True, reads only the quick-look ('h_qd') sea surface
                height dataset instead of the full h/uv/err datasets.
        """
        # Initializes the variables to default values. The indices 'n', 'k',
        # 'j' and 'i' refer to the temporal, height, meridional and zonal
        # coordinates respectively. If one of these indexes is set to 'None',
        # then it is assumed infinite size, which is relevant for the 'time'
        # coordinate.
        self.attributes = dict()
        self.dimensions = dict(n=0, k=0, j=0, i=0)
        self.coordinates = dict(n=None, k=None, j=None, i=None)
        self.variables = dict()
        self.params = dict()
        self.stencil_coeffs = dict()
        self.stencil_params = dict()
        # Maps dataset identifiers, NetCDF grid variable names and
        # dataset/variable/grid triplets according to the selected mode.
        if useqd:
            self.params['datasets'] = [dict(id='h', var='h_qd')]
            self.params['var_dict'] = dict(h_Grid_0001='h_qd')
            self.params['var_tcid'] = dict(h_qd=['h', 'h_qd', 'Grid_0001'])
        else:
            self.params['datasets'] = [
                dict(id='h', var='h'),
                dict(id='uv', var='uv'),
                dict(id='err', var='err')
            ]
            self.params['var_dict'] = dict(h_Grid_0001='h',
                                           uv_Grid_0001='u',
                                           uv_Grid_0002='v',
                                           err_Grid_0001='err')
            self.params['var_tcid'] = dict(h=['h', 'h', 'Grid_0001'],
                                           u=['uv', 'uv', 'Grid_0001'],
                                           v=['uv', 'uv', 'Grid_0002'],
                                           err=['err', 'err', 'Grid_0001'])
        # Creates an universally unique identifier (UUID) for this instance
        self.params['uuid'] = str(uuid())

        # Sets global parameters for grid.
        if path is None:
            path = ('/home/sebastian/academia/data/aviso/msla/merged')
        self.params['path'] = path
        self.params['mask_file'] = mask_file
        self.params['missing_value'] = -9999.

        # Generates list of files, tries to match them to the pattern and to
        # extract the time. The raw string prevents '\d' from being treated
        # as an (invalid) string escape.
        file_pattern = (
            r'dt_ref_global_merged_msla_(%s)_(\d*)_(\d*)_(\d*)'
            '.nc.gz' %
            ('|'.join([item['var'] for item in self.params['datasets']])))
        flist = listdir(
            '%s/%s' % (self.params['path'], self.params['datasets'][0]['id']))
        flist.sort()
        flist, match = reglist(flist, file_pattern)

        # Convert dates to matplotlib format, i.e. days since 0001-01-01 UTC.
        time_list = array(
            dates.datestr2num([
                '%4s-%2s-%2s 12:00' % (item[1][:4], item[1][4:6], item[1][6:])
                for item in match
            ]))

        # If tlim is set, calculate the time limits of the dataset and
        # corresponding files. String limits are parsed to date numbers first.
        if tlim is not None:
            for i, t in enumerate(tlim):
                if isinstance(t, str):
                    tlim[i] = dates.datestr2num(t)
            #
            t_sel = flatnonzero(
                ((time_list >= tlim[0]) & (time_list <= tlim[1])))
            time_list = time_list[t_sel]
        else:
            t_sel = range(len(time_list))

        # Stores, per selected time step, the start, end and creation date
        # strings needed to rebuild the file names later.
        fdict = [
            dict(start=match[n][1], end=match[n][2], creation=match[n][3])
            for n in t_sel
        ]
        self.params['file_list'] = fdict
        if len(flist) == 0:
            return

        # Reads first file in dataset to determine array geometry and
        # dimensions (lon, lat).
        params = dict(path=self.params['path'],
                      dataset=self.params['datasets'][0]['id'],
                      datavar=self.params['datasets'][0]['var'],
                      **self.params['file_list'][0])
        fname = self.create_filename(**params)
        data = self.read_file(fname)
        lat = data.variables['NbLatitudes'].data
        lon = data.variables['NbLongitudes'].data

        # If xlim and ylim are set, calculate how many indices have to be moved
        # in order for latitude array to start at xlim[0].
        lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(
            lon, lat, xlim, ylim)
        self.params['xlim'], self.params['ylim'] = xlim, ylim
        self.params['lon_i'], self.params['lat_j'] = ii, jj
        # Grid spacing, assumed uniform throughout the dataset.
        self.params['dlon'] = lon[1] - lon[0]
        self.params['dlat'] = lat[1] - lat[0]

        # Initializes the grid attributes, dimensions, coordinates and
        # variables.
        self.name = 'sea_level_anomaly_geostrophic_velocities'
        for attr, attr_value in vars(data).items():
            if attr in ['mode', 'filename']:
                continue
            if isinstance(attr_value, str):
                if attr in ['name']:
                    self.name = attr_value
                elif attr in ['description', 'summary', 'title']:
                    self.description = attr_value
                else:
                    self.attributes[attr.lower()] = attr_value
        self.dimensions = dict(n=time_list.size, k=1, j=lat.size, i=lon.size)
        self.coordinates = dict(n='time',
                                k='height',
                                j='latitude',
                                i='longitude')
        #
        self.variables = dict(
            time=atlantis.data.variable(
                canonical_units='days since 0001-01-01 UTC',
                data=time_list,
            ),
            height=atlantis.data.get_standard_variable('height', data=[0.]),
            latitude=atlantis.data.get_standard_variable('latitude', data=lat),
            longitude=atlantis.data.get_standard_variable('longitude',
                                                          data=lon),
            xm=atlantis.data.variable(canonical_units='km',
                                      description='Zonal distance.'),
            ym=atlantis.data.variable(canonical_units='km',
                                      description='Meridional distance.'),
        )
        # Converts longitude/latitude to zonal/meridional distances in km.
        self.variables['xm'].data, self.variables['ym'].data = (metergrid(
            self.variables['longitude'].data,
            self.variables['latitude'].data,
            unit='km'))
        # Walks through every dataset to read the list of variables.
        self.params['var_list'] = list()
        for i, dataset in enumerate(self.params['datasets']):
            if i > 0:
                # The first dataset file is already open; open the others.
                params = dict(path=self.params['path'],
                              dataset=dataset['id'],
                              datavar=dataset['var'],
                              **self.params['file_list'][0])
                fname = self.create_filename(**params)
                data = self.read_file(fname)
            # Walks through every variable in NetCDF file
            for var in data.variables.keys():
                if var in ['Grid_0001', 'Grid_0002']:
                    nvar = self.params['var_dict']['{0}_{1}'.format(
                        dataset['id'], var)]
                    attribs = dict(
                        missing_value=data.variables[var]._FillValue,
                        canonical_units=data.variables[var].units,
                        description=data.variables[var].long_name,
                        dataset=dataset,
                        variable=var)
                    self.variables[nvar] = atlantis.data.variable(**attribs)
                    self.params['var_list'].append(nvar)
            # Closes the data access and removes temporary NetCDF file
            self.close_file(data)

        return
# Example no. 9
# 0
    def __init__(self, delay='dt', missions=None, zone='global',
        product='sla', variable='vxxc', path=None, profile=True):
        """
        Initializes the dataset class for reading along-track gridded
        sequential data from the SSALTO/DUACS distributed by Aviso.

        PARAMETERS
            delay (text, optional) :
                Selects whether delayed time products (dt, default)
                or near-real time products are read.
            missions (text, array like, optional) :
                Determines the satellite missions to be selected (i. e.
                e1, e2, tp, tpn, g2, j1, j1n, j2, en, enn, c2, al) If
                set to 'none', all available missions are used.
            zone (text, optional) :
                Geographic coverage of the selected products,
                    global -- Global geographic coverage;
                    med -- Mediterranean;
                    blacksea -- Black Sea;
                    moz -- Mozambique;
                    arctic -- Arctic;
                    europe -- Europe.
            product (text, optional) :
                Variable to be read (sla -- sea level anomaly or
                adt -- absolute dynamic topography)
            variable (text, optional) :
                Either 'vfec' for validated, filtered, sub-sampled and
                LWE-corrected; or 'vxxc' for validated, non-filtered,
                non-sub-sampled and LWE-corrected data.
            path (text, optional) :
                Path to the dataset files.
            profile (boolean, optional) :
                If True (default), writes scan progress to standard
                output.

        """
        t0 = time()
        # Checks all the input parameters for consistency
        if delay not in self._delays:
            raise ValueError('Invalid delay parameter "%s".' % (delay))
        if missions is None:
            missions = self._missions.keys()
        elif isinstance(missions, str):
            if missions in self._missions:
                missions = [missions]
            else:
                raise ValueError('Invalid mission "%s".' % (missions))
        elif isinstance(missions, list):
            for item in missions:
                if item not in self._missions:
                    raise ValueError('Invalid mission "%s".' % (item))
        else:
            raise ValueError('Invalid mission "%s".' % (missions))
        if zone not in self._zones:
            raise ValueError('Invalid geographic zone "%s".' % (zone))
        if product not in self._products:
            raise ValueError('Invalid product "%s".' % (product))
        if variable not in self._filterings:
            raise ValueError('Invalid variable "%s".' % (variable))

        # Initializes parameters and attributes in class variable
        self.attributes = dict()
        self.dimensions = dict(n=0, k=0, j=0, i=0)
        self.coordinates = dict(n=None, k=None, j=None, i=None)
        self.variables = dict()
        self.params = dict(
            delay = delay,
            missions = missions,
            zone = zone,
            product = product,
            variable = variable
        )

        # Creates an universally unique identifiers (UUID) for this instance
        self.params['uuid'] = str(uuid())

        # Sets path and missing value parameters
        if path is None:
            path = '%s/%s/%s/%s/%s' % ('/academia/data/raw/aviso',
                self._delays[delay], 'along-track', self._filterings[variable],
                product)
        self.params['path'] = path
        self.params['missing_value'] = -9999.

        # Determines the temporal range of the whole data set per mission
        t1 = time()
        if profile:
            s = '\rBuilding preliminary time array...'
            stdout.write(s)
            stdout.flush()

        time_mission = dict()
        time_dataset = dict()
        N = len(self.params['missions'])
        for i, mission in enumerate(self.params['missions']):
            t2 = time()
            #
            mpath = '%s/%s' % (path, mission)  # Mission path
            ylist = listdir(mpath)  # Year list in mission path
            # Raw string prevents '\d' from being treated as an (invalid)
            # string escape.
            file_pattern = r'%s_%s_%s_%s_%s_(\d*)_(\d*).nc.gz' % (delay, zone,
                mission, product, variable)
            time_mission[mission] = dict(data=[], product=[], file=[])
            for yr in ylist:
                # Lists all the data files in mission in a given year and
                # matches it with the file pattern.
                flist = listdir('%s/%s' % (mpath, yr))
                flist.sort()
                flist, match = reglist(flist, file_pattern)
                # Convert data and product dates to matplotlib format, i.e.
                # days since 0001-01-01 UTC and appends to the global mission
                # and dataset time dictionaries.
                for j, item in enumerate(match):
                    time_data = dates.datestr2num('%4s-%2s-%2s 12:00' %
                        (item[0][:4], item[0][4:6], item[0][6:]))
                    time_mission[mission]['data'].append(time_data)
                    fname = '%s/%s' % (yr, flist[j])
                    descriptor = (mission, fname)
                    if time_data not in time_dataset:
                        time_dataset[time_data] = [descriptor]
                    else:
                        time_dataset[time_data].append(descriptor)
                    # Product (processing) date is the second matched group.
                    time_product = dates.datestr2num('%4s-%2s-%2s 12:00' %
                        (item[1][:4], item[1][4:6], item[1][6:]))
                    time_mission[mission]['product'].append(time_product)
                    #
                    time_mission[mission]['file'].append(fname)
            #
            time_mission[mission]['data'] = array(
                time_mission[mission]['data']
            )
            time_mission[mission]['product'] = array(
                time_mission[mission]['product']
            )
            time_mission[mission]['file'] = array(
                time_mission[mission]['file']
            )
            # Profiling
            if profile:
                s = '\rBuilding preliminary time array... %s ' % (profiler(N,
                    i+1, t0, t1, t2),)
                stdout.write(s)
                stdout.flush()
        #
        if profile:
            stdout.write('\n')
            stdout.flush()
        #
        self.attributes['time_mission'] = time_mission
        self.attributes['time_dataset'] = time_dataset

        # Updates dimensions, coordinates and creates the coordinate
        # variables. BUGFIX: the original code passed the height, latitude
        # and longitude variables as keyword arguments to the 'time'
        # variable constructor; they are separate grid variables, consistent
        # with the other dataset classes in this module.
        self.dimensions['n'] = len(time_dataset)
        self.coordinates['n'] = 'time'
        self.variables['time'] = atlantis.data.variable(
            canonical_units = 'days since 0001-01-01 UTC',
            data = array(sorted(time_dataset.keys())),
        )
        self.variables['height'] = atlantis.data.get_standard_variable(
            'height', data=[0.])
        self.variables['latitude'] = atlantis.data.get_standard_variable(
            'latitude')
        self.variables['longitude'] = atlantis.data.get_standard_variable(
            'longitude')
        return None