def test_operations_two_steps(self):
        """Build a daily-percentile basis array from a source dataset, then feed it to an
        OcgOperations calculation ('dynamic_kernel_percentile_threshold'), writing both
        netCDF and in-memory (numpy) output and checking the result shape."""
        ## get the request dataset to use as the basis for the percentiles
        uri = self.test_data.get_uri('cancm4_tas')
        variable = 'tas'
        rd = RequestDataset(uri=uri,variable=variable)
        ## this is the underlying OCGIS dataset object
        nc_basis = rd.get()
        
        ## NOTE: if you want to subset the basis by time, this step is necessary
#        nc_basis = nc_basis.get_between('temporal',datetime.datetime(2001,1,1),datetime.datetime(2003,12,31,23,59))
        
        ## these are the values to use when calculating the percentile basis. it
        ## may be good to wrap this in a function to have memory freed after the
        ## percentile structure array is computed.
        all_values = nc_basis.variables[variable].value
        ## these are the datetime objects used for window creation
        temporal = nc_basis.temporal.value_datetime
        ## additional parameters for calculating the basis
        percentile = 10
        width = 5
        ## get the structure array
        from ocgis.calc.library.index.dynamic_kernel_percentile import DynamicDailyKernelPercentileThreshold
        daily_percentile = DynamicDailyKernelPercentileThreshold.get_daily_percentile(all_values,temporal,percentile,width)
        
        ## perform the calculation using the precomputed basis. in this case,
        ## the basis and target datasets are the same, so the RequestDataset is
        ## reused.
        calc_grouping = ['month','year']
        kwds = {'percentile':percentile,'width':width,'operation':'lt','daily_percentile':daily_percentile}
        calc = [{'func':'dynamic_kernel_percentile_threshold','name':'tg10p','kwds':kwds}]
        ops = OcgOperations(dataset=rd,calc_grouping=calc_grouping,calc=calc,
                            output_format='nc')
        ret = ops.execute()
        
        ## if we want to return the values as a three-dimensional numpy array the
        ## method below will do this. note the interface arrangement for the next
        ## release will alter this slightly.
        ops = OcgOperations(dataset=rd,calc_grouping=calc_grouping,calc=calc,
                            output_format='numpy')
        arrs = ops.execute()
        ## reference the returned numpy data. the first key is the geometry identifier.
        ## 1 in this case as this is the default for no selection geometry. the second
        ## key is the request dataset alias and the third is the calculation name.
        ## the variable name is appended to the end of the calculation to maintain
        ## a unique identifier.
        tg10p = arrs[1]['tas'].variables['tg10p'].value
        ## if we want the date information for the temporal groups date attributes
        date_parts = arrs[1]['tas'].temporal.date_parts
        assert(date_parts.shape[0] == tg10p.shape[1])
        ## these are the representative datetime objects
        rep_dt = arrs[1]['tas'].temporal.value_datetime
        ## and these are the lower and upper time bounds on the date groups
        bin_bounds = arrs[1]['tas'].temporal.bounds_datetime
        
        ## confirm we have values for each month and year (12*10)
        ret_ds = nc.Dataset(ret)
        try:
            self.assertEqual(ret_ds.variables['tg10p'].shape,(120,64,128))
        finally:
            ret_ds.close()
# Example #2
# 0
 def __init__(self, uri=None, variable=None, interface_overload=None, request_dataset=None):
     """Collect dataset/metadata handles for inspection.

     :param uri: Path to the target dataset (used when ``request_dataset`` is not given).
     :param variable: Optional variable name; when given a RequestDataset is built from it.
     :param interface_overload: Optional dict of keyword overloads passed to RequestDataset.
     :param request_dataset: Pre-built request dataset; takes precedence over ``uri``/``variable``.
     """
     # Use None as the default to avoid sharing one mutable dict across all calls.
     if interface_overload is None:
         interface_overload = {}
     self.request_dataset = request_dataset
     if self.request_dataset is None:
         self.uri = uri
         self.variable = variable
         self.alias = None
         self.did = None
         if self.variable is None:
             self.ds = None
             # Open before the try block: if the open itself fails there is no handle to
             # close, and the previous arrangement raised NameError in the finally clause.
             rootgrp = nc.Dataset(uri)
             try:
                 self.meta = NcMetadata(rootgrp)
             finally:
                 rootgrp.close()
         else:
             from ocgis.api.request.base import RequestDataset
             kwds = {'uri': uri, 'variable': variable}
             kwds.update(interface_overload)
             rd = RequestDataset(**kwds)
             self.ds = rd.get()
             self.meta = self.ds.meta
     else:
         # Everything derives from the supplied request dataset.
         self.uri = self.request_dataset.uri
         self.variable = self.request_dataset.variable
         self.ds = self.request_dataset.get()
         self.meta = self.ds.meta
         self.alias = self.request_dataset.alias
         self.did = self.request_dataset.did
# Example #3
# 0
    def test_get_field_nonequivalent_units_in_source_data(self):
        """Conforming units must fail when the source units are non-equivalent or missing."""
        path = self.test_data.copy_file('cancm4_tas', self.current_dir_output)

        # Write physically incompatible units onto the source variable, then try to conform.
        with nc_scope(path, 'a') as source:
            source.variables['tas'].units = 'coulomb'
        request = RequestDataset(uri=path, variable='tas', conform_units_to='celsius')
        with self.assertRaises(RequestValidationError):
            request.get()

        # Strip the units attribute entirely; there are now no source units to convert from.
        with nc_scope(path, 'a') as source:
            source.variables['tas'].delncattr('units')
        request = RequestDataset(uri=path, variable='tas', conform_units_to='celsius')
        with self.assertRaises(NoUnitsError):
            request.get()
    def test_dev_get_split_shapefile(self):
        """Development-only: split catchment polygons whose node count exceeds a threshold.

        The SkipTest raise makes everything below unreachable during normal test runs; the
        body is retained for manual development use against a local shapefile.
        """
        raise SkipTest('development only')
        self.set_debug()

        shp_path = '/home/benkoziol/l/data/nfie/linked_catchment_shapefiles/linked_13-RioGrande.shp'
        rd = RequestDataset(uri=shp_path)
        field = rd.get()
        self.log.debug('loading from file')
        field.geom.value
        # NOTE(review): map() must yield a sequence here (Python 2 semantics); under
        # Python 3 this would need list(map(...)) before np.array.
        node_count = map(get_node_count, field.geom.value)
        select = np.array(node_count) > 10000
        to_split = field['GRIDCODE'][select]
        for gc in to_split.value.flat:
            self.log.debug('target gridcode: {}'.format(gc))
            idx = np.where(field['GRIDCODE'].value == gc)[0][0]
            target_geom = field.geom.value[idx]
            split_geom = get_split_polygon_by_node_threshold(target_geom, 10000)
            # write_fiona(split_geom, gc)
            # the split geometry must preserve the original area
            self.assertAlmostEqual(split_geom.area, target_geom.area)
            field.geom.value[idx] = split_geom
            self.assertAlmostEqual(field.geom.value[idx].area, target_geom.area)
        self.log.debug(field.geom.geom_type)
        # field.geom[select].parent.write('/tmp/rio-grande-assembled.shp', driver=DriverVector)

        # write_fiona(field.geom.value, 'rio-grande-assembled')
        self.log.debug('writing shapefile')
        field.write('/tmp/rio-grande-assembled.shp', driver=DriverVector)
# Example #5
# 0
    def test_name(self):
        """A default name is derived for vector sources; an explicit name reaches the field."""
        shp = ShpCabinet().get_shp_path('state_boundaries')

        # Without an explicit name one is still generated.
        request = RequestDataset(uri=shp, driver='vector')
        self.assertIsNotNone(request.name)

        # An explicit name is carried through to the returned field.
        request = RequestDataset(uri=shp, driver='vector', name='states')
        self.assertEqual(request.name, 'states')
        self.assertEqual(request.get().name, 'states')
# Example #6
# 0
    def test_init_driver(self):
        """Driver selection: an explicit vector driver works on shapefiles, netCDF is
        auto-detected, and a mismatched driver raises on attribute access."""
        shp_uri = ShpCabinet().get_shp_path('state_boundaries')
        request = RequestDataset(uri=shp_uri, driver='vector')
        self.assertIsNotNone(request.variable)
        self.assertIsInstance(request.get(), Field)

        # The netCDF driver is chosen automatically from the uri.
        nc_uri = self.test_data.get_uri('cancm4_tas')
        self.assertIsInstance(RequestDataset(nc_uri).driver, DriverNetcdf)

        # A vector driver cannot interpret a netCDF source.
        mismatched = RequestDataset(nc_uri, driver='vector')
        with self.assertRaises(ValueError):
            assert mismatched.variable
# Example #7
# 0
    def test_with_overloads_real_data(self):
        """Overloading ``t_calendar`` recovers correct datetime conversion when the source
        file carries an invalid calendar attribute; the overloads also flow through a
        RequestDatasetCollection into shapefile output."""
        # copy the test file as the calendar attribute will be modified
        rd = self.test_data.get_rd('cancm4_tas')
        filename = os.path.split(rd.uri)[1]
        dest = os.path.join(self.current_dir_output, filename)
        shutil.copy2(rd.uri, dest)
        # modify the calendar attribute
        with nc_scope(dest, 'a') as ds:
            self.assertEqual(ds.variables['time'].calendar, '365_day')
            ds.variables['time'].calendar = '365_days'
        # assert the calendar is in fact changed on the source file
        with nc_scope(dest, 'r') as ds:
            self.assertEqual(ds.variables['time'].calendar, '365_days')
        rd2 = RequestDataset(uri=dest, variable='tas')
        field = rd2.get()
        # the bad calendar will raise a value error when the datetimes are converted.
        with self.assertRaises(ValueError):
            field.temporal.value_datetime
        # overload the calendar and confirm the datetime values are the same as the datetime values from the original
        # good file
        rd3 = RequestDataset(uri=dest, variable='tas', t_calendar='365_day')
        field = rd3.get()
        self.assertNumpyAll(field.temporal.value_datetime, rd.get().temporal.value_datetime)

        # pass as a dataset collection to operations and confirm the data may be written to a flat file. dates are
        # converted in the process.
        time_range = (datetime.datetime(2001, 1, 1, 0, 0), datetime.datetime(2011, 1, 1, 0, 0))
        dataset = [{'time_region': None,
                    'uri': dest,
                    'time_range': time_range,
                    'alias': u'tas',
                    't_units': u'days since 1850-1-1',
                    'variable': u'tas',
                    't_calendar': u'365_day'}]
        rdc = RequestDatasetCollection(dataset)
        ops = OcgOperations(dataset=rdc, geom='state_boundaries', select_ugid=[25],
                            output_format=constants.OUTPUT_FORMAT_SHAPEFILE)
        ops.execute()
# Example #8
# 0
    def test_keyword_time_subset_func(self):
        """A time_subset_func restricts the temporal dimension for both numpy and netCDF output."""

        def _func_(value, bounds=None):
            # Keep only the indices whose datetime value falls in June.
            return [idx for idx, dt in enumerate(value.flat) if dt.month == 6]

        rd = self.test_data.get_rd('cancm4_tas')
        ops = OcgOperations(dataset=rd, time_subset_func=_func_, geom='state_boundaries', geom_select_uid=[20])
        ret = ops.execute()
        for dt in ret[20]['tas'].temporal.value_datetime:
            self.assertEqual(dt.month, 6)

        # The same subset must hold when the result is round-tripped through netCDF.
        rd = self.test_data.get_rd('cancm4_tas')
        ops = OcgOperations(dataset=rd, time_subset_func=_func_, geom='state_boundaries', geom_select_uid=[20],
                            output_format=constants.OUTPUT_FORMAT_NETCDF)
        ret = ops.execute()
        subset = RequestDataset(ret)
        for dt in subset.get().temporal.value_datetime:
            self.assertEqual(dt.month, 6)
# Example #9
# 0
    def test_init_combinations(self):
        """Exhaustively exercise RequestDataset keyword combinations and assert either a
        consistent object/field or the expected RequestValidationError for invalid input."""
        rd_orig = self.test_data.get_rd('cancm4_tas')
        dest_uri = os.path.join(self.current_dir_output, os.path.split(rd_orig.uri)[1])
        shutil.copy2(rd_orig.uri, dest_uri)
        # add a second variable ('tasmax') so multi-variable keyword values can be tested
        with nc_scope(dest_uri, 'a') as ds:
            var = ds.variables['tas']
            outvar = ds.createVariable(var._name + 'max', var.dtype, var.dimensions)
            outvar[:] = var[:] + 3
            outvar.setncatts(var.__dict__)
        with nc_scope(dest_uri) as ds:
            self.assertTrue(set(['tas', 'tasmax']).issubset(set(ds.variables.keys())))

        keywords = dict(
            name=[None, 'foo'],
            uri=[None, dest_uri],
            variable=[None, 'tas', ['tas', 'tasmax'], 'crap'],
            alias=[None, 'tas', ['tas', 'tasmax'], ['tas_alias', 'tasmax_alias']],
            units=[None, [None, None], ['celsius', 'fahrenheit'], 'crap', [None, 'kelvin'], ['crap', 'crappy']],
            conform_units_to=[None, [None, None], ['celsius', 'fahrenheit'], 'crap', [None, 'kelvin'],
                              ['crap', 'crappy'], [None, 'coulomb'], ['coulomb', 'coulomb']])

        # yield single-key dictionaries for one keyword's candidate values
        def itr_row(key, sequence):
            for element in sequence:
                yield ({key: element})

        # cartesian product of all keyword values merged into one kwargs dict
        # NOTE: dict.iteritems is Python 2-only.
        def itr_products_keywords(keywords):
            iterators = [itr_row(ki, vi) for ki, vi in keywords.iteritems()]
            for dictionaries in itertools.product(*iterators):
                yld = {}
                for dictionary in dictionaries:
                    yld.update(dictionary)
                yield yld

        for k in itr_products_keywords(keywords):
            try:
                rd = RequestDataset(**k)
                self.assertEqual(rd._source_metadata, None)
                self.assertEqual(len(get_tuple(rd.variable)), len(get_tuple(rd.units)))
                if k['name'] is None:
                    self.assertEqual(rd.name, '_'.join(get_tuple(rd.alias)))
                else:
                    self.assertEqual(rd.name, 'foo')
                for v in rd._variable:
                    try:
                        self.assertTrue(v in rd.source_metadata['variables'].keys())
                    except VariableNotFoundError:
                        if 'crap' in rd._variable:
                            self.assertEqual(rd._source_metadata, None)
                            break
                if k['units'] is None and len(rd._variable) == 1:
                    self.assertEqual(rd.units, None)
                    self.assertEqual(rd._units, None)

                try:
                    field = rd.get()
                    self.assertEqual(field.name, rd.name)
                    self.assertEqual(set(field.variables.keys()), set(get_tuple(rd.alias)))
                except VariableNotFoundError:
                    if 'crap' in rd._variable:
                        continue
                    else:
                        raise
                except RequestValidationError:
                    if 'coulomb' in get_tuple(k['conform_units_to']):
                        continue
                    else:
                        raise
            except RequestValidationError as e:
                # uris cannot be None
                if k['uri'] is None:
                    pass
                # variables cannot be None
                elif k['variable'] is None:
                    pass
                # 'crap' is not a real variable name
                elif k['conform_units_to'] is not None and (k['conform_units_to'] == 'crap' or \
                                                                        'crap' in k['conform_units_to']):
                    pass
                # conform_units_to must match units element-wise
                elif k['conform_units_to'] is not None and k['variable'] is not None and \
                                len(k['conform_units_to']) != len(k['variable']):
                    pass
                # aliases must occur for each variable
                elif len(get_tuple(k['alias'])) != len(get_tuple(k['variable'])):
                    pass
                # units must occur for each variable
                elif len(get_tuple(k['units'])) != len(get_tuple(k['variable'])):
                    pass
                # bad unit definition
                # 'crap' is not a real variable name
                elif k['units'] is not None and (k['units'] == 'crap' or \
                                                             'crap' in k['units']):
                    pass
                # always need a uri and variable
                elif k['uri'] is None:
                    pass
                else:
                    raise
            except:
                raise
# Example #10
# 0
 def test_init_variable_not_found(self):
     """Requesting a variable absent from the source raises VariableNotFoundError."""
     source = self.test_data.get_rd('cancm4_tas')
     bad_request = RequestDataset(uri=source.uri, variable='crap')
     with self.assertRaises(VariableNotFoundError):
         bad_request.get()
# Example #11
# 0
class Inspect(object):
    """
    Inspect a local or remote dataset returning a printout similar to `ncdump`_.
    
    >>> from ocgis import Inspect
    ...
    >>> # Just do a dataset attribute dump.
    >>> ip = Inspect('/my/local/dataset')
    >>> print(ip)
    ...
    >>> # Get variable-specific info.
    >>> ip = Inspect('/my/local/dataset',variable='tas')
    >>> print(ip)
    
    :param uri: Absolute path to data's location.
    :type uri: str
    :param variable: Specific variable to inspect.
    :type variable: str
    :param interface_overload: Overloads for autodiscover.
    :type interface_overload: dict
    :param meta: Use this metadata object in place of the one created internally.
    :type meta: :class:`ocgis.NcMetadata`
    
    .. _ncdump: http://www.unidata.ucar.edu/software/netcdf/docs/netcdf/ncdump.html
    """

    def __init__(self, uri=None, variable=None, request_dataset=None, meta=None):
        if meta is None and uri is None and request_dataset is None:
            raise ValueError('At least one of "uri", "request_dataset", or "meta" must be provided.')

        self.uri = uri
        self.variable = variable
        self.meta = meta
        self.request_dataset = request_dataset

        if self.request_dataset is None and self.meta is None:
            if self.uri is not None and self.variable is not None:
                from ocgis.api.request.base import RequestDataset
                self.request_dataset = RequestDataset(uri=self.uri, variable=self.variable)

        self.alias = None
        self.did = None
        self.ds = None

        if self.request_dataset is None:
            if self.meta is None:
                rootgrp = nc.Dataset(self.uri)
                try:
                    self.meta = NcMetadata(rootgrp)
                finally:
                    rootgrp.close()
        else:
            self.uri = self.request_dataset.uri
            self.variable = self.request_dataset.variable
            self.ds = self.request_dataset.get()
            self.meta = self.ds.meta
            self.alias = self.request_dataset.alias
            self.did = self.request_dataset.did

    def __repr__(self):
        msg = ''
        if self.request_dataset is None:
            lines = self.get_report_no_variable()
        else:
            lines = self.get_report()
        for line in lines:
            msg += line + '\n'
        return msg

    @property
    def _t(self):
        # Shorthand for the dataset's temporal dimension.
        return self.ds.temporal

    @property
    def _s(self):
        # Shorthand for the dataset's spatial dimension.
        return self.ds.spatial

    @property
    def _l(self):
        # Shorthand for the dataset's level dimension (checked for None by callers).
        return self.ds.level

    def get_temporal_report(self):

        try:
            if self._t.format_time:
                res = int(self._t.resolution)
                try:
                    start_date, end_date = self._t.extent_datetime
                # # the times may not be formattable
                except ValueError as e:
                    if e.message == 'year is out of range':
                        start_date, end_date = self._t.extent
                    else:
                        ocgis_lh(exc=e, logger='inspect')
            else:
                res = 'NA (non-formatted times requested)'
                start_date, end_date = self._t.extent
        # # raised if the temporal dimension has a single value. possible with
        ## snippet or a small dataset...
        except ResolutionError:
            res, start_date, end_date = ['NA (singleton)'] * 3

        n = len(self._t.value)

        # # if the calendar attribute is not set, the feature should not be masked
        calendar = self.meta['variables'][self._t.name]['attrs'].get('calendar')
        if calendar is None:
            calendar = 'None (will assume "standard")'

        units = self._t.units

        lines = []
        lines.append('       Start Date = {0}'.format(start_date))
        lines.append('         End Date = {0}'.format(end_date))
        lines.append('         Calendar = {0}'.format(calendar))
        lines.append('            Units = {0}'.format(units))
        lines.append('Resolution (Days) = {0}'.format(res))
        lines.append('            Count = {0}'.format(n))

        ## append information on bounds
        if self._t.bounds is not None:
            has_bounds = True
        else:
            has_bounds = False
        lines.append('       Has Bounds = {0}'.format(has_bounds))

        return lines

    def get_spatial_report(self):
        res = self._s.grid.resolution
        extent = self._s.grid.extent

        itype = self._s.geom.get_highest_order_abstraction().__class__.__name__
        projection = self.ds.spatial.crs

        lines = []
        lines.append('Spatial Reference = {0}'.format(projection.__class__.__name__))
        lines.append('     Proj4 String = {0}'.format(projection.sr.ExportToProj4()))
        lines.append('           Extent = {0}'.format(extent))
        lines.append('   Interface Type = {0}'.format(itype))
        lines.append('       Resolution = {0}'.format(res))
        lines.append('            Count = {0}'.format(self._s.grid.uid.reshape(-1).shape[0]))

        return lines

    def get_level_report(self):
        if self._l is None:
            lines = ['No level dimension found.']
        else:
            lines = []
            lines.append('Level Variable = {0}'.format(self._l.name))
            lines.append('         Count = {0}'.format(self._l.value.shape[0]))

            # # append information on bounds
            if self._l.bounds is not None:
                has_bounds = True
            else:
                has_bounds = False
            lines.append('    Has Bounds = {0}'.format(has_bounds))

        return lines

    def get_dump_report(self):
        return (self.meta._get_lines_())

    def get_report_no_variable(self):
        lines = ['', 'URI = {0}'.format(self.uri)]
        lines.append('VARIABLE = {0}'.format(self.variable))
        lines.append('')
        lines += self.get_dump_report()
        return lines

    def get_report(self):

        # # a variable target is required for this method
        if self.variable is None:
            raise (AttributeError('A "variable" target is required.'))

        mp = [
            {'=== Temporal =============': self.get_temporal_report},
            {'=== Spatial ==============': self.get_spatial_report},
            {'=== Level ================': self.get_level_report},
            {'=== Dump =================': self.get_dump_report}
        ]

        lines = ['', 'URI = {0}'.format(self.uri)]
        lines.append('VARIABLE = {0}'.format(self.variable))
        lines.append('ALIAS = {0}'.format(self.alias))
        lines.append('DID = {0}'.format(self.did))
        lines.append('')
        for dct in mp:
            for key, value in dct.iteritems():
                lines.append(key)
                lines.append('')
                for line in value():
                    lines.append(line)
            lines.append('')

        return lines

    def _as_dct_(self):
        ret = self.meta.copy()
        # # without a target variable, attempt to set start and end dates.
        if self.variable is None:
            ds = nc.Dataset(self.uri, 'r')
            try:
                time = ds.variables['time']
                time_bounds = [time[0], time[-1]]
                time_bounds = nc.num2date(time_bounds, time.units, calendar=time.calendar)
                derived = {'Start Date': str(time_bounds[0]), 'End Date': str(time_bounds[1])}
            except:
                warn('Time variable not found or improperly attributed. Setting "derived" key to None.')
                derived = None
            finally:
                ds.close()
        ## we can get derived values
        else:
            derived = OrderedDict()
            to_add = self.get_temporal_report() + self.get_spatial_report() + self.get_level_report()
            for row in to_add:
                try:
                    key, value = re.split(' = ', row, maxsplit=1)
                ## here to catch oddities of the returns
                except ValueError:
                    if row == 'No level dimension found.':
                        continue
                    else:
                        raise
                key = key.strip()
                derived.update({key: value})
        ret.update({'derived': derived})
        return ret