Пример #1
0
    def test_system_get_field_dimensioned_variables(self):
        """Test data is appropriately tagged to identify dimensioned variables."""
        path = self.get_temporary_file_path('foo.nc')

        # Build a field with a time axis, a 2x4 grid, two fully-dimensioned
        # variables, and one time-only variable, then persist it to netCDF.
        tvar = TemporalVariable(value=[1, 2, 3], dimensions='time')
        xvar = Variable(name='x', value=[10, 20], dimensions='x')
        yvar = Variable(name='y', value=[30, 40, 50, 60], dimensions='y')
        dvars = [Variable(name=n, value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
                 for n in ('data1', 'data2')]
        dvars.append(Variable(name='data3', value=[11, 12, 13], dimensions=['time']))
        field = Field(time=tvar, grid=Grid(xvar, yvar), variables=dvars)
        field.write(path)

        # Dimensioned variables should be identified from the file metadata.
        rd = RequestDataset(path)
        self.assertEqual(rd.variable, ('data1', 'data2'))
        read_field = rd.get()
        self.assertEqual(get_variable_names(read_field.data_variables), ('data1', 'data2'))

        # An explicit "variable" argument overloads the discovered set.
        rd = RequestDataset(path, variable='data2')
        read_field = rd.get()
        self.assertEqual(get_variable_names(read_field.data_variables), ('data2',))
Пример #2
0
    def test_system_create_field_dimensioned_variables(self):
        """Test data is appropriately tagged to identify dimensioned variables."""
        path = self.get_temporary_file_path('foo.nc')

        # One temporal coordinate, two spatial coordinates, and three variables:
        # two carrying the full time/y/x dimensions and one time-only.
        time = TemporalVariable(value=[1, 2, 3], dimensions='time')
        x = Variable(name='x', value=[10, 20], dimensions='x')
        y = Variable(name='y', value=[30, 40, 50, 60], dimensions='y')
        data1 = Variable(name='data1', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
        data2 = Variable(name='data2', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
        data3 = Variable(name='data3', value=[11, 12, 13], dimensions=['time'])
        Field(time=time, grid=Grid(x, y), variables=[data1, data2, data3]).write(path)

        # On read, the dimensioned variables are identified from file metadata.
        rd = RequestDataset(path)
        self.assertEqual(rd.variable, ('data1', 'data2'))
        actual = get_variable_names(rd.get().data_variables)
        self.assertEqual(actual, ('data1', 'data2'))

        # An overloaded variable selection restricts the dimensioned set.
        rd = RequestDataset(path, variable='data2')
        actual = get_variable_names(rd.get().data_variables)
        self.assertEqual(actual, ('data2',))
Пример #3
0
    def test(self):
        """Test grid chunking produces expected slices and conserves the data sum."""
        gs = self.fixture_grid_chunker()

        # Gather each rank's destination data sum; the root holds the global total.
        desired_dst_grid_sum = gs.dst_grid.parent['data'].get_value().sum()
        desired_dst_grid_sum = MPI_COMM.gather(desired_dst_grid_sum)
        if vm.rank == 0:
            desired_sum = np.sum(desired_dst_grid_sum)

        # Expected 2x3 decomposition of the 360x720 destination grid.
        desired = [{'y': slice(0, 180, None), 'x': slice(0, 240, None)},
                   {'y': slice(0, 180, None), 'x': slice(240, 480, None)},
                   {'y': slice(0, 180, None), 'x': slice(480, 720, None)},
                   {'y': slice(180, 360, None), 'x': slice(0, 240, None)},
                   {'y': slice(180, 360, None), 'x': slice(240, 480, None)},
                   {'y': slice(180, 360, None), 'x': slice(480, 720, None)}]
        actual = list(gs.iter_dst_grid_slices())
        self.assertEqual(actual, desired)

        gs.write_chunks()

        if vm.rank == 0:
            rank_sums = []

        for ctr in range(1, gs.nchunks_dst[0] * gs.nchunks_dst[1] + 1):
            src_path = gs.create_full_path_from_template('src_template', index=ctr)
            dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

            src_field = RequestDataset(src_path).get()
            dst_field = RequestDataset(dst_path).get()

            # Each source chunk must spatially contain its destination chunk.
            src_envelope_global = box(*src_field.grid.extent_global)
            dst_envelope_global = box(*dst_field.grid.extent_global)
            self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

            actual = get_variable_names(src_field.data_variables)
            self.assertIn('data', actual)

            actual = get_variable_names(dst_field.data_variables)
            self.assertIn('data', actual)

            actual_data_sum = dst_field['data'].get_value().sum()
            actual_data_sum = MPI_COMM.gather(actual_data_sum)
            # BUG FIX: was "if MPI_RANK == 0". Every other root-only branch in this
            # test gates on vm.rank (rank_sums is created under vm.rank == 0 above);
            # mixing the two could reference rank_sums before assignment whenever
            # the virtual-machine rank differs from the raw MPI rank.
            if vm.rank == 0:
                actual_data_sum = np.sum(actual_data_sum)
                rank_sums.append(actual_data_sum)

        # The summed chunk data must equal the original destination data sum.
        if vm.rank == 0:
            self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
            index_path = gs.create_full_path_from_template('index_file')
            self.assertTrue(os.path.exists(index_path))

        vm.barrier()

        # The index file should exist on all ranks and describe the chunking.
        index_path = gs.create_full_path_from_template('index_file')
        index_field = RequestDataset(index_path).get()
        self.assertTrue(len(list(index_field.keys())) > 2)
Пример #4
0
    def test(self):
        """Write grid subsets and verify slice layout, data sums, and the index file."""
        gs = self.get_grid_splitter()

        # Gather the per-rank destination data sums to the MPI root, which
        # computes the global total used later to confirm subsets conserve data.
        desired_dst_grid_sum = gs.dst_grid.parent['data'].get_value().sum()
        desired_dst_grid_sum = MPI_COMM.gather(desired_dst_grid_sum)
        if MPI_RANK == 0:
            desired_sum = np.sum(desired_dst_grid_sum)

        # Expected 2x3 split of the 360x720 destination grid.
        desired = [{'y': slice(0, 180, None), 'x': slice(0, 240, None)},
                   {'y': slice(0, 180, None), 'x': slice(240, 480, None)},
                   {'y': slice(0, 180, None), 'x': slice(480, 720, None)},
                   {'y': slice(180, 360, None), 'x': slice(0, 240, None)},
                   {'y': slice(180, 360, None), 'x': slice(240, 480, None)},
                   {'y': slice(180, 360, None), 'x': slice(480, 720, None)}]
        actual = list(gs.iter_dst_grid_slices())
        self.assertEqual(actual, desired)

        gs.write_subsets()

        if MPI_RANK == 0:
            rank_sums = []

        # Subset files are indexed from 1 through the number of splits.
        for ctr in range(1, gs.nsplits_dst[0] * gs.nsplits_dst[1] + 1):
            src_path = gs.create_full_path_from_template('src_template', index=ctr)
            dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

            src_field = RequestDataset(src_path).get()
            dst_field = RequestDataset(dst_path).get()

            # Each source subset must spatially contain its destination subset.
            src_envelope_global = box(*src_field.grid.extent_global)
            dst_envelope_global = box(*dst_field.grid.extent_global)

            self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

            actual = get_variable_names(src_field.data_variables)
            self.assertIn('data', actual)

            actual = get_variable_names(dst_field.data_variables)
            self.assertIn('data', actual)
            # Accumulate per-subset data sums on the root for the conservation check.
            actual_data_sum = dst_field['data'].get_value().sum()
            actual_data_sum = MPI_COMM.gather(actual_data_sum)
            if MPI_RANK == 0:
                actual_data_sum = np.sum(actual_data_sum)
                rank_sums.append(actual_data_sum)

        # The summed subset data must equal the original destination data sum.
        if MPI_RANK == 0:
            self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
            index_path = gs.create_full_path_from_template('index_file')
            self.assertTrue(os.path.exists(index_path))

        MPI_COMM.Barrier()

        # The index file should be readable on all ranks and hold several variables.
        index_path = gs.create_full_path_from_template('index_file')
        index_field = RequestDataset(index_path).get()
        self.assertTrue(len(list(index_field.keys())) > 2)
Пример #5
0
    def test1d(self):
        """Dissimilarity of a single-cell reference against a candidate dataset."""
        p1 = self.write_field_data('v1', ncol=1, nrow=1)
        p3 = self.write_field_data('v1', dir='b')

        reference = ocgis.RequestDataset(
            p1, time_range=[dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]).get()
        candidate = ocgis.RequestDataset(
            p3, time_range=[dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)])

        calc = [{'func': 'dissimilarity',
                 'name': 'output_1d',
                 'kwds': {'target': reference, 'candidate': ('v1',)}}]

        ret = OcgOperations(dataset=candidate, calc=calc).execute()
        actual_field = ret.get_element()
        actual_variables = get_variable_names(actual_field.data_variables)
        self.assertEqual(actual_variables[0], 'dissimilarity')
        self.assertEqual(actual_field['dissimilarity'].shape, (1, 1, 2, 2))
Пример #6
0
    def test_narccap_point_subset_small(self):
        """Point subset with grouped statistics on a Lambert conformal NARCCAP dataset."""
        dmap = {DimensionMapKey.X: {DimensionMapKey.VARIABLE: 'xc'},
                DimensionMapKey.Y: {DimensionMapKey.VARIABLE: 'yc'},
                DimensionMapKey.TIME: {DimensionMapKey.VARIABLE: 'time'}}
        rd = self.test_data.get_rd('narccap_pr_wrfg_ncep', kwds={'dimension_map': dmap})

        field = rd.get()
        self.assertIsInstance(field.crs, CFLambertConformal)
        self.assertIsNotNone(field.time)

        # One calculation per statistic, grouped by month and year.
        stat_names = ['mean', 'median', 'max', 'min']
        calc = [{'func': s, 'name': s} for s in stat_names]
        ops = ocgis.OcgOperations(dataset=rd,
                                  calc=calc,
                                  calc_grouping=['month', 'year'],
                                  output_format=constants.OutputFormatName.OCGIS,
                                  geom=[-97.74278, 30.26694],
                                  abstraction='point',
                                  snippet=False,
                                  allow_empty=False,
                                  output_crs=Spherical(),
                                  search_radius_mult=2.0)
        ref = ops.execute().get_element()
        self.assertEqual(set(get_variable_names(ref.data_variables)), set(stat_names))
Пример #7
0
    def test_narccap_point_subset_small(self):
        """Point-based subset producing mean/median/max/min on NARCCAP data."""
        var_key = DimensionMapKey.VARIABLE
        dmap = {DimensionMapKey.X: {var_key: 'xc'},
                DimensionMapKey.Y: {var_key: 'yc'},
                DimensionMapKey.TIME: {var_key: 'time'}}
        rd = self.test_data.get_rd('narccap_pr_wrfg_ncep', kwds={'dimension_map': dmap})

        field = rd.get()
        self.assertIsInstance(field.crs, CFLambertConformal)
        self.assertIsNotNone(field.time)

        # Subset target is a single lon/lat point near Austin, TX.
        geom = [-97.74278, 30.26694]

        calc = [{'func': 'mean', 'name': 'mean'},
                {'func': 'median', 'name': 'median'},
                {'func': 'max', 'name': 'max'},
                {'func': 'min', 'name': 'min'}]
        ops = ocgis.OcgOperations(dataset=rd, calc=calc, calc_grouping=['month', 'year'],
                                  output_format=constants.OutputFormatName.OCGIS, geom=geom,
                                  abstraction='point', snippet=False, allow_empty=False,
                                  output_crs=Spherical(), search_radius_mult=2.0)
        ret = ops.execute()
        result_field = ret.get_element()
        actual = set(get_variable_names(result_field.data_variables))
        self.assertEqual(actual, {'mean', 'median', 'max', 'min'})
Пример #8
0
    def inquire_is_xyz(self, variable):
        """
        Inquire the dimension map to identify a variable's spatial classification.

        :param variable: The target variable to identify.
        :type variable: str | :class:`~ocgis.Variable`
        :rtype: :class:`ocgis.constants.DimensionMapKey`
        """
        name = get_variable_names(variable)[0]

        def _classification_map_(source):
            # Insertion order matches the original dict literal (X, Y, LEVEL),
            # so LEVEL wins if variable names collide.
            return {source.get_variable(DMK.X): DMK.X,
                    source.get_variable(DMK.Y): DMK.Y,
                    source.get_variable(DMK.LEVEL): DMK.LEVEL}

        ret = _classification_map_(self).get(name)
        # Fall back to any topologies when the top-level map has no match.
        if ret is None and self.has_topology:
            poss = {}
            for t in self.get_available_topologies():
                poss.update(_classification_map_(self.get_topology(t)))
            ret = poss.get(name)
        return ret
Пример #9
0
    def set_spatial_mask(self, variable, attrs=None, default_attrs=None):
        """
        Set the spatial mask variable for the dimension map. If ``attrs`` is not ``None``, then ``attrs`` >
        ``variable.attrs`` (if ``variable`` is not a string) > default attributes.

        :param variable: The spatial mask variable.
        :param dict attrs: Attributes to associate with the spatial mask variable *in addition* to default attributes.
        :param dict default_attrs: If provided, use these attributes as default spatial mask attributes.
        :type variable: :class:`~ocgis.Variable` | :class:`str`
        """
        if default_attrs is None:
            default_attrs = deepcopy(DIMENSION_MAP_TEMPLATE[DMK.SPATIAL_MASK][DMK.ATTRS])

        # A bare variable name carries no attributes of its own.
        try:
            vattrs = deepcopy(variable.attrs)
        except AttributeError:
            vattrs = {}

        # Apply in increasing precedence: defaults < variable attrs < explicit attrs.
        default_attrs.update(vattrs)
        if attrs is not None:
            default_attrs.update(attrs)

        entry = self._get_entry_(DMK.SPATIAL_MASK)
        entry[DMK.VARIABLE] = get_variable_names(variable)[0]
        entry[DMK.ATTRS] = default_attrs
Пример #10
0
    def test_get_operations(self):
        """Operations built from a query string should reflect the query parameters."""
        rd = self.test_data.get_rd('cancm4_tas')

        # Simple subset query with a shapefile output.
        qs = "uri={0}&spatial_operation=intersects&geom=state_boundaries&geom_select_uid=20|30&output_format=shp&snippet=true".format(
            rd.uri)
        ops = QueryInterface(qs).get_operations()

        self.assertIsInstance(ops._get_object_('dataset'), Dataset)
        self.assertEqual(list(ops.dataset)[0].uri, rd.uri)
        self.assertIsInstance(ops.geom, GeomCabinetIterator)
        self.assertEqual(ops.spatial_operation, 'intersects')
        self.assertEqual(ops.geom_select_uid, (20, 30))

        # Query with calculations, output CRS, unit conversion, and a time region.
        parts = [
            "uri={0}&spatial_operation=intersects&geom=state_boundaries&geom_select_uid=20|30".format(rd.uri),
            "&calc=mean~the_mean|median~the_median|freq_perc~the_p!percentile~90&calc_grouping=month|year&field_name=calcs",
            "&output_crs=4326&conform_units_to=celsius",
            "&time_region=year~2001",
        ]
        ops = QueryInterface(''.join(parts)).get_operations()
        ret = ops.execute()

        self.assertEqual(list(ret.children.keys()), [20, 30])
        names = get_variable_names(ret.get_element(container_ugid=20).data_variables)
        self.assertEqual(names, ("the_mean", "the_median", "the_p"))
        calcs_field = ret.get_element(container_ugid=30, field_name='calcs')
        self.assertEqual(calcs_field['the_mean'].shape, (12, 2, 2))
        self.assertEqual(ops.calc[2]["ref"], FrequencyPercentile)
        self.assertEqual(ops.output_crs, CoordinateReferenceSystem(epsg=4326))
Пример #11
0
    def test_get_operations(self):
        """Query-string parsing should produce fully-configured operations."""
        rd = self.test_data.get_rd('cancm4_tas')
        base = "uri={0}&spatial_operation=intersects&geom=state_boundaries&geom_select_uid=20|30".format(rd.uri)

        # First pass: subset-only query with shapefile output.
        qi = QueryInterface(base + "&output_format=shp&snippet=true")
        ops = qi.get_operations()

        self.assertIsInstance(ops._get_object_('dataset'), Dataset)
        self.assertEqual(list(ops.dataset)[0].uri, rd.uri)
        self.assertIsInstance(ops.geom, GeomCabinetIterator)
        self.assertEqual(ops.spatial_operation, 'intersects')
        self.assertEqual(ops.geom_select_uid, (20, 30))

        # Second pass: add calculations, a target CRS, unit conversion, and a time region.
        qs = base
        qs += "&calc=mean~the_mean|median~the_median|freq_perc~the_p!percentile~90&calc_grouping=month|year&field_name=calcs"
        qs += "&output_crs=4326&conform_units_to=celsius"
        qs += "&time_region=year~2001"
        ops = QueryInterface(qs).get_operations()
        ret = ops.execute()

        self.assertEqual(list(ret.children.keys()), [20, 30])
        self.assertEqual(get_variable_names(ret.get_element(container_ugid=20).data_variables),
                         ("the_mean", "the_median", "the_p"))
        calcs_field = ret.get_element(container_ugid=30, field_name='calcs')
        self.assertEqual(calcs_field['the_mean'].shape, (12, 2, 2))
        self.assertEqual(ops.calc[2]["ref"], FrequencyPercentile)
        self.assertEqual(ops.output_crs, CoordinateReferenceSystem(epsg=4326))
Пример #12
0
    def test(self):
        """Multi-request dataset calculation producing per-variable difference outputs."""
        paths = [self.write_field_data(n) for n in ('data1', 'data2')]
        basis_path = self.write_field_data('basis_var')

        time_range = [datetime(2000, 3, 1), datetime(2000, 3, 31)]
        mrd = MultiRequestDataset([RequestDataset(p, time_range=time_range) for p in paths])

        # The basis field is drawn from a different time window than the inputs.
        basis = RequestDataset(basis_path, time_range=[datetime(2000, 8, 1), datetime(2000, 8, 31)])
        basis_field = basis.get()

        calc = [{'func': 'mfpf',
                 'name': 'output_mfpf',
                 'kwds': {'reference': ('data1', 'data2'),
                          'basis': basis_field}}]
        ret = OcgOperations(dataset=mrd, calc=calc).execute()
        actual_field = ret.get_element()
        self.assertEqual(get_variable_names(actual_field.data_variables),
                         ('diff_data1_basis_var', 'diff_data2_basis_var'))

        # Both difference variables should sum to the same value.
        for v in actual_field.data_variables:
            self.assertAlmostEqual(v.get_value().sum(), 7.8071042497325145)
Пример #13
0
    def test_system_add_variable(self):
        """Test adding variables from spatial collections."""
        # Build one field per variable name and wrap each in its own collection.
        variable_names = ('a', 'b', 'c')
        scs = []
        for vname in variable_names:
            sc = SpatialCollection()
            sc.add_field(self.get_field(variable_name=vname), None)
            scs.append(sc)

        # The first collection accumulates the data variables of the others.
        grow = scs[0]
        for source in scs[1:]:
            for field, container in source.iter_fields(yield_container=True):
                # TODO: This should be adjusted to allow easier selection with empty fields.
                try:
                    # Case when we have spatial containers.
                    grow_field = grow.get_element(field_name=field.name, container_ugid=container)
                except KeyError:
                    # Case without spatial containers.
                    grow_field = grow.get_element(field.name)
                for dv in field.data_variables:
                    grow_field.add_variable(dv.extract(), is_data=True)

        # All variables should now be present on the grow field.
        self.assertEqual(get_variable_names(grow.get_element().data_variables), variable_names)

        # Round-trip through the netCDF converter and verify the variables persist.
        conv = NcConverter([grow], outdir=self.current_dir_output, prefix='out.nc')
        conv.write()
        actual = RequestDataset(conv.path).get()
        self.assertEqual(get_variable_names(actual.data_variables), variable_names)
Пример #14
0
    def test_system_through_operations(self):
        """Multi-request datasets should flow through operations for both output formats."""
        # In-memory (OCGIS) output.
        ops = OcgOperations(dataset=self.get_multirequestdataset())
        field = ops.execute().get_element()
        self.assertEqual(get_variable_names(field.data_variables), self.f_variable_names)

        # NetCDF output read back from disk.
        ops = OcgOperations(dataset=self.get_multirequestdataset(), output_format='nc')
        ret = ops.execute()
        actual_field = RequestDataset(ret).get()
        self.assertEqual(get_variable_names(actual_field.data_variables), self.f_variable_names)

        # The second variable is offset from the first by one on average.
        dvars = actual_field.data_variables
        actual_diff = dvars[1].get_value() - dvars[0].get_value()
        self.assertAlmostEqual(actual_diff.mean(), 1.0)
Пример #15
0
    def set_crs(self, variable):
        """
        Set the coordinate reference system variable name.

        :param variable: :class:`str` | :class:`~ocgis.Variable`
        """
        crs_name = get_variable_names(variable)[0]
        self._get_entry_(DMK.CRS)[DMK.VARIABLE] = crs_name
Пример #16
0
    def test_system_through_operations(self):
        """Operations should handle multi-request datasets in memory and via netCDF."""
        mrd = self.get_multirequestdataset()
        ret = OcgOperations(dataset=mrd).execute()
        names = get_variable_names(ret.get_element().data_variables)
        self.assertEqual(names, self.f_variable_names)

        # Write to netCDF and read the result back for the same checks.
        mrd = self.get_multirequestdataset()
        nc_path = OcgOperations(dataset=mrd, output_format='nc').execute()
        actual_field = RequestDataset(nc_path).get()
        self.assertEqual(get_variable_names(actual_field.data_variables), self.f_variable_names)

        # The mean elementwise difference between the two variables is one.
        values = [dv.get_value() for dv in actual_field.data_variables]
        self.assertAlmostEqual((values[1] - values[0]).mean(), 1.0)
Пример #17
0
    def test_system_add_variable(self):
        """Test adding variables from spatial collections."""
        # Create a few separate fields, one per variable name.
        variable_names = tuple(['a', 'b', 'c'])
        fields = [self.get_field(variable_name=v) for v in variable_names]

        # Wrap each field in its own spatial collection.
        scs = []
        for field in fields:
            collection = SpatialCollection()
            collection.add_field(field, None)
            scs.append(collection)

        # Fold data variables from the remaining collections into the first.
        grow = scs[0]
        for idx in range(1, len(scs)):
            for field, container in scs[idx].iter_fields(yield_container=True):
                # TODO: This should be adjusted to allow easier selection with empty fields.
                try:
                    # Selection when spatial containers are present.
                    grow_field = grow.get_element(field_name=field.name, container_ugid=container)
                except KeyError:
                    # Selection without spatial containers.
                    grow_field = grow.get_element(field.name)
                for dv in field.data_variables:
                    grow_field.add_variable(dv.extract(), is_data=True)

        # Assert all variables are present on the grow field.
        actual = grow.get_element()
        self.assertEqual(get_variable_names(actual.data_variables), variable_names)

        # Write the spatial collection using a converter.
        conv = NcConverter([grow], outdir=self.current_dir_output, prefix='out.nc')
        conv.write()

        # Assert all variables survive the write/read round trip.
        read_back = RequestDataset(conv.path).get()
        self.assertEqual(get_variable_names(read_back.data_variables), variable_names)
Пример #18
0
 def test_system_units_validation_equal_units(self):
     """Heat index executes when inputs already carry the required units."""
     # Heat index coefficients require the data be in specific units.
     field = self.get_field(name='tasmax', units='fahrenheit', with_value=True)
     field_rhs = self.get_field(name='rhsmax', units='percent', with_value=True)

     # Move the humidity variable into the temperature field as a data variable.
     with orphaned(field_rhs['rhsmax']):
         field.add_variable(field_rhs['rhsmax'], is_data=True)
     tagged = set(get_variable_names(field.get_by_tag(TagName.DATA_VARIABLES)))
     self.assertEqual(tagged, {'tasmax', 'rhsmax'})

     hi = HeatIndex(field=field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
     self.assertIsInstance(hi.execute(), VariableCollection)
Пример #19
0
    def set_bounds(self, entry_key, bounds):
        """
        Set the bounds variable name for ``entry_key``.

        :param str entry_key: See :class:`ocgis.constants.DimensionMapKey` for valid entry keys.
        :param bounds: :class:`str` | :class:`~ocgis.Variable`
        :raises DimensionMapError: If no variable is set for ``entry_key``.
        """
        bounds_name = get_variable_names(bounds)[0]
        entry = self._get_entry_(entry_key)
        # Bounds only make sense once the entry has an associated variable.
        if entry[DMK.VARIABLE] is None:
            raise DimensionMapError(entry_key, 'No variable set. Bounds may not be set.')
        entry[DMK.BOUNDS] = bounds_name
Пример #20
0
    def test_system_units_validation_wrong_units(self):
        """Heat index must reject inputs whose units do not satisfy validation."""
        # Heat index coefficients require the data be in specific units; kelvin
        # does not satisfy validation here.
        field = self.get_field(name='tasmax', units='kelvin', with_value=True)
        field_rhs = self.get_field(name='rhsmax', units='percent', with_value=True)

        with orphaned(field_rhs['rhsmax']):
            field.add_variable(field_rhs['rhsmax'], is_data=True)

        tagged = get_variable_names(field.get_by_tag(TagName.DATA_VARIABLES))
        self.assertEqual(set(tagged), {'tasmax', 'rhsmax'})

        hi = HeatIndex(field=field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
        with self.assertRaises(UnitsValidationError):
            hi.execute()
Пример #21
0
    def add_variable(self, variable, force=False, is_data=False):
        """
        ..note:: Accepts all parameters to :meth:`~ocgis.VariableCollection.add_variable`.

        Additional keyword arguments are:

        :param bool is_data: If ``True``, the variable is considered a data variable.
        """
        super(Field, self).add_variable(variable, force=force)
        if not is_data:
            return
        # Tag the variable as data exactly once, creating the tag if necessary.
        existing = get_variable_names(self.get_by_tag(TagName.DATA_VARIABLES, create=True))
        if variable.name not in existing:
            self.append_to_tags(TagName.DATA_VARIABLES, variable.name)
Пример #22
0
    def test_as_field(self):
        """Test iteration returned as field objects."""
        select_ugid = [16, 17, 51]
        desired_data_variables = ('UGID', 'STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR')
        sci = GeomCabinetIterator(key='state_boundaries', select_uid=select_ugid, as_field=True)

        # Iterate twice to confirm the iterator is re-entrant.
        for _ in range(2):
            ugids = []
            for field in sci:
                self.assertIsInstance(field, Field)
                geom_uid = field.geom.ugid
                self.assertEqual(geom_uid.shape, (1,))
                names = get_variable_names(field.data_variables)
                self.assertEqual(names, desired_data_variables)
                self.assertEqual(field.crs, WGS84())
                ugids.append(geom_uid.get_value()[0])
Пример #23
0
    def test_as_field(self):
        """Test iteration returned as field objects."""
        expected_names = ('UGID', 'STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR')
        iterator = GeomCabinetIterator(key='state_boundaries', select_uid=[16, 17, 51], as_field=True)

        # The iterator should be restartable, so run the checks twice.
        for _pass in range(2):
            collected_ugids = []
            for element in iterator:
                self.assertIsInstance(element, Field)
                self.assertEqual(element.geom.ugid.shape, (1,))
                self.assertEqual(get_variable_names(element.data_variables), expected_names)
                self.assertEqual(element.crs, WGS84())
                collected_ugids.append(element.geom.ugid.get_value()[0])
Пример #24
0
def format_return_field(function_tag, out_field, new_temporal=None):
    """Strip calculation-source variables and optionally install a new time dimension."""
    # Remove the variables used by the calculation. The tag may be absent on
    # incoming fields, in which case there is nothing to remove.
    try:
        to_remove = get_variable_names(out_field.get_by_tag(function_tag))
    except KeyError:
        pass
    else:
        for name in to_remove:
            out_field.remove_variable(name)

    # Calculations that do not compute a new time dimension pass None here;
    # only swap the time variable when a replacement exists.
    if new_temporal is not None:
        out_field.remove_variable(out_field.time)
        out_field.set_time(new_temporal, force=True)
Пример #25
0
 def test_system_units_validation_equal_units(self):
     """Heat index runs to completion when input units match its requirements."""
     # Heat index coefficients require the data be in specific units.
     tasmax_field = self.get_field(name='tasmax', units='fahrenheit', with_value=True)
     rhsmax_field = self.get_field(name='rhsmax', units='percent', with_value=True)

     with orphaned(rhsmax_field['rhsmax']):
         tasmax_field.add_variable(rhsmax_field['rhsmax'], is_data=True)
     data_names = get_variable_names(tasmax_field.get_by_tag(TagName.DATA_VARIABLES))
     self.assertEqual(set(data_names), {'tasmax', 'rhsmax'})

     hi = HeatIndex(field=tasmax_field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
     vc = hi.execute()
     self.assertIsInstance(vc, VariableCollection)
Пример #26
0
    def test_system_units_validation_wrong_units(self):
        """Heat index raises when an input's units fail validation."""
        # Heat index coefficients require the data be in specific units.
        temperature = self.get_field(name='tasmax', units='kelvin', with_value=True)
        humidity = self.get_field(name='rhsmax', units='percent', with_value=True)

        with orphaned(humidity['rhsmax']):
            temperature.add_variable(humidity['rhsmax'], is_data=True)

        actual = set(get_variable_names(temperature.get_by_tag(TagName.DATA_VARIABLES)))
        self.assertEqual(actual, {'tasmax', 'rhsmax'})

        hi = HeatIndex(field=temperature, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
        with self.assertRaises(UnitsValidationError):
            hi.execute()
Пример #27
0
def format_return_field(function_tag, out_field, new_temporal=None):
    # Remove the variables used by the calculation.
    try:
        to_remove = get_variable_names(out_field.get_by_tag(function_tag))
    except KeyError:
        # Let this fail quietly as the tag may not exist on incoming fields.
        pass
    else:
        for tr in to_remove:
            out_field.remove_variable(tr)

    # Remove the original time variable and replace with the new one if there is a new time dimension. New
    # time dimensions may not be present for calculations that do not compute one.
    if new_temporal is not None:
        out_field.remove_variable(out_field.time)
        out_field.set_time(new_temporal, force=True)
Пример #28
0
    def test_full(self):
        """Compute the dissimilarity with all metrics and plot each result."""
        from flyingpigeon import dissimilarity
        from matplotlib import pyplot as plt

        # Reference data are single-cell fields; candidate data cover an
        # 11 x 10 grid written into a separate directory.
        p1 = self.write_field_data('v1', ncol=1, nrow=1)
        p2 = self.write_field_data('v2', ncol=1, nrow=1)
        p3 = self.write_field_data('v1', ncol=11, nrow=10, dir='c')
        p4 = self.write_field_data('v2', ncol=11, nrow=10, dir='c')

        # March 2000 subset for the reference period.
        ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
        ref = [ocgis.RequestDataset(p, time_range=ref_range) for p in [p1, p2]]
        reference = ocgis.MultiRequestDataset(ref)
        reference = reference.get()

        # August 2000 subset for the candidate period.
        cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
        can = [
            ocgis.RequestDataset(p, time_range=cand_range) for p in [p3, p4]
        ]
        candidate = ocgis.MultiRequestDataset(can)

        # One subplot per metric exported by the dissimilarity module.
        fig, axes = plt.subplots(2, 3)
        for i, dist in enumerate(dissimilarity.__all__):

            calc = [{
                'func': 'dissimilarity',
                'name': 'output_mfpf',
                'kwds': {
                    'target': reference,
                    'candidate': ('v1', 'v2'),
                    'dist': dist
                }
            }]

            ops = OcgOperations(dataset=candidate, calc=calc)
            ret = ops.execute()
            out_field = ret.get_element()
            # Plot the first 2D slice of the first output data variable.
            var_name = get_variable_names(out_field.data_variables)[0]
            out = out_field[var_name].get_value()[0, 0]
            axes.flat[i].imshow(out)
            axes.flat[i].set_title(dist)

        path = os.path.join(test_output_path,
                            'test_spatial_analog_metrics.png')
        plt.savefig(path)
        plt.close()
Пример #29
0
    def test_system_converting_state_boundaries_shapefile_memory(self):
        """Test iteration may be used in place of loading all values from source."""

        rd = RequestDataset(uri=self.path_state_boundaries)
        field = rd.get()
        data_variable_names = get_variable_names(field.data_variables)

        # Protect the geometry so any attempt to load values from source raises.
        field.geom.protected = True
        sub = field.get_field_slice({'geom': slice(10, 20)})
        self.assertTrue(sub.geom.protected)
        self.assertFalse(sub.geom.has_allocated_value)

        self.assertIsInstance(sub, Field)
        self.assertIsInstance(sub.geom, GeometryVariable)
        gc = sub.geom.convert_to(use_geometry_iterator=True)
        self.assertIsInstance(gc, PolygonGC)

        # Test the new object does not share data with the source. This check
        # was the purpose of collecting data_variable_names above; previously
        # the names were computed but never asserted against.
        for dn in data_variable_names:
            self.assertNotIn(dn, gc.parent)

        # Conversion must not have allocated values on the protected source.
        self.assertFalse(sub.geom.has_allocated_value)
        self.assertTrue(field.geom.protected)
        path = self.get_temporary_file_path('out.nc')
        gc.parent.write(path)
Пример #30
0
    def test_get_intersects(self):
        subset = box(100.7, 39.71, 102.30, 42.30)
        desired_manual = [[[40.0, 40.0], [41.0, 41.0], [42.0, 42.0]],
                          [[101.0, 102.0], [101.0, 102.0], [101.0, 102.0]]]
        desired_manual = np.array(desired_manual)

        grid = self.get_gridxy(crs=WGS84())
        # self.write_fiona_htmp(grid, 'grid')
        # self.write_fiona_htmp(GeometryVariable(value=subset), 'subset')
        sub, sub_slc = grid.get_intersects(subset, return_slice=True)

        self.assertFalse(sub.has_allocated_point)
        self.assertFalse(sub.has_allocated_polygon)
        self.assertFalse(sub.has_allocated_abstraction_geometry)
        # self.write_fiona_htmp(sub, 'sub')
        self.assertEqual(sub_slc, (slice(0, 3, None), slice(0, 2, None)))
        self.assertNumpyAll(sub.get_value_stacked(), desired_manual)
        point = sub.get_point()
        self.assertEqual(point.crs, grid.crs)

        # Test masks are updated.
        grid = self.get_gridxy(with_xy_bounds=True, with_parent=True)
        for t in ['xbounds', 'ybounds']:
            self.assertIn(t, grid.parent)
        subset = 'Polygon ((100.81193771626298883 42.17577854671281301, 101.13166089965399408 42.21211072664360842, 101.34965397923876651 41.18754325259516236, 103.68944636678200766 41.34013840830451159, 103.63858131487890546 41.22387543252595776, 100.77560553633219342 41.08581314878893664, 100.81193771626298883 42.17577854671281301))'
        subset = wkt.loads(subset)
        sub = grid.get_intersects(subset, cascade=True)
        self.assertTrue(sub.get_mask().any())
        self.assertTrue(sub.get_abstraction_geometry().get_mask().any())
        mask_slice = {'ydim': slice(1, 2), 'xdim': slice(1, 3)}

        sub_member_variables = get_variable_names(sub.get_member_variables())
        for v in list(sub.parent.values()):
            if v.name in sub_member_variables and not isinstance(
                    v, GeometryVariable) and v.name != sub.mask_variable.name:
                self.assertIsNone(v.get_mask())
            else:
                self.assertTrue(v.get_mask().any())
                self.assertFalse(v.get_mask().all())
                actual = v[mask_slice].get_mask()
                self.assertTrue(np.all(actual))
Пример #31
0
    def test_system_masking(self):
        """Test behavior of the grid mask. This is an independently managed variable."""

        x = Variable('xc', value=[1, 2, 3], dimensions='dimx')
        y = Variable('yc', value=[10, 20, 30, 40], dimensions='dimy')
        grid = Grid(x, y)
        data = Variable('data',
                        value=np.zeros(grid.shape),
                        dimensions=['dimy', 'dimx'])
        grid.parent.add_variable(data)

        # No mask exists until one is explicitly set.
        gmask = grid.get_mask()
        self.assertIsNone(gmask)
        self.assertIsNone(grid.mask_variable)

        # Setting a mask creates the mask variable; cascading applies the mask
        # to the other variable on the grid's parent.
        new_mask = np.zeros(grid.shape, dtype=bool)
        new_mask[1, 1] = True
        grid.set_mask(new_mask, cascade=True)
        self.assertIsInstance(grid.mask_variable, Variable)
        actual = grid.get_mask()
        self.assertNumpyAll(actual, new_mask)
        actual = get_variable_names(grid.get_member_variables())
        desired = [x.name, y.name, grid._mask_name]
        self.assertAsSetEqual(actual, desired)
        self.assertNumpyAll(grid.get_mask(), data.get_mask())

        path = self.get_temporary_file_path('foo.nc')
        grid.parent.write(path)

        # The mask is persisted as its own variable in the output file.
        with self.nc_scope(path) as ds:
            actual = ds.variables[grid.mask_variable.name]
            self.assertNumpyAll(grid.get_mask(), actual[:].mask)

        # Test mask is used when read from file.
        actual_field = RequestDataset(path).get()
        self.assertNumpyAll(grid.get_mask(), actual_field.grid.get_mask())
        self.assertEqual(actual_field.grid.get_mask().sum(), 1)
        self.assertTrue(actual_field.grid.is_vectorized)
        self.assertEqual(actual_field.grid.get_mask().dtype, bool)
        actual_field.set_abstraction_geom()
        self.assertNumpyAll(actual_field.geom.get_mask(), grid.get_mask())
Пример #32
0
    def test_system_converting_state_boundaries_shapefile_memory(self):
        """Test iteration may be used in place of loading all values from source."""

        rd = RequestDataset(uri=self.path_state_boundaries)
        field = rd.get()
        data_variable_names = get_variable_names(field.data_variables)
        # Protect the geometry so any attempt to load values from source raises.
        field.geom.protected = True
        sub = field.get_field_slice({'geom': slice(10, 20)})
        self.assertTrue(sub.geom.protected)
        self.assertFalse(sub.geom.has_allocated_value)

        self.assertIsInstance(sub, Field)
        self.assertIsInstance(sub.geom, GeometryVariable)
        # Conversion via a geometry iterator should not allocate source values.
        gc = sub.geom.convert_to(use_geometry_iterator=True)
        self.assertIsInstance(gc, PolygonGC)

        # Test the new object does not share data with the source.
        for dn in data_variable_names:
            self.assertNotIn(dn, gc.parent)

        # The source geometry remains unallocated and protected.
        self.assertFalse(sub.geom.has_allocated_value)
        self.assertTrue(field.geom.protected)
        path = self.get_temporary_file_path('out.nc')
        gc.parent.write(path)
Пример #33
0
    def get_unioned(self,
                    dimensions=None,
                    union_dimension=None,
                    spatial_average=None,
                    root=0):
        """
        Unions _unmasked_ geometry objects. Collective across the current :class:`~ocgis.OcgVM`.

        :param dimensions: Dimensions to union. Defaults to all dimensions on
         this variable.
        :param union_dimension: Dimension for the unioned output. A singleton
         dimension is created when not provided.
        :param spatial_average: Optional variable name(s) to weight-average
         across the unioned dimensions.
        :param root: Rank that returns the unioned variable; all other ranks
         return ``None``.
        """
        # TODO: optimize!

        # Get dimension names and lengths for the dimensions to union.
        if dimensions is None:
            dimensions = self.dimensions
        dimension_names = get_dimension_names(dimensions)
        dimension_lengths = [
            len(self.parent.dimensions[dn]) for dn in dimension_names
        ]

        # Get the variables to spatial average.
        if spatial_average is not None:
            variable_names_to_weight = get_variable_names(spatial_average)
        else:
            variable_names_to_weight = []

        # Get the new dimensions for the geometry variable. The union dimension is always the last dimension.
        if union_dimension is None:
            from ocgis.variable.dimension import Dimension
            union_dimension = Dimension(
                constants.DimensionName.UNIONED_GEOMETRY, 1)
        new_dimensions = []
        for dim in self.dimensions:
            if dim.name not in dimension_names:
                new_dimensions.append(dim)
        new_dimensions.append(union_dimension)

        # Configure the return variable. With no spatial average the variable
        # is extracted (detached) from its parent collection.
        ret = self.copy()
        if spatial_average is None:
            ret = ret.extract()
        ret.set_mask(None)
        ret.set_value(None)
        ret.set_dimensions(new_dimensions)
        ret.allocate_value()

        # Destination indices in the return variable are filled with non-masked, unioned geometries.
        for dst_indices in product(
                *
            [list(range(dl)) for dl in get_dimension_lengths(new_dimensions)]):
            dst_slc = {
                new_dimensions[ii].name: dst_indices[ii]
                for ii in range(len(new_dimensions))
            }

            # Select the geometries to union skipping any masked geometries.
            to_union = deque()
            for indices in product(
                    *[list(range(dl)) for dl in dimension_lengths]):
                dslc = {
                    dimension_names[ii]: indices[ii]
                    for ii in range(len(dimension_names))
                }
                sub = self[dslc]
                sub_mask = sub.get_mask()
                if sub_mask is None:
                    to_union.append(sub.get_value().flatten()[0])
                else:
                    if not sub_mask.flatten()[0]:
                        to_union.append(sub.get_value().flatten()[0])

            # Execute the union operation. Multi-part geometries are exploded
            # into their components first.
            # NOTE(review): iterating a MultiPolygon/MultiPoint directly and
            # ``cascaded_union`` are both removed in Shapely 2.0 (use ``.geoms``
            # and ``unary_union``) -- confirm the pinned Shapely version.
            processed_to_union = deque()
            for geom in to_union:
                if isinstance(geom, MultiPolygon) or isinstance(
                        geom, MultiPoint):
                    for element in geom:
                        processed_to_union.append(element)
                else:
                    processed_to_union.append(geom)
            unioned = cascaded_union(processed_to_union)

            # Pull unioned geometries and union again for the final unioned geometry.
            if vm.size > 1:
                unioned_gathered = vm.gather(unioned)
                if vm.rank == root:
                    unioned = cascaded_union(unioned_gathered)

            # Fill the return geometry variable value with the unioned geometry.
            to_fill = ret[dst_slc].get_value()
            to_fill[0] = unioned

        # Spatial average shared dimensions.
        if spatial_average is not None:
            # Get source data to weight.
            for var_to_weight in filter(
                    lambda ii: ii.name in variable_names_to_weight,
                    list(self.parent.values())):
                # Holds sizes of dimensions to iterate. These dimension are not squeezed by the weighted averaging.
                range_to_itr = []
                # Holds the names of dimensions to squeeze.
                names_to_itr = []
                # Dimension names that are squeezed. Also the dimensions for the weight matrix.
                names_to_slice_all = []
                for dn in var_to_weight.dimensions:
                    if dn.name in self.dimension_names:
                        names_to_slice_all.append(dn.name)
                    else:
                        range_to_itr.append(len(dn))
                        names_to_itr.append(dn.name)

                # Reference the weights on the source geometry variable.
                weights = self[{
                    nsa: slice(None)
                    for nsa in names_to_slice_all
                }].weights

                # Path if there are iteration dimensions. Checks for axes ordering in addition.
                if len(range_to_itr) > 0:
                    # New dimensions for the spatially averaged variable. Unioned dimension is always last. Remove the
                    # dimensions aggregated by the weighted average.
                    new_dimensions = [
                        dim for dim in var_to_weight.dimensions
                        if dim.name not in dimension_names
                    ]
                    new_dimensions.append(union_dimension)

                    # Prepare the spatially averaged variable.
                    target = ret.parent[var_to_weight.name]
                    target.set_mask(None)
                    target.set_value(None)
                    target.set_dimensions(new_dimensions)
                    target.allocate_value()

                    # Swap weight axes to make sure they align with the target variable.
                    swap_chain = get_swap_chain(dimension_names,
                                                names_to_slice_all)
                    if len(swap_chain) > 0:
                        # Copy before mutating so the source weights are untouched.
                        weights = weights.copy()
                    for sc in swap_chain:
                        weights = weights.swapaxes(*sc)

                    # The main weighting loop. Can get quite intensive with many, large iteration dimensions.
                    # Bind frequently used callables to locals for speed.
                    len_names_to_itr = len(names_to_itr)
                    slice_none = slice(None)
                    squeeze_out = [
                        ii for ii, dim in enumerate(var_to_weight.dimensions)
                        if dim.name in names_to_itr
                    ]
                    should_squeeze = True if len(squeeze_out) > 0 else False
                    np_squeeze = np.squeeze
                    np_atleast_1d = np.atleast_1d
                    np_ma_average = np.ma.average
                    for nonweighted_indices in product(
                            *[list(range(ri)) for ri in range_to_itr]):
                        w_slc = {
                            names_to_itr[ii]: nonweighted_indices[ii]
                            for ii in range(len_names_to_itr)
                        }
                        for nsa in names_to_slice_all:
                            w_slc[nsa] = slice_none
                        data_to_weight = var_to_weight[w_slc].get_masked_value(
                        )
                        if should_squeeze:
                            data_to_weight = np_squeeze(
                                data_to_weight, axis=tuple(squeeze_out))
                        weighted_value = np_atleast_1d(
                            np_ma_average(data_to_weight, weights=weights))
                        target[w_slc].get_value()[:] = weighted_value
                else:
                    # No iteration dimensions: a single weighted average over
                    # the flattened variable.
                    target_to_weight = var_to_weight.get_masked_value()
                    # Sort to minimize floating point sum errors.
                    target_to_weight = target_to_weight.flatten()
                    weights = weights.flatten()
                    sindices = np.argsort(target_to_weight)
                    target_to_weight = target_to_weight[sindices]
                    weights = weights[sindices]

                    weighted_value = np.atleast_1d(
                        np.ma.average(target_to_weight, weights=weights))
                    target = ret.parent[var_to_weight.name]
                    target.set_mask(None)
                    target.set_value(None)
                    target.set_dimensions(new_dimensions)
                    target.set_value(weighted_value)

            # Collect areas of live ranks and convert to weights.
            if vm.size > 1:
                # If there is no area information (points for example, we need to use counts).
                if ret.area.data[0].max() == 0:
                    weight_or_proxy = float(self.size)
                else:
                    weight_or_proxy = ret.area.data[0]

                if vm.rank != root:
                    vm.comm.send(weight_or_proxy, dest=root)
                else:
                    live_rank_areas = [weight_or_proxy]
                    for tner in vm.ranks:
                        if tner != vm.rank:
                            recv_area = vm.comm.recv(source=tner)
                            live_rank_areas.append(recv_area)
                    live_rank_areas = np.array(live_rank_areas)

                    # Normalize areas to weights relative to the largest area.
                    rank_weights = live_rank_areas / np.max(live_rank_areas)

                for var_to_weight in filter(
                        lambda ii: ii.name in variable_names_to_weight,
                        list(ret.parent.values())):
                    dimensions_to_itr = [
                        dim.name for dim in var_to_weight.dimensions
                        if dim.name != union_dimension.name
                    ]
                    slc = {union_dimension.name: 0}
                    for idx_slc in var_to_weight.iter_dict_slices(
                            dimensions=dimensions_to_itr):
                        idx_slc.update(slc)
                        to_weight = var_to_weight[idx_slc].get_value().flatten(
                        )[0]
                        # The first ``if`` seeds the root's collection list; the
                        # ``else`` below binds to the second ``if``, so the root
                        # rank both seeds and collects here.
                        if vm.rank == root:
                            collected_to_weight = [to_weight]
                        if not vm.rank == root:
                            vm.comm.send(to_weight, dest=root)
                        else:
                            for tner in vm.ranks:
                                if not tner == root:
                                    recv_to_weight = vm.comm.recv(source=tner)
                                    collected_to_weight.append(recv_to_weight)

                            # Sort to minimize floating point sum errors.
                            collected_to_weight = np.array(collected_to_weight)
                            sindices = np.argsort(collected_to_weight)
                            collected_to_weight = collected_to_weight[sindices]
                            rank_weights = rank_weights[sindices]

                            weighted = np.atleast_1d(
                                np.ma.average(collected_to_weight,
                                              weights=rank_weights))
                            var_to_weight[idx_slc].get_value()[:] = weighted
        # Only the root rank returns the unioned variable.
        if vm.rank == root:
            return ret
        else:
            return
Пример #34
0
 def test_get(self):
     """The merged field exposes the expected data variable names."""
     merged = self.get_multirequestdataset().get()
     self.assertEqual(
         get_variable_names(merged.data_variables), self.f_variable_names)
Пример #35
0
    def __init__(self, variable, followers=None, value=None, mask=None, allow_masked=True, primary_mask=None,
                 slice_remap=None, shape=None, melted=None, repeaters=None, formatter=None, clobber_masked=True):
        """Lead iterator for a variable with optional lockstep followers.

        :param variable: The lead variable to iterate.
        :param followers: Optional variables or iterator instances whose
         dimensions must be a subset of the lead variable's dimensions.
        :param value: Optional override of the lead variable's value.
        :param mask: Optional override of the lead variable's mask.
        :param allow_masked: Passed through to follower iterators; presumably
         yields masked elements when ``True`` -- confirm against the
         iteration implementation.
        :param primary_mask: Variable (or name) whose mask is authoritative.
        :param slice_remap: Index remapping applied when a follower's dimension
         order differs from the lead's.
        :param shape: Optional override of the iteration shape.
        :param melted: Variables to melt into the output; requires ``followers``.
        :param repeaters: Repeat records appended to the variable's own.
        :param formatter: Optional output value formatter.
        :param clobber_masked: Presumably controls replacing masked values --
         confirm against the iteration implementation.
        :raises ValueError: If ``melted`` is given without ``followers``, or a
         follower's dimensions are not a subset of the lead variable's.
        """
        if melted is not None and followers is None:
            raise ValueError('"melted" must be None if there are no "followers".')

        self.variable = variable
        self.formatter = formatter
        self.allow_masked = allow_masked
        self.slice_remap = slice_remap
        self.clobber_masked = clobber_masked

        # Merge any repeat records declared on the variable itself.
        if variable.repeat_record is not None:
            if repeaters is None:
                repeaters = variable.repeat_record
            else:
                repeaters += variable.repeat_record
        self.repeaters = repeaters

        self._is_lead = True

        # Collect repeat records for melted variables keyed by variable name.
        if melted is None:
            melted_repeaters = None
        else:
            melted_repeaters = {}
            for m in melted:
                try:
                    if m.repeat_record is not None:
                        melted_repeaters[m.name] = m.repeat_record
                except AttributeError:
                    # Bug fix: an object without "repeat_record" exposes
                    # "repeaters" instead. The previous code re-read
                    # "m.repeat_record" here, which would raise AttributeError
                    # again rather than storing the record.
                    if m.repeaters is not None:
                        melted_repeaters[m.name] = m.repeaters
        self.melted_repeaters = melted_repeaters

        if melted is not None:
            melted = get_variable_names(melted)
        self.melted = melted

        if shape is None:
            shape = self.variable.shape
        self.shape = shape

        # The lead variable's mask is primary unless another variable is named.
        if primary_mask is None:
            primary_mask = variable.name
        else:
            primary_mask = get_variable_names(primary_mask)[0]
        self.primary_mask = primary_mask

        if value is None:
            self.value = variable.get_value()
        else:
            self.value = value

        if mask is None:
            self.mask = variable.get_mask()
        else:
            self.mask = mask

        # Wrap followers in iterators and compute a slice remap for any
        # follower whose dimension order differs from the lead variable's.
        if followers is not None:
            dimensions = get_dimension_names(self.variable.dimensions)
            followers = get_followers(followers)
            for fidx, follower in enumerate(followers):
                if isinstance(follower, self.__class__):
                    iterator = follower
                    follower = follower.variable
                else:
                    iterator = Iterator(follower, allow_masked=allow_masked, primary_mask=primary_mask)

                follower_dimensions = get_dimension_names(follower.dimensions)
                set_follower_dimensions = set(follower_dimensions)
                set_dimensions = set(dimensions)
                if not set_follower_dimensions.issubset(set_dimensions):
                    msg = 'Follower variable "{}" dimensions are not a subset of the lead variable.'.format(
                        follower.name)
                    raise ValueError(msg)
                if follower_dimensions != dimensions:
                    follower_slice_remap = []
                    for d in follower_dimensions:
                        if d in set_dimensions:
                            follower_slice_remap.append(dimensions.index(d))
                    iterator.slice_remap = follower_slice_remap
                    iterator.shape = self.shape
                iterator._is_lead = False
                followers[fidx] = iterator
            self.iterators = [self] + followers
            self.followers = followers
        else:
            self.iterators = [self]
            self.followers = None

        self._is_recursing = False
Пример #36
0
 def test_get(self):
     """Data variable names from the multi-request field match expectations."""
     dataset = self.get_multirequestdataset()
     field = dataset.get()
     names = get_variable_names(field.data_variables)
     self.assertEqual(names, self.f_variable_names)
Пример #37
0
    def __init__(self, **kwargs):
        """Initialize the field and wire coordinate metadata into its dimension map.

        Field-specific keyword arguments (``dimension_map``, coordinate system,
        grid, driver, geometry/realization/time/level variables, grid
        abstraction, ``format_time``, ``is_data``, and regrid flags -- several
        referenced via ``KeywordArgument`` constants) are consumed here; the
        remainder are forwarded to ``VariableCollection.__init__``.
        """
        kwargs = kwargs.copy()
        dimension_map = kwargs.pop('dimension_map', None)

        # Flag updated by driver to indicate if the coordinate system is assigned or implied.
        self._has_assigned_coordinate_system = False
        # Flag to indicate if this is a regrid destination.
        self.regrid_destination = kwargs.pop('regrid_destination', False)
        # Flag to indicate if this is a regrid source.
        self.regrid_source = kwargs.pop('regrid_source', True)

        # Other incoming data objects may have a coordinate system which should be used.
        crs = kwargs.pop(KeywordArgument.CRS, 'auto')

        # Add grid variable metadata to dimension map.
        grid = kwargs.pop(KeywordArgument.GRID, 'auto')

        # Configure the driver.
        driver = kwargs.pop(KeywordArgument.DRIVER, 'auto')

        # Extract standard coordinate variables from the field keyword arguments.
        k = (DimensionMapKey.GEOM, DimensionMapKey.REALIZATION, DimensionMapKey.TIME, DimensionMapKey.LEVEL)
        s = OrderedDict()
        for ii in k:
            s[ii] = kwargs.pop(ii, None)

        # 'auto' is the sentinel for "not overloaded"; None is rejected.
        grid_abstraction = kwargs.pop(KeywordArgument.GRID_ABSTRACTION, 'auto')
        if grid_abstraction is None:
            raise ValueError("'{}' may not be None.".format(KeywordArgument.GRID_ABSTRACTION))
        grid_is_isomorphic = kwargs.pop('grid_is_isomorphic', 'auto')
        if grid_is_isomorphic is None:
            raise ValueError("'{}' may not be None.".format('grid_is_isomorphic'))

        # TODO: This should maybe be part of the dimension map? Time variables are not dependent on fields.
        self.format_time = kwargs.pop(KeywordArgument.FORMAT_TIME, True)

        # Use tags to set data variables.
        is_data = kwargs.pop(KeywordArgument.IS_DATA, [])

        VariableCollection.__init__(self, **kwargs)

        # Accept a DimensionMap, a raw dict, or None (empty map).
        dimension_map = deepcopy(dimension_map)
        if dimension_map is None:
            dimension_map = DimensionMap()
        elif isinstance(dimension_map, dict):
            dimension_map = DimensionMap.from_dict(dimension_map)
        self.dimension_map = dimension_map

        self.set_grid(grid, crs=crs)
        if driver != 'auto':
            self.dimension_map.set_driver(driver)
        if grid_abstraction != 'auto':
            self.dimension_map.set_grid_abstraction(grid_abstraction)
        if grid_is_isomorphic != 'auto':
            self.dimension_map.set_property(DMK.IS_ISOMORPHIC, grid_is_isomorphic)

        # Append the data variable tagged variable names. Variables not already
        # in the collection are added.
        is_data = list(get_iter(is_data, dtype=Variable))
        is_data_variable_names = get_variable_names(is_data)
        for idvn in is_data_variable_names:
            self.append_to_tags(TagName.DATA_VARIABLES, idvn, create=True)
        for idx, dvn in enumerate(is_data_variable_names):
            if dvn not in self:
                if isinstance(is_data[idx], Variable):
                    self.add_variable(is_data[idx])

        # Configure the field updating the dimension map in the process.
        cvar = s[DimensionMapKey.REALIZATION]
        if cvar is not None:
            self.set_realization(cvar)
        cvar = s[DimensionMapKey.TIME]
        if cvar is not None:
            self.set_time(cvar)
        cvar = s[DimensionMapKey.LEVEL]
        if cvar is not None:
            self.set_level(cvar)
        cvar = s[DimensionMapKey.GEOM]
        if cvar is not None:
            self.set_geom(cvar, crs=crs)
        if crs != 'auto':
            self.set_crs(crs)
Пример #38
0
    def __init__(self,
                 variable,
                 followers=None,
                 value=None,
                 mask=None,
                 allow_masked=True,
                 primary_mask=None,
                 slice_remap=None,
                 shape=None,
                 melted=None,
                 repeaters=None,
                 formatter=None,
                 clobber_masked=True):
        """Lead iterator for a variable with optional lockstep followers.

        :param variable: The lead variable to iterate.
        :param followers: Optional variables or iterator instances whose
         dimensions must be a subset of the lead variable's dimensions.
        :param value: Optional override of the lead variable's value.
        :param mask: Optional override of the lead variable's mask.
        :param allow_masked: Passed through to follower iterators; presumably
         yields masked elements when ``True`` -- confirm against the
         iteration implementation.
        :param primary_mask: Variable (or name) whose mask is authoritative.
        :param slice_remap: Index remapping applied when a follower's dimension
         order differs from the lead's.
        :param shape: Optional override of the iteration shape.
        :param melted: Variables to melt into the output; requires ``followers``.
        :param repeaters: Repeat records appended to the variable's own.
        :param formatter: Optional output value formatter.
        :param clobber_masked: Presumably controls replacing masked values --
         confirm against the iteration implementation.
        :raises ValueError: If ``melted`` is given without ``followers``, or a
         follower's dimensions are not a subset of the lead variable's.
        """
        if melted is not None and followers is None:
            raise ValueError(
                '"melted" must be None if there are no "followers".')

        self.variable = variable
        self.formatter = formatter
        self.allow_masked = allow_masked
        self.slice_remap = slice_remap
        self.clobber_masked = clobber_masked

        # Merge any repeat records declared on the variable itself.
        if variable.repeat_record is not None:
            if repeaters is None:
                repeaters = variable.repeat_record
            else:
                repeaters += variable.repeat_record
        self.repeaters = repeaters

        self._is_lead = True

        # Collect repeat records for melted variables keyed by variable name.
        if melted is None:
            melted_repeaters = None
        else:
            melted_repeaters = {}
            for m in melted:
                try:
                    if m.repeat_record is not None:
                        melted_repeaters[m.name] = m.repeat_record
                except AttributeError:
                    # Bug fix: an object without "repeat_record" exposes
                    # "repeaters" instead. The previous code re-read
                    # "m.repeat_record" here, which would raise AttributeError
                    # again rather than storing the record.
                    if m.repeaters is not None:
                        melted_repeaters[m.name] = m.repeaters
        self.melted_repeaters = melted_repeaters

        if melted is not None:
            melted = get_variable_names(melted)
        self.melted = melted

        if shape is None:
            shape = self.variable.shape
        self.shape = shape

        # The lead variable's mask is primary unless another variable is named.
        if primary_mask is None:
            primary_mask = variable.name
        else:
            primary_mask = get_variable_names(primary_mask)[0]
        self.primary_mask = primary_mask

        if value is None:
            self.value = variable.get_value()
        else:
            self.value = value

        if mask is None:
            self.mask = variable.get_mask()
        else:
            self.mask = mask

        # Wrap followers in iterators and compute a slice remap for any
        # follower whose dimension order differs from the lead variable's.
        if followers is not None:
            dimensions = get_dimension_names(self.variable.dimensions)
            followers = get_followers(followers)
            for fidx, follower in enumerate(followers):
                if isinstance(follower, self.__class__):
                    iterator = follower
                    follower = follower.variable
                else:
                    iterator = Iterator(follower,
                                        allow_masked=allow_masked,
                                        primary_mask=primary_mask)

                follower_dimensions = get_dimension_names(follower.dimensions)
                set_follower_dimensions = set(follower_dimensions)
                set_dimensions = set(dimensions)
                if not set_follower_dimensions.issubset(set_dimensions):
                    msg = 'Follower variable "{}" dimensions are not a subset of the lead variable.'.format(
                        follower.name)
                    raise ValueError(msg)
                if follower_dimensions != dimensions:
                    follower_slice_remap = []
                    for d in follower_dimensions:
                        if d in set_dimensions:
                            follower_slice_remap.append(dimensions.index(d))
                    iterator.slice_remap = follower_slice_remap
                    iterator.shape = self.shape
                iterator._is_lead = False
                followers[fidx] = iterator
            self.iterators = [self] + followers
            self.followers = followers
        else:
            self.iterators = [self]
            self.followers = None

        self._is_recursing = False
Пример #39
0
Файл: base.py Проект: NCPP/ocgis
    def create_field(self, *args, **kwargs):
        """
        Create a field object. In general, this should not be overloaded by subclasses.

        :keyword bool format_time: ``(=True)`` If ``False``, do not convert numeric times to Python date objects.
        :keyword str grid_abstraction: ``(='auto')`` If provided, use this grid abstraction.
        :keyword raw_field: ``(=None)`` If provided, modify this field instead.
        :type raw_field: None | :class:`~ocgis.Field`
        :param kwargs: Additional keyword arguments to :meth:`~ocgis.driver.base.AbstractDriver.create_raw_field`.
        :return: :class:`ocgis.Field`
        """
        # Work on a copy so popped keywords do not leak back to the caller.
        kwargs = kwargs.copy()
        raw_field = kwargs.pop('raw_field', None)
        format_time = kwargs.pop(KeywordArgument.FORMAT_TIME, True)
        grid_abstraction = kwargs.pop(KeywordArgument.GRID_ABSTRACTION, self.rd.grid_abstraction)
        grid_is_isomorphic = kwargs.pop('grid_is_isomorphic', self.rd.grid_is_isomorphic)

        if raw_field is None:
            # Get the raw variable collection from source.
            new_kwargs = kwargs.copy()
            new_kwargs['source_name'] = None
            raw_field = self.create_raw_field(*args, **new_kwargs)

        # Get the appropriate metadata for the collection.
        group_metadata = self.get_group_metadata(raw_field.group, self.metadata_source)
        # Always pull the dimension map from the request dataset. This allows it to be overloaded.
        dimension_map = self.get_group_metadata(raw_field.group, self.rd.dimension_map)

        # Modify the coordinate system variable. If it is overloaded on the request dataset, then the variable
        # collection needs to be updated to hold the variable and any alternative coordinate systems needs to be
        # removed.
        to_remove = None
        to_add = None
        crs = self.get_crs(group_metadata)
        if self.rd._has_assigned_coordinate_system:
            # An assigned CRS always wins over whatever was discovered in the metadata.
            to_add = self.rd._crs
            if crs is not None:
                to_remove = crs.name
        else:
            if self.rd._crs is not None and self.rd._crs != 'auto':
                to_add = self.rd._crs
                if crs is not None:
                    to_remove = crs.name
            elif crs is not None:
                to_add = crs
        if to_remove is not None:
            raw_field.pop(to_remove, None)
        if to_add is not None:
            raw_field.add_variable(to_add, force=True)
        # Overload the dimension map with the CRS.
        if to_add is not None:
            dimension_map.set_crs(to_add.name)

        # Remove the mask variable if present in the raw dimension map and the source dimension map is set to None.
        if self.rd.dimension_map.get_spatial_mask() is None and self.dimension_map_raw.get_spatial_mask() is not None:
            raw_field.pop(self.dimension_map_raw.get_spatial_mask())

        # Convert the raw variable collection to a field.
        # TODO: Identify a way to remove this code block; field should be appropriately initialized; format_time and grid_abstraction are part of a dimension map.
        kwargs[KeywordArgument.DIMENSION_MAP] = dimension_map
        kwargs[KeywordArgument.FORMAT_TIME] = format_time
        if grid_abstraction != 'auto':
            kwargs[KeywordArgument.GRID_ABSTRACTION] = grid_abstraction
        if grid_is_isomorphic != 'auto':
            kwargs['grid_is_isomorphic'] = grid_is_isomorphic
        field = Field.from_variable_collection(raw_field, *args, **kwargs)

        # If this is a source grid for regridding, ensure the flag is updated.
        field.regrid_source = self.rd.regrid_source
        # Update the assigned coordinate system flag.
        field._has_assigned_coordinate_system = self.rd._has_assigned_coordinate_system

        # Apply any requested subsets.
        if self.rd.time_range is not None:
            field = field.time.get_between(*self.rd.time_range).parent
        if self.rd.time_region is not None:
            field = field.time.get_time_region(self.rd.time_region).parent
        if self.rd.time_subset_func is not None:
            field = field.time.get_subset_by_function(self.rd.time_subset_func).parent
        if self.rd.level_range is not None:
            field = field.level.get_between(*self.rd.level_range).parent

        # These variables have all the dimensions needed for a data classification. Use overloaded values from the
        # request dataset if they are provided.
        try:
            data_variable_names = list(get_variable_names(self.rd.rename_variable))
        except NoDataVariablesFound:
            # It is okay to have no data variables in a field.
            data_variable_names = []
        for dvn in data_variable_names:
            field.append_to_tags(TagName.DATA_VARIABLES, dvn, create=True)

        # Load child fields recursively so each child receives identical processing.
        for child in list(field.children.values()):
            kwargs['raw_field'] = child
            field.children[child.name] = self.create_field(*args, **kwargs)

        return field
Пример #40
0
    def get_field(self, *args, **kwargs):
        """
        Create a field object from a raw variable collection.

        :keyword vc: ``(=None)`` If provided, build the field from this variable collection instead of reading one
         from source.
        :keyword bool format_time: ``(=True)`` If ``False``, do not convert numeric times to Python date objects.
        :keyword grid_abstraction: If provided, use this grid abstraction.
        :param kwargs: Additional keyword arguments to ``get_variable_collection``.
        :return: :class:`ocgis.Field`
        """
        vc = kwargs.pop('vc', None)
        format_time = kwargs.pop(KeywordArgument.FORMAT_TIME, True)
        if KeywordArgument.GRID_ABSTRACTION in kwargs:
            grid_abstraction = kwargs.pop(KeywordArgument.GRID_ABSTRACTION)
        else:
            # Sentinel meaning "not provided" so 'auto' can be distinguished from an explicit value.
            grid_abstraction = constants.UNINITIALIZED

        if vc is None:
            # Get the raw variable collection from source.
            new_kwargs = kwargs.copy()
            new_kwargs['source_name'] = None
            vc = self.get_variable_collection(*args, **new_kwargs)

        # Get the appropriate metadata for the collection.
        group_metadata = self.get_group_metadata(vc.group, self.metadata_source)
        # Always pull the dimension map from the request dataset. This allows it to be overloaded.
        dimension_map = self.get_group_metadata(vc.group, self.rd.dimension_map)

        # Modify the coordinate system variable. If it is overloaded on the request dataset, then the variable
        # collection needs to be updated to hold the variable and any alternative coordinate systems needs to be
        # removed.
        to_remove = None
        to_add = None
        crs = self.get_crs(group_metadata)
        if self.rd._has_assigned_coordinate_system:
            # An assigned CRS always wins over whatever was discovered in the metadata.
            to_add = self.rd._crs
            if crs is not None:
                to_remove = crs.name
        else:
            if self.rd._crs is not None and self.rd._crs != 'auto':
                to_add = self.rd._crs
                if crs is not None:
                    to_remove = crs.name
            elif crs is not None:
                to_add = crs
        if to_remove is not None:
            vc.pop(to_remove, None)
        if to_add is not None:
            vc.add_variable(to_add, force=True)
        # Overload the dimension map with the CRS.
        if to_add is not None:
            dimension_map.set_crs(to_add.name)

        # Remove the mask variable if present in the raw dimension map and the source dimension map is set to None.
        if self.rd.dimension_map.get_spatial_mask() is None and self.dimension_map_raw.get_spatial_mask() is not None:
            vc.pop(self.dimension_map_raw.get_spatial_mask())

        # Convert the raw variable collection to a field.
        kwargs['dimension_map'] = dimension_map
        kwargs[KeywordArgument.FORMAT_TIME] = format_time
        if grid_abstraction != constants.UNINITIALIZED:
            kwargs[KeywordArgument.GRID_ABSTRACTION] = grid_abstraction
        field = Field.from_variable_collection(vc, *args, **kwargs)

        # If this is a source grid for regridding, ensure the flag is updated.
        field.regrid_source = self.rd.regrid_source
        # Update the assigned coordinate system flag.
        field._has_assigned_coordinate_system = self.rd._has_assigned_coordinate_system

        # Apply any requested subsets.
        if self.rd.time_range is not None:
            field = field.time.get_between(*self.rd.time_range).parent
        if self.rd.time_region is not None:
            field = field.time.get_time_region(self.rd.time_region).parent
        if self.rd.time_subset_func is not None:
            field = field.time.get_subset_by_function(self.rd.time_subset_func).parent
        if self.rd.level_range is not None:
            field = field.level.get_between(*self.rd.level_range).parent

        # These variables have all the dimensions needed for a data classification. Use overloaded values from the
        # request dataset if they are provided.
        try:
            data_variable_names = list(get_variable_names(self.rd.rename_variable))
        except NoDataVariablesFound:
            # It is okay to have no data variables in a field.
            data_variable_names = []
        for dvn in data_variable_names:
            field.append_to_tags(TagName.DATA_VARIABLES, dvn, create=True)

        # Load child fields recursively so each child receives identical processing.
        for child in list(field.children.values()):
            kwargs['vc'] = child
            field.children[child.name] = self.get_field(*args, **kwargs)

        return field
Пример #41
0
    def set_variable(self, entry_key, variable, dimension=None, bounds=None, attrs=None, pos=None, dimensionless=False,
                     section=None):
        """
        Set coordinate variable information for ``entry_key``.

        :param str entry_key: See :class:`ocgis.constants.DimensionMapKey` for valid entry keys.
        :param variable: The variable to set. Use a variable object to auto-fill additional fields if they are ``None``.
         If ``None``, remove the entry from the map entirely.
        :type variable: :class:`str` | :class:`~ocgis.Variable`
        :param dimension: A sequence of dimension names. If ``None``, they will be pulled from ``variable`` if it is a
         variable object.
        :param bounds: See :meth:`~ocgis.DimensionMap.set_bounds`. If ``None``, pulled from ``variable`` when possible.
        :param dict attrs: Default attributes for the coordinate variables. If ``None``, they will be pulled from
         ``variable`` if it is a variable object.
        :param int pos: The representative dimension position in ``variable`` if ``variable`` has more than one
         dimension. For example, a latitude variable may have two dimensions ``(lon, lat)``. The mapper must determine
         which dimension position is representative for the latitude variable when slicing.
        :param section: A slice-like tuple used to extract the data out of its source variable into a single variable
         format.
        :type section: tuple

        >>> section = (None, 0)
        >>> # This will be converted to a slice.
        >>> [slice(None), slice(0, 1)]

        :param bool dimensionless: If ``True``, this variable has no canonical dimension.
        :raises: DimensionMapError
        """
        # Entries with dedicated setters (e.g. CRS) may not be set through this generic path.
        if entry_key in self._special_entry_keys:
            raise DimensionMapError(entry_key, "The entry '{}' has a special set method.".format(entry_key))
        # A section is meaningless without knowing which dimension/position it applies to.
        if section is not None and (pos is None and dimension is None):
            raise DimensionMapError(entry_key, "If a section is provided, position or dimension must be defined.")

        entry = self._get_entry_(entry_key)

        # A None variable means "remove this entry from the map".
        if variable is None:
            self._storage.pop(entry_key)
            return

        # Duck-typed auto-fill: if ``variable`` is a variable object, derive bounds/dimension from it. String
        # variable names fall through via AttributeError and must supply these explicitly.
        try:
            if bounds is None:
                bounds = variable.bounds
            if dimension is None:
                if variable.ndim > 1:
                    if pos is None and not dimensionless:
                        msg = "A position (pos) is required if no dimension is provided and target variable has " \
                              "greater than one dimension."
                        raise DimensionMapError(entry_key, msg)
                elif variable.ndim == 1:
                    # One dimension: the representative position is unambiguous.
                    pos = 0
                else:
                    # Zero-dimensional (scalar) variable: no representative position.
                    pos = None
                # We can have scalar dimensions.
                if pos is not None and not dimensionless:
                    dimension = variable.dimensions[pos]
        except AttributeError:
            # Assume string type.
            pass

        # Normalize variable and bounds references to plain names for storage.
        value = get_variable_names(variable)[0]
        if bounds is not None:
            bounds = get_variable_names(bounds)[0]
        if dimension is None:
            dimension = []
        else:
            dimension = list(get_dimension_names(dimension))

        if attrs is None:
            try:
                # Seed attributes from the entry's template defaults; deepcopy avoids mutating the template.
                attrs = self._storage.__class__(deepcopy(DIMENSION_MAP_TEMPLATE[entry_key][DMK.ATTRS]))
            except KeyError:
                # Default attributes are empty.
                attrs = self._storage.__class__()

        # Allow for any variable attributes.
        if hasattr(variable, 'attrs'):
            attrs.update(variable.attrs)

        # Dimension map attributes always take precedence. Dimension map attrs > Variable Attributes > Default Attributes
        current_attrs = self.get_attrs(entry_key)
        if current_attrs is None:
            current_attrs = self._storage.__class__()
        attrs.update(current_attrs)

        # Persist the normalized entry.
        entry[DMK.VARIABLE] = value
        entry[DMK.BOUNDS] = bounds
        entry[DMK.DIMENSION] = dimension
        entry[DMK.ATTRS] = attrs
        if section is not None:
            entry[DMK.SECTION] = section
Пример #42
0
    def create_field(self, *args, **kwargs):
        """
        Create a field object. In general, this should not be overloaded by subclasses.

        :keyword bool format_time: ``(=True)`` If ``False``, do not convert numeric times to Python date objects.
        :keyword str grid_abstraction: ``(='auto')`` If provided, use this grid abstraction.
        :keyword raw_field: ``(=None)`` If provided, modify this field instead.
        :type raw_field: None | :class:`~ocgis.Field`
        :param kwargs: Additional keyword arguments to :meth:`~ocgis.driver.base.AbstractDriver.create_raw_field`.
        :return: :class:`ocgis.Field`
        """
        # Work on a copy so popped keywords do not leak back to the caller.
        kwargs = kwargs.copy()
        raw_field = kwargs.pop('raw_field', None)
        format_time = kwargs.pop(KeywordArgument.FORMAT_TIME, True)
        grid_abstraction = kwargs.pop(KeywordArgument.GRID_ABSTRACTION,
                                      self.rd.grid_abstraction)
        grid_is_isomorphic = kwargs.pop('grid_is_isomorphic',
                                        self.rd.grid_is_isomorphic)

        if raw_field is None:
            # Get the raw variable collection from source.
            new_kwargs = kwargs.copy()
            new_kwargs['source_name'] = None
            raw_field = self.create_raw_field(*args, **new_kwargs)

        # Get the appropriate metadata for the collection.
        group_metadata = self.get_group_metadata(raw_field.group,
                                                 self.metadata_source)
        # Always pull the dimension map from the request dataset. This allows it to be overloaded.
        dimension_map = self.get_group_metadata(raw_field.group,
                                                self.rd.dimension_map)

        # Modify the coordinate system variable. If it is overloaded on the request dataset, then the variable
        # collection needs to be updated to hold the variable and any alternative coordinate systems needs to be
        # removed.
        to_remove = None
        to_add = None
        crs = self.get_crs(group_metadata)
        if self.rd._has_assigned_coordinate_system:
            # An assigned CRS always wins over whatever was discovered in the metadata.
            to_add = self.rd._crs
            if crs is not None:
                to_remove = crs.name
        else:
            if self.rd._crs is not None and self.rd._crs != 'auto':
                to_add = self.rd._crs
                if crs is not None:
                    to_remove = crs.name
            elif crs is not None:
                to_add = crs
        if to_remove is not None:
            raw_field.pop(to_remove, None)
        if to_add is not None:
            raw_field.add_variable(to_add, force=True)
        # Overload the dimension map with the CRS.
        if to_add is not None:
            dimension_map.set_crs(to_add.name)

        # Remove the mask variable if present in the raw dimension map and the source dimension map is set to None.
        if self.rd.dimension_map.get_spatial_mask(
        ) is None and self.dimension_map_raw.get_spatial_mask() is not None:
            raw_field.pop(self.dimension_map_raw.get_spatial_mask())

        # Convert the raw variable collection to a field.
        # TODO: Identify a way to remove this code block; field should be appropriately initialized; format_time and grid_abstraction are part of a dimension map.
        kwargs[KeywordArgument.DIMENSION_MAP] = dimension_map
        kwargs[KeywordArgument.FORMAT_TIME] = format_time
        if grid_abstraction != 'auto':
            kwargs[KeywordArgument.GRID_ABSTRACTION] = grid_abstraction
        if grid_is_isomorphic != 'auto':
            kwargs['grid_is_isomorphic'] = grid_is_isomorphic
        field = Field.from_variable_collection(raw_field, *args, **kwargs)

        # If this is a source grid for regridding, ensure the flag is updated.
        field.regrid_source = self.rd.regrid_source
        # Update the assigned coordinate system flag.
        field._has_assigned_coordinate_system = self.rd._has_assigned_coordinate_system

        # Apply any requested subsets.
        if self.rd.time_range is not None:
            field = field.time.get_between(*self.rd.time_range).parent
        if self.rd.time_region is not None:
            field = field.time.get_time_region(self.rd.time_region).parent
        if self.rd.time_subset_func is not None:
            field = field.time.get_subset_by_function(
                self.rd.time_subset_func).parent
        if self.rd.level_range is not None:
            field = field.level.get_between(*self.rd.level_range).parent

        # These variables have all the dimensions needed for a data classification. Use overloaded values from the
        # request dataset if they are provided.
        try:
            data_variable_names = list(
                get_variable_names(self.rd.rename_variable))
        except NoDataVariablesFound:
            # It is okay to have no data variables in a field.
            data_variable_names = []
        for dvn in data_variable_names:
            field.append_to_tags(TagName.DATA_VARIABLES, dvn, create=True)

        # Load child fields recursively so each child receives identical processing.
        for child in list(field.children.values()):
            kwargs['raw_field'] = child
            field.children[child.name] = self.create_field(*args, **kwargs)

        return field