Пример #1
0
    def test_aggregation_two_dims_using_moments_kernel(self):
        """Aggregate two ungridded variables in two dimensions with the moments kernel.

        The kernel is expected to give three outputs per input variable (the
        variable itself, ``_std_dev`` and ``_num_points``), so six in total.
        """
        self.kernel = moments()

        first = make_regular_2d_ungridded_data_with_missing_values()
        second = make_regular_2d_ungridded_data_with_missing_values()
        second.metadata._name = 'snow'
        # Shift the second variable by a constant: only its mean is expected to change
        second._data += 10

        output = UngriddedDataList([first, second]).aggregate(
            how=self.kernel, y=slice(-12.5, 12.5, 15), x=slice(-7.5, 7.5, 10))

        expect_mean = numpy.array([[4.4, 4.5], [35.0 / 3, 13.5]])
        expect_stddev = numpy.sqrt(numpy.array([[9.3, 4.5], [13.0 / 3, 4.5]]))
        expect_count = numpy.array([[5, 2], [3, 2]])

        assert isinstance(output, GriddedDataList)
        assert len(output) == 6

        expected_names = ['rain', 'rain_std_dev', 'rain_num_points',
                          'snow', 'snow_std_dev', 'snow_num_points']
        for item, name in zip(output, expected_names):
            assert item.var_name == name

        expected_values = [expect_mean, expect_stddev, expect_count,
                           expect_mean + 10, expect_stddev, expect_count]
        for item, expected in zip(output, expected_values):
            assert_arrays_almost_equal(item.data, expected)
Пример #2
0
    def test_aggregation_two_dims_using_moments_kernel(self):
        """Check the six outputs of a moments aggregation over two variables.

        Each variable should come back as a mean, a standard deviation and a
        point count, with names derived from the variable name.
        """
        self.kernel = moments()

        datasets = [make_regular_2d_ungridded_data_with_missing_values(),
                    make_regular_2d_ungridded_data_with_missing_values()]
        datasets[1].metadata._name = 'snow'
        # A constant offset on the second variable should move its mean but nothing else
        datasets[1]._data += 10

        grid = dict(y=slice(-12.5, 12.5, 15), x=slice(-7.5, 7.5, 10))
        output = UngriddedDataList(datasets).aggregate(how=self.kernel, **grid)

        expect_mean = numpy.array([[4.4, 4.5], [35.0 / 3, 13.5]])
        expect_stddev = numpy.sqrt(numpy.array([[9.3, 4.5], [13.0 / 3, 4.5]]))
        expect_count = numpy.array([[5, 2], [3, 2]])

        assert isinstance(output, GriddedDataList)
        assert len(output) == 6

        results = list(output)
        suffixes = ('', '_std_dev', '_num_points')
        for first_index, prefix in ((0, 'rain'), (3, 'snow')):
            for offset, suffix in enumerate(suffixes):
                assert results[first_index + offset].var_name == prefix + suffix

        expectations = (expect_mean, expect_stddev, expect_count,
                        expect_mean + 10, expect_stddev, expect_count)
        for result, expected in zip(results, expectations):
            assert_arrays_almost_equal(result.data, expected)
Пример #3
0
    def test_aggregating_list_of_datasets_over_two_dims_with_diff_masks(self):
        """Aggregate two variables with different masks and check each output separately.

        The second variable is completely masked before aggregation, so its
        aggregated output is expected to be completely masked too, while the
        first output is expected to carry the partial mask in ``result_0``.
        """
        grid = {'x': slice(-7.5, 7.5, 5), 'y': slice(-12.5, 12.5, 5)}

        var_0 = make_regular_2d_ungridded_data_with_missing_values()
        var_1 = make_regular_2d_ungridded_data_with_missing_values()

        # Mask out every point of the second variable
        var_1.data.mask = 1

        datalist = UngriddedDataList([var_0, var_1])

        cube_out = datalist.aggregate(how=self.kernel, **grid)

        result_0 = numpy.ma.array(
            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
             [10.0, 11.0, 12.0], [13.0, 14.0, 15.0]],
            mask=[[0, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0], [1, 0, 0]],
            fill_value=float('nan'))

        result_1 = numpy.ma.array(
            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
             [10.0, 11.0, 12.0], [13.0, 14.0, 15.0]],
            mask=[[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
            fill_value=float('nan'))

        # Check the length before indexing into the output so a wrong-sized
        # result fails on this assertion rather than with an IndexError.
        assert len(cube_out) == 2
        compare_masked_arrays(cube_out[0].data, result_0)
        compare_masked_arrays(cube_out[1].data, result_1)
Пример #4
0
    def setUp(self):
        """Build a list of two ungridded variables that share one pair of lat/lon coordinates."""
        lat_points = np.arange(-10, 11, 5)
        lon_points = np.arange(-5, 6, 5)
        lon_grid, lat_grid = np.meshgrid(lon_points, lat_points)

        lat = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
        lon = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))
        self.coords = CoordList([lat, lon])

        values = np.reshape(np.arange(15) + 1.0, (5, 3))

        def flux_metadata(standard_name, long_name):
            # Both variables differ only in their names
            return Metadata(standard_name=standard_name, long_name=long_name,
                            units="kg m-2 s-1", missing_value=-999)

        rain = UngriddedData(values,
                             flux_metadata('rainfall_flux', "TOTAL RAINFALL RATE: LS+CONV KG/M2/S"),
                             self.coords)
        snow = UngriddedData(values * 0.1,
                             flux_metadata('snowfall_flux', "TOTAL SNOWFALL RATE: LS+CONV KG/M2/S"),
                             self.coords)
        self.ungridded_data_list = UngriddedDataList([rain, snow])
Пример #5
0
    def collocate(self, points, data, constraint, kernel):
        """
        Collocate gridded data onto a set of sample points using an interpolator.

        A list of data objects is handled by collocating each element in turn and gathering all of the
        results into one list. The interpolator is created on the first call and cached on
        ``self.interpolator``, so subsequent variables reuse it.

        :param UngriddedData or UngriddedCoordinates points: Object defining the sample points
        :param GriddedData or GriddedDataList data: Data to resample
        :param constraint: Must be None or a DummyConstraint; anything else is rejected
        :param kernel: Passed to the GriddedUngriddedInterpolator when it is first created
        :return: An UngriddedDataList holding one UngriddedData object for each input variable
        :raises ValueError: If a constraint other than None or a DummyConstraint is given
        """
        from cis.collocation.gridded_interpolation import GriddedUngriddedInterpolator
        log_memory_profile("GriddedUngriddedCollocator Initial")

        if isinstance(data, list):
            # The interpolator is cached after the first variable, so recursing over the list is cheap
            collocated = UngriddedDataList()
            for variable in data:
                collocated.extend(self.collocate(points, variable, constraint, kernel))
            return collocated

        constraint_given = constraint is not None and not isinstance(constraint, DummyConstraint)
        if constraint_given:
            raise ValueError("A constraint cannot be specified for the GriddedUngriddedCollocator")

        # Put the sample points onto a single 360 degree longitude range first...
        _fix_longitude_range(points.coords(), points)
        # ...and then move the data onto the same longitude range as the sample points
        _fix_longitude_range(points.coords(), data)

        log_memory_profile("GriddedUngriddedCollocator after data retrieval")

        logging.info("--> Collocating...")
        logging.info("    {} sample points".format(points.size))

        if self.interpolator is None:
            # Build once and keep for any later variables
            self.interpolator = GriddedUngriddedInterpolator(data, points, kernel, self.missing_data_for_missing_sample)

        values = self.interpolator(data, fill_value=self.fill_value, extrapolate=self.extrapolate)

        log_memory_profile("GriddedUngriddedCollocator after running kernel on sample points")

        # Explicitly configured names and units take precedence over those of the input data
        name = self.var_name or data.var_name
        long_name = self.var_long_name or data.long_name
        metadata = Metadata(name, long_name=long_name, shape=values.shape, missing_value=self.fill_value,
                            units=self.var_units or data.units)
        set_standard_name_if_valid(metadata, data.standard_name)

        collocated_variable = UngriddedData(values, metadata, points.coords())
        return_data = UngriddedDataList([collocated_variable])

        log_memory_profile("GriddedUngriddedCollocator final")

        return return_data
Пример #6
0
    def test_GIVEN_UngriddedDataList_WHEN_constrain_THEN_correctly_subsetted_UngriddedDataList_returned(self):
        """Subsetting an UngriddedDataList should subset each variable and return an UngriddedDataList."""
        first = cis.test.util.mock.make_regular_2d_ungridded_data()
        snow_metadata = Metadata(name='snow', standard_name='snowfall_flux',
                                 long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1", missing_value=-999)
        # The second variable is the first shifted by one, on the same coordinates
        second = UngriddedData(first.data + 1, snow_metadata, first.coords())

        subset = UngriddedDataList([first, second]).subset(longitude=[0.0, 5.0], latitude=[-5.0, 10.0])

        assert isinstance(subset, UngriddedDataList)
        expected_first = [5, 6, 8, 9, 11, 12, 14, 15]
        assert subset[0].data.tolist() == expected_first
        assert subset[1].data.tolist() == [value + 1 for value in expected_first]
Пример #7
0
 def test_combining(self):
     """Check that two UngriddedDataLists can be added or extended, but a single item cannot be added."""
     from cis.test.util.mock import make_regular_2d_ungridded_data
     other = UngriddedDataList([make_regular_2d_ungridded_data() for _ in range(2)])

     # Adding two lists gives another list of the same type
     combined = self.ungridded_data_list + other
     assert_that(isinstance(combined, UngriddedDataList))

     # Extending keeps the type and grows the list in place
     other.extend(self.ungridded_data_list)
     assert_that(isinstance(other, UngriddedDataList))
     assert_that(len(other) == 4)

     # Adding a single data object (rather than a list) is not allowed
     with assert_raises(TypeError):
         self.ungridded_data_list + other[0]
Пример #8
0
    def test_multiple_line(self):
        """Plot a list of two time series with ``how='line'`` and then run ``check_graphic``."""
        from cis.test.util.mock import make_dummy_ungridded_data_time_series
        from cis.data_io.ungridded_data import UngriddedDataList

        series = UngriddedDataList([make_dummy_ungridded_data_time_series() for _ in range(2)])
        # Touch the data of the first series before plotting
        _ = series[0].data
        # Offset and rename the second series so the two lines are distinguishable
        series[1].data += 2.0
        series[1].metadata._name = 'snow'

        series.plot(how='line')

        self.check_graphic()
Пример #9
0
def mask_data(data, cad_score, extinction_qc, cad_confidence=20):
    """
    Select the good columns from each data object and mask the remaining bad points.

    NOTE(review): the previous docstring cited a "default CAD confidence of 80 from
    doi:10.1002/2013JD019527", but the default in the signature is 20 - confirm which is intended.

    The extinction QC values are::
        Bit     Value   Interpretation
        1       0       unconstrained retrieval; initial lidar ratio unchanged during solution process
        1       1       constrained retrieval
        2       2       Initial lidar ratio reduced to prevent divergence of extinction solution
        3       4       Initial lidar ratio increased to reduce the number of negative extinction
                        coefficients in the derived solution
        4       8       Calculated backscatter coefficient exceeds the maximum allowable value
        5       16      Layer being analyzed has been identified by the feature finder as being totally
                        attenuating (i.e., opaque)
        6       32      Estimated optical depth error exceeds the maximum allowable value
        7       64      Solution converges, but with an unacceptably large number of negative values
        8       128     Retrieval terminated at maximum iterations
        9       256     No solution possible within allowable lidar ratio bounds
        16      32768   Fill value or no solution attempted

    :param CommonDataList data: The data to be masked
    :param cad_score: Object whose ``data`` holds the CAD scores used to find the aerosol points
    :param extinction_qc: Object whose ``data`` holds the extinction QC flags
    :param cad_confidence: Confidence threshold passed to the aerosol-finding helpers
    :return: An UngriddedDataList of the column-subsetted (and, where applicable, masked) data
    :raises ValueError: If the first dimension of a data object does not match the column mask
    """
    from cis.data_io.ungridded_data import UngriddedDataList

    # A column is kept only if both the aerosol and the extinction checks accept it
    good_aerosol_columns = find_good_aerosol_columns(cad_score, cad_confidence)
    good_extinction_columns = find_good_extinction_columns(extinction_qc)
    column_mask = good_aerosol_columns & good_extinction_columns

    # Point-by-point masks, evaluated only over the columns which are kept
    good_extinctions = _find_converged_extinction_points(extinction_qc.data[column_mask])
    aerosols = _find_aerosol(cad_score.data[column_mask], cad_confidence)

    compressed_data = UngriddedDataList()
    for variable in data:
        if variable.data.shape[0] != column_mask.shape[0]:
            # This only outputs a warning in numpy currently
            raise ValueError("The data shape doesn't match the mask shape")

        selected = variable[column_mask]

        has_extended_second_dim = len(selected.shape) > 1 and selected.shape[1] > 1
        if has_extended_second_dim:
            # The aerosol (2D) mask is shared by every such variable
            selected.data = apply_mask_to_numpy_array(selected.data, ~aerosols)
            if selected.name().startswith('Extinction'):
                # Extinction variables additionally get the good extinction (2D) mask
                selected.data = apply_mask_to_numpy_array(selected.data, ~good_extinctions)

        compressed_data.append(selected)
        print("Valid {} points: {}".format(selected.name(), selected.count()))

    return compressed_data
Пример #10
0
    def constrain(self, data):
        """Subset the supplied ungridded data using the limits held by this constraint.

        The combined coordinate mask and any shape-region indices are calculated for the first
        variable only and cached on the instance, so every variable in a list is subset identically.

        :param data: data to be subsetted; a list is handled by constraining each element in turn
        :return: subsetted data (an UngriddedDataList for list input), or None if no points remain
        """
        import numpy as np
        from datetime import datetime
        from cis.data_io.ungridded_data import UngriddedDataList

        if isinstance(data, list):
            # Calculating masks and indices will only take place on the first iteration,
            # so we can just call this method recursively if we've got a list of data.
            output = UngriddedDataList()
            for var in data:
                output.append(self.constrain(var))
            return output

        _data = self._create_data_for_subset(data)

        # Look the shape limit up without removing it. Popping it here meant that only the first
        # variable of a list was subset by shape, because later calls no longer found the entry.
        _shape = self._limits.get('shape')

        if self._combined_mask is None:
            # Create the combined mask across all of the coordinate limits
            shape = _data.coords()[0].data.shape  # This assumes they are all the same shape
            combined_mask = np.ones(shape, dtype=bool)
            for coord, limit in self._limits.items():
                if coord == 'shape':
                    # Not a coordinate range; the shape is applied separately below
                    continue
                # Convert the points to datetimes if the limit is a datetime
                if isinstance(limit.start, datetime):
                    points = _data.coord(coord).units.num2date(
                        _data.coord(coord).data)
                else:
                    points = _data.coord(coord).data
                # Select any points which are <= to the stop limit AND >= to the start limit
                mask = (np.less_equal(points, limit.stop)
                        & np.greater_equal(points, limit.start))
                combined_mask &= mask
            self._combined_mask = combined_mask

        _data = _data[self._combined_mask]

        if _shape is not None:
            if self._shape_indices is None:
                # Cached so that the same region indices are reused for every variable
                self._shape_indices = _get_ungridded_subset_region_indices(
                    _data, _shape)
            _data = _data[np.unravel_index(self._shape_indices, _data.shape)]

        if _data.size == 0:
            _data = None

        return _data
Пример #11
0
    def test_explicit_comparative_scatter(self):
        """Plot two variables with ``how='comparativescatter'`` and then run ``check_graphic``."""
        from cis.test.util.mock import make_regular_2d_ungridded_data
        from cis.data_io.ungridded_data import UngriddedDataList

        first = make_regular_2d_ungridded_data()
        second = make_regular_2d_ungridded_data(data_offset=2)
        datasets = UngriddedDataList([first, second])

        # Accessing the data is needed to set up the coord shapes unfortunately...
        # TODO: Fix this in the Coord somewhere
        for index in (0, 1):
            _ = datasets[index].data

        datasets[0].metadata._name = 'snow'
        datasets.plot(how='comparativescatter')

        self.check_graphic()
Пример #12
0
    def test_can_create_list_from_generators_and_other_iterators(self):
        """Check an UngriddedDataList can be built from iterables other than a list.

        A tuple is tried first, then an ``itertools.chain`` fed by a generator expression.
        """
        from cis.test.util.mock import make_regular_2d_ungridded_data
        import itertools
        another_list = UngriddedDataList((make_regular_2d_ungridded_data(),
                                          make_regular_2d_ungridded_data()))
        assert_that(len(another_list) == 2)

        # Deliberately not called ``dict``, which would shadow the builtin within this test
        data_by_key = {
            1: [make_regular_2d_ungridded_data()],
            2: [make_regular_2d_ungridded_data()]
        }
        another_list = UngriddedDataList(
            itertools.chain.from_iterable(d for d in data_by_key.values()))
        assert_that(len(another_list) == 2)
Пример #13
0
    def test_layer_opts(self):
        """Plot two time series as lines with per-layer options and then run ``check_graphic``."""
        from cis.test.util.mock import make_dummy_ungridded_data_time_series
        from cis.data_io.ungridded_data import UngriddedDataList

        series = UngriddedDataList([make_dummy_ungridded_data_time_series() for _ in range(2)])
        # Touch the data of the first series before plotting
        _ = series[0].data
        # Offset and rename the second series so the two layers differ
        series[1].data += 2.0
        series[1].metadata._name = 'snow'

        # One set of options per layer, in the same order as the data
        per_layer_options = [{'c': 'yellow', 'itemstyle': 'dotted'},
                             {'c': 'purple', 'itemstyle': 'dashed'}]
        series.plot(how='line', layer_opts=per_layer_options)

        self.check_graphic()
Пример #14
0
    def constrain(self, data):
        """Subset the supplied ungridded data using the limits held by this constraint.

        The combined coordinate mask and any shape-region indices are calculated for the first
        variable only and cached on the instance, so every variable in a list is subset identically.

        :param data: data to be subsetted; a list is handled by constraining each element in turn
        :return: subsetted data (an UngriddedDataList for list input), or None if no points remain
        """
        import numpy as np
        from datetime import datetime
        from cis.data_io.ungridded_data import UngriddedDataList

        if isinstance(data, list):
            # Calculating masks and indices will only take place on the first iteration,
            # so we can just call this method recursively if we've got a list of data.
            output = UngriddedDataList()
            for var in data:
                output.append(self.constrain(var))
            return output

        _data = self._create_data_for_subset(data)

        # Look the shape limit up without removing it. Popping it here meant that only the first
        # variable of a list was subset by shape, because later calls no longer found the entry.
        _shape = self._limits.get('shape')

        if self._combined_mask is None:
            # Create the combined mask across all of the coordinate limits
            shape = _data.coords()[0].data.shape  # This assumes they are all the same shape
            combined_mask = np.ones(shape, dtype=bool)
            for coord, limit in self._limits.items():
                if coord == 'shape':
                    # Not a coordinate range; the shape is applied separately below
                    continue
                # Convert the points to datetimes if the limit is a datetime
                if isinstance(limit.start, datetime):
                    points = _data.coord(coord).units.num2date(_data.coord(coord).data)
                else:
                    points = _data.coord(coord).data
                # Select any points which are <= to the stop limit AND >= to the start limit
                mask = (np.less_equal(points, limit.stop) & np.greater_equal(points, limit.start))
                combined_mask &= mask
            self._combined_mask = combined_mask

        _data = _data[self._combined_mask]

        if _shape is not None:
            if self._shape_indices is None:
                # Cached so that the same region indices are reused for every variable
                self._shape_indices = _get_ungridded_subset_region_indices(_data, _shape)
            _data = _data[np.unravel_index(self._shape_indices, _data.shape)]

        if _data.size == 0:
            _data = None

        return _data
Пример #15
0
    def read_data_list(self, filenames, variables, product=None, aliases=None):
        """
        Read several variables from a set of files into a single data list.

        The type of list returned is decided by the first variable read: a GriddedDataList if it is
        gridded, otherwise an UngriddedDataList. Every later variable is appended to that same list.

        :param filenames: One or more filenames of the files to read
        :type filenames: string or list
        :param variables: One or more variables to read from the files (expanded via ``_expand_wildcards``)
        :type variables: string or list
        :param str product: Name of data product to use (optional)
        :param aliases: List of variable aliases to put on each variables
         data object as an alternative means of identifying them. (Optional)
        :return:  A list of the data read out (either a GriddedDataList or UngriddedDataList depending on the
         type of data contained in the files)
        :raises ValueError: If there are fewer aliases than variables
        """
        # Single values are wrapped so that everything below can assume lists
        filenames = listify(filenames)
        variables = listify(variables)
        aliases = listify(aliases) if aliases else None

        variables = self._expand_wildcards(variables, filenames, product)

        data_list = None
        for index, variable_name in enumerate(variables):
            loaded = self._get_data_func(filenames, variable_name, product)
            loaded.filenames = filenames

            if aliases:
                try:
                    loaded.alias = aliases[index]
                except IndexError:
                    raise ValueError("Number of aliases does not match number of variables")

            if data_list is None:
                # The first variable determines which kind of list is built
                list_type = GriddedDataList if loaded.is_gridded else UngriddedDataList
                data_list = list_type()
            data_list.append(loaded)

        assert data_list is not None
        return data_list
Пример #16
0
    def test_GIVEN_UngriddedDataList_WHEN_constrain_THEN_correctly_subsetted_UngriddedDataList_returned(self):
        """Subsetting an UngriddedDataList should subset each variable and return an UngriddedDataList."""
        first = cis.test.util.mock.make_regular_2d_ungridded_data()
        snow_metadata = Metadata(name='snow', standard_name='snowfall_flux',
                                 long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1", missing_value=-999)
        # The second variable is the first shifted by one, on the same coordinates
        second = UngriddedData(first.data + 1, snow_metadata, first.coords())

        limits = {'longitude': [0.0, 5.0], 'latitude': [-5.0, 10.0]}
        subset = UngriddedDataList([first, second]).subset(**limits)

        assert isinstance(subset, UngriddedDataList)
        expected_first = [5, 6, 8, 9, 11, 12, 14, 15]
        assert subset[0].data.tolist() == expected_first
        assert subset[1].data.tolist() == [value + 1 for value in expected_first]
Пример #17
0
    def test_list_ungridded_ungridded_box_mean(self):
        """Collocate a list of two variables with the moments kernel and check all six outputs.

        Each variable is expected to give a mean, a fully masked standard
        deviation and a point count of one at every sample point.
        """
        first = mock.make_regular_2d_ungridded_data()
        snow_offset = 3
        second = mock.make_regular_2d_ungridded_data(data_offset=snow_offset)
        second.long_name = 'TOTAL SNOWFALL RATE: LS+CONV KG/M2/S'
        second.standard_name = 'snowfall_flux'
        second.metadata._name = 'snow'

        data_list = UngriddedDataList([first, second])
        sample_points = mock.make_regular_2d_ungridded_data()
        constraint = SepConstraintKdtree('500km')
        kernel = moments()
        collocator = GeneralUngriddedCollocator()
        output = collocator.collocate(sample_points, data_list, constraint, kernel)

        expected_result = np.arange(1, 16)
        expected_n = np.ones(15, dtype=int)

        assert len(output) == 6
        assert isinstance(output, UngriddedDataList)
        for index, name in enumerate(('snow', 'snow_std_dev', 'snow_num_points'), start=3):
            assert output[index].var_name == name

        # Outputs come in (mean, std dev, count) triples, one triple per input variable
        for first_index, offset in ((0, 0), (3, snow_offset)):
            assert np.allclose(output[first_index].data, expected_result + offset)
            assert all(output[first_index + 1].data.mask)
            assert np.allclose(output[first_index + 2].data, expected_n)
Пример #18
0
 def test_combining(self):
     """Check that two UngriddedDataLists can be added or extended, but a single item cannot be added."""
     from cis.test.util.mock import make_regular_2d_ungridded_data
     other = UngriddedDataList([make_regular_2d_ungridded_data() for _ in range(2)])

     # Adding two lists gives another list of the same type
     combined = self.ungridded_data_list + other
     assert_that(isinstance(combined, UngriddedDataList))

     # Extending keeps the type and grows the list in place
     other.extend(self.ungridded_data_list)
     assert_that(isinstance(other, UngriddedDataList))
     assert_that(len(other) == 4)

     # Adding a single data object (rather than a list) is not allowed
     with assert_raises(TypeError):
         self.ungridded_data_list + other[0]
Пример #19
0
class TestUngriddedDataList(TestCase):
    """Tests of UngriddedDataList coordinate handling and conversion to a data frame."""

    @staticmethod
    def _rate_metadata(standard_name, long_name):
        """Return the Metadata shared by the test variables, which differ only in their names."""
        return Metadata(standard_name=standard_name, long_name=long_name,
                        units="kg m-2 s-1", missing_value=-999)

    def setUp(self):
        """Create a list of two variables ('rain' and 'snow') sharing the same coordinates."""
        lat_points = np.arange(-10, 11, 5)
        lon_points = np.arange(-5, 6, 5)
        lon_grid, lat_grid = np.meshgrid(lon_points, lat_points)
        lat = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
        lon = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))
        self.coords = CoordList([lat, lon])

        values = np.reshape(np.arange(15) + 1.0, (5, 3))
        rain = UngriddedData(values,
                             self._rate_metadata('rain', "TOTAL RAINFALL RATE: LS+CONV KG/M2/S"),
                             self.coords)
        snow = UngriddedData(values * 0.1,
                             self._rate_metadata('snow', "TOTAL SNOWFALL RATE: LS+CONV KG/M2/S"),
                             self.coords)
        self.ungridded_data_list = UngriddedDataList([rain, snow])

    def _assert_rain_snow_and_coords(self, df):
        """Check the columns which every data frame built from the setUp data should contain."""
        assert_that(df['rain'][5] == 6)
        assert_almost_equal(df['snow'][5], 0.6)
        assert_that(df['lat'][13] == 10)
        assert_that(df['lon'][0] == -5)

    def test_GIVEN_data_containing_multiple_matching_coordinates_WHEN_coords_THEN_only_unique_coords_returned(self):
        """Both variables share their coordinates, so only two coordinates should be returned."""
        coords = self.ungridded_data_list.coords()

        assert_that(len(coords), is_(2))
        assert_that(isinstance(coords, CoordList))
        standard_names = [coord.standard_name for coord in coords]
        assert_that(standard_names, contains_inanyorder('latitude', 'longitude'))

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """The data frame should hold one column per variable and per coordinate."""
        self._assert_rain_snow_and_coords(self.ungridded_data_list.as_data_frame())

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_with_missing_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """A masked point in an extra variable should appear as NaN in the data frame."""
        hail_values = np.reshape(np.arange(15) + 10.0, (5, 3))
        masked_row, masked_column = 1, 2

        hail_data = np.ma.masked_array(hail_values, np.zeros(hail_values.shape, dtype=bool))
        hail_data.mask[masked_row, masked_column] = True

        hail = UngriddedData(hail_data,
                             self._rate_metadata('hail', "TOTAL HAIL RATE: LS+CONV KG/M2/S"),
                             self.coords)
        self.ungridded_data_list.append(hail)

        df = self.ungridded_data_list.as_data_frame()

        self._assert_rain_snow_and_coords(df)
        assert_almost_equal(df['hail'][1], 11.0)
        # The data frame is indexed by flattened position, so convert the 2D index of the masked point
        flat_masked_index = np.ravel_multi_index([masked_row, masked_column], (5, 3))
        assert_that(np.isnan(df['hail'][flat_masked_index]))

        self.ungridded_data_list.pop()
Пример #20
0
    def test_GIVEN_grids_contain_multiple_matching_coordinates_WHEN_coords_THEN_only_unique_coords_returned(self):
        """Two variables built on the same CoordList should report only two coordinates between them."""
        lat_points = np.arange(-10, 11, 5)
        lon_points = np.arange(-5, 6, 5)
        lon_grid, lat_grid = np.meshgrid(lon_points, lat_points)
        lat = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
        lon = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))
        shared_coords = CoordList([lat, lon])

        values = np.reshape(np.arange(15) + 1.0, (5, 3))

        def rate_metadata(standard_name, long_name):
            # The two variables differ only in their names
            return Metadata(standard_name=standard_name, long_name=long_name,
                            units="kg m-2 s-1", missing_value=-999)

        rain = UngriddedData(values, rate_metadata('rain', "TOTAL RAINFALL RATE: LS+CONV KG/M2/S"),
                             shared_coords)
        snow = UngriddedData(values * 0.1, rate_metadata('snow', "TOTAL SNOWFALL RATE: LS+CONV KG/M2/S"),
                             shared_coords)

        coords = UngriddedDataList([rain, snow]).coords()

        assert_that(len(coords), is_(2))
        assert_that(isinstance(coords, CoordList))
        standard_names = [coord.standard_name for coord in coords]
        assert_that(standard_names, contains_inanyorder('latitude', 'longitude'))
Пример #21
0
    def test_implicit_comparative_scatter(self):
        """Plot one variable against another via ``xaxis=`` and then run ``check_graphic``."""
        from cis.test.util.mock import make_regular_2d_ungridded_data
        from cis.data_io.ungridded_data import UngriddedDataList

        first = make_regular_2d_ungridded_data()
        second = make_regular_2d_ungridded_data(data_offset=2)
        datasets = UngriddedDataList([first, second])

        # Accessing the data is needed to set up the coord shapes unfortunately...
        for index in (0, 1):
            _ = datasets[index].data

        datasets[0].metadata._name = 'snow'
        # The first variable supplies the x axis for the plot of the second
        datasets[1].plot(xaxis=datasets[0])

        self.check_graphic()
Пример #22
0
    def test_aggregating_list_of_datasets_over_two_dims(self):
        """Aggregate two identical variables over x and y and expect the same result for both."""
        datalist = UngriddedDataList([make_regular_2d_ungridded_data_with_missing_values()
                                      for _ in range(2)])

        cube_out = datalist.aggregate(how=self.kernel, x=slice(-7.5, 7.5, 5), y=slice(-12.5, 12.5, 5))

        # Values 1.0 to 15.0 laid out over five rows of three
        expected_values = numpy.arange(1.0, 16.0).reshape((5, 3))
        expected_mask = [[0, 0, 0],
                         [0, 1, 0],
                         [0, 0, 1],
                         [0, 0, 0],
                         [1, 0, 0]]
        result = numpy.ma.array(expected_values, mask=expected_mask, fill_value=float('nan'))

        assert len(cube_out) == 2
        for index in (0, 1):
            compare_masked_arrays(cube_out[index].data, result)
Пример #23
0
    def create_data_object(self, filenames, variable):
        """
        Create the data object(s) for a single variable read from the given files.

        A two-dimensional variable is returned as one UngriddedData object. A variable with more
        dimensions is returned as an UngriddedDataList with one entry for each combination of indices
        over all but the last two dimensions.

        :param filenames: Files to read, passed on to ``hdf.read`` and the coordinate creation
        :param variable: Name of the variable to read
        :return: An UngriddedData object, or an UngriddedDataList for more than two dimensions
        :raises NotImplementedError: If the variable has fewer than two dimensions
        """
        from itertools import product

        logging.debug("Creating data object for variable " + variable)

        # The variable is passed on so that the coordinate reader can work out whether to apply
        # interpolation to the lat/lon data or not
        coords = self._create_coord_list(filenames, variable)

        sdata, _ = hdf.read(filenames, variable)

        # The data for this variable, along with its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")

        # The number and lengths of the dimensions are taken from the first manager
        _, ndim, dim_len, _, _ = var[0].info()

        if ndim < 2:
            raise NotImplementedError("1D field in MODIS L2 data.")
        if ndim == 2:
            return UngriddedData(var, metadata, coords, _get_MODIS_SDS_data)

        data_list = UngriddedDataList()

        # Walk over every index combination of all but the last two dimensions
        leading_ranges = [range(length) for length in dim_len[:-2]]
        for leading_indices in product(*leading_ranges):
            # NOTE(review): the managers are modified in place and every UngriddedData appended below
            # is given the same ``var`` - confirm that the data is read before the next iteration
            # changes the start/count again.
            for manager in var:
                manager._start = list(leading_indices) + [0, 0]
                manager._count = [1] * len(leading_indices) + manager.info()[2][-2:]
            data_list.append(UngriddedData(var, metadata, coords.copy(), _get_MODIS_SDS_data))
        return data_list
Пример #24
0
    def setUp(self):
        """Build a list of two ungridded variables that share one pair of lat/lon coordinates."""
        lat_points = np.arange(-10, 11, 5)
        lon_points = np.arange(-5, 6, 5)
        lon_grid, lat_grid = np.meshgrid(lon_points, lat_points)

        lat = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
        lon = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))
        self.coords = CoordList([lat, lon])

        values = np.reshape(np.arange(15) + 1.0, (5, 3))

        def flux_metadata(standard_name, long_name):
            # Both variables differ only in their names
            return Metadata(standard_name=standard_name, long_name=long_name,
                            units="kg m-2 s-1", missing_value=-999)

        rain = UngriddedData(values,
                             flux_metadata('rainfall_flux', "TOTAL RAINFALL RATE: LS+CONV KG/M2/S"),
                             self.coords)
        snow = UngriddedData(values * 0.1,
                             flux_metadata('snowfall_flux', "TOTAL SNOWFALL RATE: LS+CONV KG/M2/S"),
                             self.coords)
        self.ungridded_data_list = UngriddedDataList([rain, snow])
Пример #25
0
    def test_list_of_data(self):
        """The DummyCollocator should return the data of every variable in a list unchanged."""
        sample = mock.make_regular_2d_ungridded_data()
        data = UngriddedDataList([mock.make_regular_2d_ungridded_data(data_offset=offset)
                                  for offset in (5, 10)])
        collocator = DummyCollocator()

        # Neither a constraint nor a kernel is supplied
        output = collocator.collocate(sample, data, None, None)

        assert len(output) == 2
        for index in (0, 1):
            assert numpy.array_equal(output[index].data, data[index].data)
Пример #26
0
class TestUngriddedDataList(TestCase):
    """
    Tests for UngriddedDataList: slicing, combining, string representation, unique
    coordinates, construction from arbitrary iterables and conversion to a data frame.
    """

    def setUp(self):
        """Create a list holding two UngriddedData variables that share one CoordList."""
        x_points = np.arange(-10, 11, 5)
        y_points = np.arange(-5, 6, 5)
        y, x = np.meshgrid(y_points, x_points)
        x = Coord(
            x, Metadata(name='lat', standard_name='latitude', units='degrees'))
        y = Coord(
            y, Metadata(name='lon', standard_name='longitude',
                        units='degrees'))
        # Values 1.0 .. 15.0 on the same 5x3 shape as the coordinate grids.
        data = np.reshape(np.arange(15) + 1.0, (5, 3))
        self.coords = CoordList([x, y])

        ug1 = UngriddedData(
            data,
            Metadata(standard_name='rainfall_flux',
                     long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                     units="kg m-2 s-1",
                     missing_value=-999), self.coords)
        ug2 = UngriddedData(
            data * 0.1,
            Metadata(standard_name='snowfall_flux',
                     long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                     units="kg m-2 s-1",
                     missing_value=-999), self.coords)
        self.ungridded_data_list = UngriddedDataList([ug1, ug2])

    def test_slicing(self):
        """An integer index gives a single UngriddedData; a slice gives an UngriddedDataList."""
        single_item = self.ungridded_data_list[1]
        assert_that(isinstance(single_item, UngriddedData))
        many_items = self.ungridded_data_list[0:1]
        assert_that(isinstance(many_items, UngriddedDataList))
        many_items = self.ungridded_data_list[0:]
        assert_that(isinstance(many_items, UngriddedDataList))

    def test_combining(self):
        """Adding and extending keep the list type; adding a single item raises TypeError."""
        from cis.test.util.mock import make_regular_2d_ungridded_data
        another_list = UngriddedDataList([
            make_regular_2d_ungridded_data(),
            make_regular_2d_ungridded_data()
        ])
        # Test adding
        assert_that(
            isinstance(self.ungridded_data_list + another_list,
                       UngriddedDataList))
        # Test extending
        another_list.extend(self.ungridded_data_list)
        assert_that(isinstance(another_list, UngriddedDataList))
        assert_that(len(another_list), is_(4))
        # Test can't add single items
        with assert_raises(TypeError):
            self.ungridded_data_list + another_list[0]

    def test_can_get_string_of_list(self):
        """str() lists each variable with its units, followed by the coordinate names."""
        s = str(self.ungridded_data_list)
        # Matcher form so that a failure reports the actual string.
        assert_that(
            s,
            is_("UngriddedDataList: \n0: Ungridded data: rainfall_flux / (kg m-2 s-1) \n"
                "1: Ungridded data: snowfall_flux / (kg m-2 s-1) \nCoordinates: \n  latitude\n  longitude\n"))

    def test_GIVEN_data_containing_multiple_matching_coordinates_WHEN_coords_THEN_only_unique_coords_returned(
            self):
        """Both variables share the same coordinates, so coords() must report each once."""
        unique_coords = self.ungridded_data_list.coords()
        assert_that(len(unique_coords), is_(2))
        assert_that(isinstance(unique_coords, CoordList))
        coord_names = [coord.standard_name for coord in unique_coords]
        assert_that(coord_names, contains_inanyorder('latitude', 'longitude'))

    def test_can_create_list_from_generators_and_other_iterators(self):
        """The constructor accepts a tuple and an itertools.chain iterator, not only lists."""
        from cis.test.util.mock import make_regular_2d_ungridded_data
        import itertools
        another_list = UngriddedDataList((make_regular_2d_ungridded_data(),
                                          make_regular_2d_ungridded_data()))
        assert_that(len(another_list), is_(2))

        # Named so that the builtin ``dict`` is not shadowed.
        data_by_key = {
            1: [make_regular_2d_ungridded_data()],
            2: [make_regular_2d_ungridded_data()]
        }
        another_list = UngriddedDataList(
            itertools.chain.from_iterable(data_by_key.values()))
        assert_that(len(another_list), is_(2))

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(
            self):
        """The data frame has one column per variable and per coordinate, indexed by flat position."""
        df = self.ungridded_data_list.as_data_frame()

        assert_that(df['rainfall_flux'][5], is_(6))
        assert_almost_equal(df['snowfall_flux'][5], 0.6)
        assert_that(df['latitude'][13], is_(10))
        assert_that(df['longitude'][0], is_(-5))

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_with_missing_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(
            self):
        """A masked element of an appended variable must appear as NaN in the data frame."""
        d = np.reshape(np.arange(15) + 10.0, (5, 3))

        # Start fully unmasked, then mask exactly one element.
        data = np.ma.masked_array(d, np.zeros(d.shape, dtype=bool))
        data.mask[1, 2] = True

        ug3 = UngriddedData(
            data,
            Metadata(name='hail',
                     long_name="TOTAL HAIL RATE: LS+CONV KG/M2/S",
                     units="kg m-2 s-1",
                     missing_value=-999), self.coords)

        self.ungridded_data_list.append(ug3)

        df = self.ungridded_data_list.as_data_frame()

        assert_that(df['rainfall_flux'][5], is_(6))
        assert_almost_equal(df['snowfall_flux'][5], 0.6)
        assert_that(df['latitude'][13], is_(10))
        assert_that(df['longitude'][0], is_(-5))
        assert_almost_equal(df['TOTAL HAIL RATE: LS+CONV KG/M2/S'][1], 11.0)
        # Flat index of the masked (1, 2) element in the 5x3 array.
        masked_position = np.ravel_multi_index([1, 2], (5, 3))
        assert_that(
            np.isnan(df['TOTAL HAIL RATE: LS+CONV KG/M2/S'][masked_position]))

        # Remove the extra variable again.
        self.ungridded_data_list.pop()
Пример #27
0
    def collocate(self, points, data, constraint, kernel):
        """
        Collocate ungridded source data onto a set of sample points.

        For every sample point the constraint selects candidate data points and the kernel
        reduces them to one value per output variable. When ``data`` is a list, each element
        is collocated in turn and all results are gathered into a single list.

        :param UngriddedData or UngriddedCoordinates points: Object defining the sample points
        :param UngriddedData data: The source data to collocate from (or a list of such objects)
        :param constraint: An instance of a Constraint subclass which takes a data object and
                           returns a subset of that data based on its internal parameters
        :param kernel: An instance of a Kernel subclass which takes a number of points and returns
                       a single value
        :return UngriddedDataList: One UngriddedData per variable described by the kernel
        """
        log_memory_profile("GeneralUngriddedCollocator Initial")

        if isinstance(data, list):
            # Original author's note: indexing and constraints (for SepConstraintKdTree) only take
            # place on the first iteration, so a plain recursive call per variable is sufficient.
            merged = UngriddedDataList()
            for single_variable in data:
                merged.extend(self.collocate(points, single_variable, constraint, kernel))
            return merged

        # Bring the sample points, then the data points, onto one common 360 degree longitude range.
        _fix_longitude_range(points.coords(), points)
        _fix_longitude_range(points.coords(), data)

        # Data frames allow fancy indexing; rows of the source data containing NaN are dropped.
        sample_points = points.as_data_frame(time_index=False, name='vals')
        data_points = data.as_data_frame(time_index=False, name='vals').dropna(axis=0)

        log_memory_profile("GeneralUngriddedCollocator after data retrieval")

        # Let the constraint build whatever index it needs (no coordinate map is supplied).
        data_index.create_indexes(constraint, points, data_points, None)
        log_memory_profile("GeneralUngriddedCollocator after indexing")

        logging.info("--> Collocating...")

        # Remember the source variable's description and ask the kernel which outputs it produces.
        self.var_name = data.var_name
        self.var_long_name = data.long_name
        self.var_standard_name = data.standard_name
        self.var_units = data.units
        output_specs = kernel.get_variable_details(self.var_name, self.var_long_name,
                                                   self.var_standard_name, self.var_units)

        n_samples = len(sample_points)
        # One row per output variable; everything starts masked and is unmasked by assignment.
        collocated = np.ma.masked_all((len(output_specs), n_samples))
        collocated.fill_value = self.fill_value
        log_memory_profile("GeneralUngriddedCollocator after output array creation")

        logging.info("    {} sample points".format(n_samples))

        if not isinstance(kernel, nn_horizontal_only):
            # General case: evaluate the kernel on the constrained points of each sample point.
            point_iterator = constraint.get_iterator(self.missing_data_for_missing_sample, None, None,
                                                     data_points, None, sample_points, None)
            for column, sample_point, constrained_points in point_iterator:
                try:
                    # The kernel yields a single value or a tuple with one value per output variable.
                    collocated[:, column] = kernel.get_value(sample_point, constrained_points)
                except CoordinateMultiDimError as error:
                    raise NotImplementedError(error)
                except ValueError:
                    # Leave this sample point masked.
                    continue
        else:
            # Nearest neighbour in the horizontal only: look it up directly in the kd-tree,
            # without constraining the other dimensions.
            nearest_rows = constraint.haversine_distance_kd_tree_index.find_nearest_point(sample_points)
            collocated[0, :] = data_points.iloc[nearest_rows].vals.values
        log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points")

        # Mask any bad values
        collocated = np.ma.masked_invalid(collocated)

        results = UngriddedDataList()
        for row, spec in enumerate(output_specs):
            # spec is indexed as (name, long_name, standard_name, units)
            output_metadata = Metadata(name=spec[0], long_name=spec[1], shape=(n_samples,),
                                       missing_value=self.fill_value, units=spec[3])
            set_standard_name_if_valid(output_metadata, spec[2])
            results.append(UngriddedData(collocated[row, :], output_metadata, points.coords()))
        log_memory_profile("GeneralUngriddedCollocator final")

        return results
Пример #28
0
class TestUngriddedDataList(TestCase):
    """
    Tests for UngriddedDataList: slicing, combining, string representation, unique
    coordinates, construction from arbitrary iterables and conversion to a data frame.
    """

    def setUp(self):
        """Create a list holding two UngriddedData variables that share one CoordList."""
        x_points = np.arange(-10, 11, 5)
        y_points = np.arange(-5, 6, 5)
        y, x = np.meshgrid(y_points, x_points)
        x = Coord(x, Metadata(name='lat', standard_name='latitude', units='degrees'))
        y = Coord(y, Metadata(name='lon', standard_name='longitude', units='degrees'))
        # Values 1.0 .. 15.0 on the same 5x3 shape as the coordinate grids.
        data = np.reshape(np.arange(15) + 1.0, (5, 3))
        self.coords = CoordList([x, y])

        ug1 = UngriddedData(data, Metadata(standard_name='rainfall_flux', long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                                           units="kg m-2 s-1", missing_value=-999), self.coords)
        ug2 = UngriddedData(data * 0.1, Metadata(standard_name='snowfall_flux', long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                                                 units="kg m-2 s-1", missing_value=-999), self.coords)
        self.ungridded_data_list = UngriddedDataList([ug1, ug2])

    def test_slicing(self):
        """An integer index gives a single UngriddedData; a slice gives an UngriddedDataList."""
        single_item = self.ungridded_data_list[1]
        assert_that(isinstance(single_item, UngriddedData))
        many_items = self.ungridded_data_list[0:1]
        assert_that(isinstance(many_items, UngriddedDataList))
        many_items = self.ungridded_data_list[0:]
        assert_that(isinstance(many_items, UngriddedDataList))

    def test_combining(self):
        """Adding and extending keep the list type; adding a single item raises TypeError."""
        from cis.test.util.mock import make_regular_2d_ungridded_data
        another_list = UngriddedDataList([make_regular_2d_ungridded_data(), make_regular_2d_ungridded_data()])
        # Test adding
        assert_that(isinstance(self.ungridded_data_list + another_list, UngriddedDataList))
        # Test extending
        another_list.extend(self.ungridded_data_list)
        assert_that(isinstance(another_list, UngriddedDataList))
        assert_that(len(another_list), is_(4))
        # Test can't add single items
        with assert_raises(TypeError):
            self.ungridded_data_list + another_list[0]

    def test_can_get_string_of_list(self):
        """str() lists each variable with its units, followed by the coordinate names."""
        s = str(self.ungridded_data_list)
        # Matcher form so that a failure reports the actual string.
        assert_that(s, is_("UngriddedDataList: \n0: Ungridded data: rainfall_flux / (kg m-2 s-1) \n"
                           "1: Ungridded data: snowfall_flux / (kg m-2 s-1) \nCoordinates: \n  latitude\n  longitude\n"))

    def test_GIVEN_data_containing_multiple_matching_coordinates_WHEN_coords_THEN_only_unique_coords_returned(self):
        """Both variables share the same coordinates, so coords() must report each once."""
        unique_coords = self.ungridded_data_list.coords()
        assert_that(len(unique_coords), is_(2))
        assert_that(isinstance(unique_coords, CoordList))
        coord_names = [coord.standard_name for coord in unique_coords]
        assert_that(coord_names, contains_inanyorder('latitude', 'longitude'))

    def test_can_create_list_from_generators_and_other_iterators(self):
        """The constructor accepts a tuple and an itertools.chain iterator, not only lists."""
        from cis.test.util.mock import make_regular_2d_ungridded_data
        import itertools
        another_list = UngriddedDataList((make_regular_2d_ungridded_data(), make_regular_2d_ungridded_data()))
        assert_that(len(another_list), is_(2))

        # Named so that the builtin ``dict`` is not shadowed.
        data_by_key = {1: [make_regular_2d_ungridded_data()], 2: [make_regular_2d_ungridded_data()]}
        another_list = UngriddedDataList(itertools.chain.from_iterable(data_by_key.values()))
        assert_that(len(another_list), is_(2))

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """The data frame has one column per variable and per coordinate, indexed by flat position."""
        df = self.ungridded_data_list.as_data_frame()

        assert_that(df['rainfall_flux'][5], is_(6))
        assert_almost_equal(df['snowfall_flux'][5], 0.6)
        assert_that(df['latitude'][13], is_(10))
        assert_that(df['longitude'][0], is_(-5))

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_with_missing_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """A masked element of an appended variable must appear as NaN in the data frame."""
        d = np.reshape(np.arange(15) + 10.0, (5, 3))

        # Start fully unmasked, then mask exactly one element.
        data = np.ma.masked_array(d, np.zeros(d.shape, dtype=bool))
        data.mask[1, 2] = True

        ug3 = UngriddedData(data, Metadata(name='hail', long_name="TOTAL HAIL RATE: LS+CONV KG/M2/S",
                                           units="kg m-2 s-1", missing_value=-999), self.coords)

        self.ungridded_data_list.append(ug3)

        df = self.ungridded_data_list.as_data_frame()

        assert_that(df['rainfall_flux'][5], is_(6))
        assert_almost_equal(df['snowfall_flux'][5], 0.6)
        assert_that(df['latitude'][13], is_(10))
        assert_that(df['longitude'][0], is_(-5))
        assert_almost_equal(df['TOTAL HAIL RATE: LS+CONV KG/M2/S'][1], 11.0)
        # Flat index of the masked (1, 2) element in the 5x3 array.
        masked_position = np.ravel_multi_index([1, 2], (5, 3))
        assert_that(np.isnan(df['TOTAL HAIL RATE: LS+CONV KG/M2/S'][masked_position]))

        # Remove the extra variable again.
        self.ungridded_data_list.pop()
Пример #29
0
    def collocate(self, points, data, constraint, kernel):
        """
        Collocate a data object onto a set of sample points.

        For each non-masked sample point the constraint (if any) selects candidate data points
        and the kernel reduces them to one value per output variable. Sample points for which
        no value is produced keep ``self.fill_value``.

        :param points: UngriddedData or UngriddedCoordinates defining the sample points
        :param data: An UngriddedData object or Cube, or any other object containing metadata that
                     the constraint object can read. May also be a list of objects, in which case
                     every element is collocated and all results are returned in one list
        :param constraint: An instance of a Constraint subclass which takes a data object and
                           returns a subset of that data based on its internal parameters
        :param kernel: An instance of a Kernel subclass which takes a number of points and returns
                       a single value
        :return: An UngriddedDataList with one UngriddedData per variable described by the kernel
        :raises ValueError: if kernel nn_gridded or li is used with non-Cube data or with a
                            constraint other than a DummyConstraint
        :raises NotImplementedError: if the kernel raises CoordinateMultiDimError
        """
        log_memory_profile("GeneralUngriddedCollocator Initial")

        if isinstance(data, list):
            # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration,
            # so we really can just call this method recursively if we've got a list of data.
            output = UngriddedDataList()
            for var in data:
                output.extend(self.collocate(points, var, constraint, kernel))
            return output

        metadata = data.metadata

        sample_points = points.get_all_points()

        # Convert ungridded data to a list of points if kernel needs it.
        # Special case checks for kernels that use a cube - this could be done more elegantly.
        if isinstance(kernel, (nn_gridded, li)):
            if hasattr(kernel, "interpolator"):
                # If we have an interpolator on the kernel we need to reset it as it depends on the actual values
                #  as well as the coordinates
                kernel.interpolator = None
                kernel.coord_names = []
            if not isinstance(data, iris.cube.Cube):
                raise ValueError("Ungridded data cannot be used with kernel nn_gridded or li")
            if constraint is not None and not isinstance(constraint, DummyConstraint):
                raise ValueError("A constraint cannot be specified with kernel nn_gridded or li")
            data_points = data
        else:
            data_points = data.get_non_masked_points()

        # First fix the sample points so that they all fall within the same 360 degree longitude range
        _fix_longitude_range(points.coords(), sample_points)
        # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points
        _fix_longitude_range(points.coords(), data_points)

        log_memory_profile("GeneralUngriddedCollocator after data retrieval")

        # Create index if constraint and/or kernel require one.
        coord_map = None
        data_index.create_indexes(constraint, points, data_points, coord_map)
        data_index.create_indexes(kernel, points, data_points, coord_map)
        log_memory_profile("GeneralUngriddedCollocator after indexing")

        logging.info("--> Collocating...")

        # Create output arrays.
        self.var_name = data.name()
        self.var_long_name = metadata.long_name
        self.var_standard_name = metadata.standard_name
        self.var_units = data.units
        # Each entry is indexed below as (name, long_name, standard_name, units).
        var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name,
                                                      self.var_standard_name, self.var_units)
        sample_points_count = len(sample_points)
        # One row per output variable, pre-filled with the fill value.
        values = np.zeros((len(var_set_details), sample_points_count)) + self.fill_value
        log_memory_profile("GeneralUngriddedCollocator after output array creation")

        logging.info("    {} sample points".format(sample_points_count))
        # Apply constraint and/or kernel to each sample point.
        cell_count = 0
        total_count = 0
        for i, point in sample_points.enumerate_non_masked_points():
            # Log progress periodically.
            cell_count += 1
            if cell_count == 1000:
                total_count += cell_count
                cell_count = 0
                logging.info("    Processed {} points of {}".format(total_count, sample_points_count))

            if constraint is None:
                con_points = data_points
            else:
                con_points = constraint.constrain_points(point, data_points)
            try:
                value_obj = kernel.get_value(point, con_points)
                # Kernel returns either a single value or a tuple of values to insert into each output variable.
                if isinstance(value_obj, tuple):
                    for idx, val in enumerate(value_obj):
                        # NaN entries are skipped so that the fill value stays in place.
                        if not np.isnan(val):
                            values[idx, i] = val
                else:
                    values[0, i] = value_obj
            except CoordinateMultiDimError as e:
                raise NotImplementedError(e)
            except ValueError:
                # Deliberately ignored: this sample point keeps the fill value.
                pass
        log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points")

        return_data = UngriddedDataList()
        for idx, var_details in enumerate(var_set_details):
            if idx == 0:
                # NOTE(review): the first output is built on the input's metadata object, which the
                # assignments below then modify - confirm that changing the input's metadata is intended.
                new_data = UngriddedData(values[0, :], metadata, points.coords())
                new_data.metadata._name = var_details[0]
                new_data.metadata.long_name = var_details[1]
                cis.utils.set_cube_standard_name_if_valid(new_data, var_details[2])
                new_data.metadata.shape = (len(sample_points),)
                new_data.metadata.missing_value = self.fill_value
                # Units are the fourth element; index 2 is the standard name used just above.
                new_data.units = var_details[3]
            else:
                var_metadata = Metadata(name=var_details[0], long_name=var_details[1], shape=(len(sample_points),),
                                        missing_value=self.fill_value, units=var_details[3])
                new_data = UngriddedData(values[idx, :], var_metadata, points.coords())
            return_data.append(new_data)
        log_memory_profile("GeneralUngriddedCollocator final")

        return return_data