Exemplo n.º 1
0
    def collocate(self, points, data, constraint, kernel):
        """
        Collocate gridded source data onto ungridded sample points and return
        the result as ungridded data.

        The actual resampling is delegated to a
        :class:`GriddedUngriddedInterpolator`, which is built once and cached on
        ``self.interpolator`` so that subsequent calls (e.g. over a list of
        variables) reuse it rather than rebuilding it per variable.

        :param UngriddedData or UngriddedCoordinates points: Objects defining the sample points
        :param GriddedData or GriddedDataList data: Data to resample; a list is
                                                    handled by recursing over its elements
        :param constraint: Must be None or a DummyConstraint -- this collocator
                           performs interpolation only and does not support constraints
        :param kernel: An instance of a Kernel subclass which takes a number of points and returns
                       a single value
        :return: An UngriddedDataList containing a single UngriddedData result
        :raises ValueError: if a non-dummy constraint is supplied
        """
        from cis.collocation.gridded_interpolation import GriddedUngriddedInterpolator
        log_memory_profile("GriddedUngriddedCollocator Initial")

        if isinstance(data, list):
            # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration,
            # so we really can just call this method recursively if we've got a list of data.
            output = UngriddedDataList()
            for var in data:
                output.extend(self.collocate(points, var, constraint, kernel))
            return output

        # Reject any real constraint up front: this collocator only interpolates.
        if constraint is not None and not isinstance(constraint, DummyConstraint):
            raise ValueError("A constraint cannot be specified for the GriddedUngriddedCollocator")
        data_points = data

        # First fix the sample points so that they all fall within the same 360 degree longitude range
        _fix_longitude_range(points.coords(), points)
        # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points
        _fix_longitude_range(points.coords(), data_points)

        log_memory_profile("GriddedUngriddedCollocator after data retrieval")

        logging.info("--> Collocating...")
        logging.info("    {} sample points".format(points.size))

        if self.interpolator is None:
            # Cache the interpolator (built from this data/sample geometry) so
            # repeated calls with the same sample points can reuse it.
            self.interpolator = GriddedUngriddedInterpolator(data, points, kernel, self.missing_data_for_missing_sample)

        values = self.interpolator(data, fill_value=self.fill_value, extrapolate=self.extrapolate)

        log_memory_profile("GriddedUngriddedCollocator after running kernel on sample points")

        # Build result metadata from the source data, letting any user-supplied
        # overrides (self.var_name / var_long_name / var_units) take precedence.
        metadata = Metadata(self.var_name or data.var_name, long_name=self.var_long_name or data.long_name,
                            shape=values.shape, missing_value=self.fill_value, units=self.var_units or data.units)
        set_standard_name_if_valid(metadata, data.standard_name)
        return_data = UngriddedDataList([UngriddedData(values, metadata, points.coords())])

        log_memory_profile("GriddedUngriddedCollocator final")

        return return_data
Exemplo n.º 2
0
 def test_combining(self):
     """Combining UngriddedDataLists: '+', extend(), and scalar addition failing."""
     from cis.test.util.mock import make_regular_2d_ungridded_data
     other = UngriddedDataList(
         [make_regular_2d_ungridded_data() for _ in range(2)])
     # '+' on two lists should produce an UngriddedDataList
     summed = self.ungridded_data_list + other
     assert_that(isinstance(summed, UngriddedDataList))
     # extend() grows the list in place while keeping its type
     other.extend(self.ungridded_data_list)
     assert_that(isinstance(other, UngriddedDataList))
     assert_that(len(other) == 4)
     # Adding a bare data object (not a list) must raise TypeError
     with assert_raises(TypeError):
         self.ungridded_data_list + other[0]
Exemplo n.º 3
0
 def test_combining(self):
     """Verify list-combining semantics of UngriddedDataList."""
     from cis.test.util.mock import make_regular_2d_ungridded_data
     first = make_regular_2d_ungridded_data()
     second = make_regular_2d_ungridded_data()
     extra = UngriddedDataList([first, second])
     # Concatenation with '+' preserves the UngriddedDataList type.
     assert_that(
         isinstance(self.ungridded_data_list + extra, UngriddedDataList))
     # In-place extension also preserves the type, and the length grows.
     extra.extend(self.ungridded_data_list)
     assert_that(isinstance(extra, UngriddedDataList))
     assert_that(len(extra) == 4)
     # A single element cannot be concatenated onto a list.
     with assert_raises(TypeError):
         self.ungridded_data_list + extra[0]
Exemplo n.º 4
0
    def collocate(self, points, data, constraint, kernel):
        """
        Collocate ungridded source data onto a set of ungridded sample points,
        using pandas DataFrames for the constraint/kernel indexing.

        :param UngriddedData or UngriddedCoordinates points: Object defining the sample points
        :param UngriddedData data: The source data to collocate from; a list is
                                   handled by recursing over its elements
        :param constraint: An instance of a Constraint subclass which takes a data object and
                           returns a subset of that data based on its internal parameters
        :param kernel: An instance of a Kernel subclass which takes a number of points and returns
                       a single value (or a tuple of values, one per output variable)
        :return UngriddedDataList: one UngriddedData per kernel output variable
        """
        log_memory_profile("GeneralUngriddedCollocator Initial")

        if isinstance(data, list):
            # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration,
            # so we really can just call this method recursively if we've got a list of data.
            output = UngriddedDataList()
            for var in data:
                output.extend(self.collocate(points, var, constraint, kernel))
            return output

        # First fix the sample points so that they all fall within the same 360 degree longitude range
        _fix_longitude_range(points.coords(), points)
        # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points
        _fix_longitude_range(points.coords(), data)

        # Convert to dataframes for fancy indexing; rows with missing data values are dropped
        sample_points = points.as_data_frame(time_index=False, name='vals')
        data_points = data.as_data_frame(time_index=False, name='vals').dropna(axis=0)

        log_memory_profile("GeneralUngriddedCollocator after data retrieval")

        # Create index if constraint and/or kernel require one.
        coord_map = None
        data_index.create_indexes(constraint, points, data_points, coord_map)
        log_memory_profile("GeneralUngriddedCollocator after indexing")

        logging.info("--> Collocating...")

        # Create output arrays.
        # Record the source variable's identifying metadata on the collocator so the
        # kernel can derive names/units for each of its output variables.
        self.var_name = data.var_name
        self.var_long_name = data.long_name
        self.var_standard_name = data.standard_name
        self.var_units = data.units
        # Each element of var_set_details is (name, long_name, standard_name, units),
        # as consumed in the result-building loop below.
        var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name,
                                                      self.var_standard_name, self.var_units)

        sample_points_count = len(sample_points)
        # Create an empty masked array to store the collocated values. The elements will be unmasked by assignment.
        values = np.ma.masked_all((len(var_set_details), sample_points_count))
        values.fill_value = self.fill_value
        log_memory_profile("GeneralUngriddedCollocator after output array creation")

        logging.info("    {} sample points".format(sample_points_count))
        # Apply constraint and/or kernel to each sample point.

        if isinstance(kernel, nn_horizontal_only):
            # Only find the nearest point using the kd-tree, without constraint in other dimensions
            nearest_points = data_points.iloc[constraint.haversine_distance_kd_tree_index.find_nearest_point(sample_points)]
            values[0, :] = nearest_points.vals.values
        else:
            for i, point, con_points in constraint.get_iterator(self.missing_data_for_missing_sample, None, None,
                                                                data_points, None, sample_points, None):

                try:
                    values[:, i] = kernel.get_value(point, con_points)
                    # Kernel returns either a single value or a tuple of values to insert into each output variable.
                except CoordinateMultiDimError as e:
                    raise NotImplementedError(e)
                except ValueError as e:
                    # The kernel could not produce a value for this sample point;
                    # the corresponding output elements simply stay masked.
                    pass
        log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points")

        # Mask any bad values
        values = np.ma.masked_invalid(values)

        return_data = UngriddedDataList()
        for idx, var_details in enumerate(var_set_details):
            var_metadata = Metadata(name=var_details[0], long_name=var_details[1], shape=(len(sample_points),),
                                    missing_value=self.fill_value, units=var_details[3])
            set_standard_name_if_valid(var_metadata, var_details[2])
            return_data.append(UngriddedData(values[idx, :], var_metadata, points.coords()))
        log_memory_profile("GeneralUngriddedCollocator final")

        return return_data
Exemplo n.º 5
0
    def collocate(self, points, data, constraint, kernel):
        """
        Collocate source data onto a set of ungridded sample points by iterating
        over each sample point, constraining the candidate data points and
        applying the kernel.

        :param points: UngriddedData or UngriddedCoordinates defining the sample points
        :param data: An UngriddedData object or Cube, or any other object containing metadata that
                     the constraint object can read. May also be a list of objects, in which case a list will
                     be returned
        :param constraint: An instance of a Constraint subclass which takes a data object and
                           returns a subset of that data based on its internal parameters
        :param kernel: An instance of a Kernel subclass which takes a number of points and returns
                       a single value (or a tuple of values, one per output variable)
        :return: An UngriddedDataList with one UngriddedData per output variable
                 produced by the kernel
        :raises ValueError: if ungridded data, or a non-dummy constraint, is used
                            with an nn_gridded or li kernel
        """
        log_memory_profile("GeneralUngriddedCollocator Initial")

        if isinstance(data, list):
            # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration,
            # so we really can just call this method recursively if we've got a list of data.
            output = UngriddedDataList()
            for var in data:
                output.extend(self.collocate(points, var, constraint, kernel))
            return output

        metadata = data.metadata

        sample_points = points.get_all_points()

        # Convert ungridded data to a list of points if kernel needs it.
        # Special case checks for kernels that use a cube - this could be done more elegantly.
        if isinstance(kernel, nn_gridded) or isinstance(kernel, li):
            if hasattr(kernel, "interpolator"):
                # If we have an interpolator on the kernel we need to reset it as it depends on the actual values
                #  as well as the coordinates
                kernel.interpolator = None
                kernel.coord_names = []
            if not isinstance(data, iris.cube.Cube):
                raise ValueError("Ungridded data cannot be used with kernel nn_gridded or li")
            if constraint is not None and not isinstance(constraint, DummyConstraint):
                raise ValueError("A constraint cannot be specified with kernel nn_gridded or li")
            data_points = data
        else:
            data_points = data.get_non_masked_points()

        # First fix the sample points so that they all fall within the same 360 degree longitude range
        _fix_longitude_range(points.coords(), sample_points)
        # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points
        _fix_longitude_range(points.coords(), data_points)

        log_memory_profile("GeneralUngriddedCollocator after data retrieval")

        # Create index if constraint and/or kernel require one.
        coord_map = None
        data_index.create_indexes(constraint, points, data_points, coord_map)
        data_index.create_indexes(kernel, points, data_points, coord_map)
        log_memory_profile("GeneralUngriddedCollocator after indexing")

        logging.info("--> Collocating...")

        # Create output arrays.
        self.var_name = data.name()
        self.var_long_name = metadata.long_name
        self.var_standard_name = metadata.standard_name
        self.var_units = data.units
        # Each element of var_set_details is (name, long_name, standard_name, units),
        # matching the argument order passed to get_variable_details here.
        var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name,
                                                      self.var_standard_name, self.var_units)
        sample_points_count = len(sample_points)
        # Pre-fill the output with fill_value; successful kernel calls overwrite it.
        values = np.zeros((len(var_set_details), sample_points_count)) + self.fill_value
        log_memory_profile("GeneralUngriddedCollocator after output array creation")

        logging.info("    {} sample points".format(sample_points_count))
        # Apply constraint and/or kernel to each sample point.
        cell_count = 0
        total_count = 0
        for i, point in sample_points.enumerate_non_masked_points():
            # Log progress periodically.
            cell_count += 1
            if cell_count == 1000:
                total_count += cell_count
                cell_count = 0
                logging.info("    Processed {} points of {}".format(total_count, sample_points_count))

            if constraint is None:
                con_points = data_points
            else:
                con_points = constraint.constrain_points(point, data_points)
            try:
                value_obj = kernel.get_value(point, con_points)
                # Kernel returns either a single value or a tuple of values to insert into each output variable.
                if isinstance(value_obj, tuple):
                    for idx, val in enumerate(value_obj):
                        if not np.isnan(val):
                            values[idx, i] = val
                else:
                    values[0, i] = value_obj
            except CoordinateMultiDimError as e:
                raise NotImplementedError(e)
            except ValueError:
                # The kernel could not produce a value for this sample point;
                # leave the fill_value in place.
                pass
        log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points")

        return_data = UngriddedDataList()
        for idx, var_details in enumerate(var_set_details):
            if idx == 0:
                # The first output variable reuses the source data's metadata object,
                # updated in place with the kernel-supplied details.
                new_data = UngriddedData(values[0, :], metadata, points.coords())
                new_data.metadata._name = var_details[0]
                new_data.metadata.long_name = var_details[1]
                cis.utils.set_cube_standard_name_if_valid(new_data, var_details[2])
                new_data.metadata.shape = (len(sample_points),)
                new_data.metadata.missing_value = self.fill_value
                # Bug fix: units are the fourth element of the details tuple;
                # var_details[2] is the standard name (consumed just above).
                new_data.units = var_details[3]
            else:
                var_metadata = Metadata(name=var_details[0], long_name=var_details[1], shape=(len(sample_points),),
                                        missing_value=self.fill_value, units=var_details[3])
                new_data = UngriddedData(values[idx, :], var_metadata, points.coords())
            return_data.append(new_data)
        log_memory_profile("GeneralUngriddedCollocator final")

        return return_data