Example #1
    def _create_coord_list(self):
        from cis.data_io.Coord import Coord, CoordList
        from cis.data_io.ungridded_data import Metadata
        from cis.time_util import cis_standard_time_unit as cstu
        from cis.utils import concatenate

        # This implements a lot of what is necessary, but isn't in CIS style
        from orbit import ATSR

        lat_data = []
        lon_data = []
        time_data = []
        for fname in self.filenames:
            prod = ATSR(fname)

            lat_data.append(prod.lat)
            lon_data.append(prod.lon)
            time_data.append(prod.get_time())

        # TODO: Properly define metadata
        lat_meta = Metadata(standard_name="latitude", units="degrees")
        lon_meta = Metadata(standard_name="longitude", units="degrees")
        time_meta = Metadata(standard_name="time", units=cstu)

        lat = Coord(concatenate(lat_data), lat_meta, "Y")
        lat.update_shape()
        lat.update_range()
        lon = Coord(concatenate(lon_data), lon_meta, "X")
        lon.update_shape()
        lon.update_range()
        time = Coord(concatenate(time_data), time_meta, "T")
        time.update_shape()
        time.update_range()

        return CoordList([lat, lon, time])
Example #2
    def _create_bounded_coord_list(self):
        import numpy as np
        from acp_utils import rolling_window
        from cis.utils import concatenate
        from orbit import ATSR

        coords = self._create_coord_list()

        lat_bounds = []
        lon_bounds = []
        time_bounds = []
        for fname in self.filenames:
            prod = ATSR(fname)

            lat_c = rolling_window(prod.lat_corner, (2, 2))
            lat_bounds.append(lat_c.reshape(prod.shape + (4, )))
            lon_c = rolling_window(prod.lon_corner, (2, 2))
            lon_bounds.append(lon_c.reshape(prod.shape + (4, )))
            t = prod.get_time()
            b = np.stack([t, np.roll(t, -1)], axis=2)
            b[-1, :, 1] = 2 * t[-1, :] - t[-2, :]
            time_bounds.append(b)

        coords[0].bounds = concatenate(lat_bounds).reshape(
            coords[0].data.shape + (4, ))
        coords[0].bounds[..., 2:4] = coords[0].bounds[..., [3, 2]]
        coords[1].bounds = concatenate(lon_bounds).reshape(
            coords[1].data.shape + (4, ))
        coords[1].bounds[..., 2:4] = coords[1].bounds[..., [3, 2]]
        coords[2].bounds = concatenate(time_bounds)

        return coords
Example #3
def read_data(data_dict, data_type, missing_values=None):
    if data_type == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i, missing_values=missing_values) for i in data_dict])
    elif data_type == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i, missing_values=missing_values) for i in data_dict])
    else:
        raise ValueError("Invalid data-type: %s, HDF variables must be VD or SD only" % data_type)
    return out
Example #4
    def SolZen(self):
        import numpy as np
        from cis.data_io.ungridded_data import Metadata
        from cis.utils import concatenate
        from orbit import ATSR

        tmp = []
        for f in self.filenames:
            orbit = ATSR(f)

            # Get tie point grid
            sph = orbit._prod.get_sph()
            tie_field = sph.get_field("VIEW_ANGLE_TIE_POINTS")
            tie_pts = tie_field.get_elems()
            # Get tie point values
            scan_y = orbit._read_field("NADIR_VIEW_SOLAR_ANGLES_ADS",
                                       "img_scan_y")
            tie_solelv = orbit._read_field("NADIR_VIEW_SOLAR_ANGLES_ADS",
                                           "tie_pt_sol_elev")
            # Swath grid
            x = np.arange(512) - 255.5
            y = orbit._read_field("11500_12500_NM_NADIR_TOA_MDS", "img_scan_y")
            y[:-1] += 0.5 * (y[1:] - y[:-1])
            y[-1] += 0.5 * (y[-1] - y[-2])

            solelv = orbit.extrap_atsr_angle(tie_pts, scan_y, x, y, tie_solelv)
            tmp.append(90. - solelv)

        return concatenate(tmp), Metadata(standard_name="solar_zenith_angle",
                                          units="degrees")
Example #5
    def _create_time_coord(self, timestamp, time_variable_name, data_variables, coord_axis='T', standard_name='time'):
        """
        Create a time coordinate, taking into account the fact that each file may have a different timestamp.
        :param timestamp: Timestamp or list of timestamps, one per file
        :param time_variable_name: Name of the time variable
        :param data_variables: Dictionary containing one or multiple netCDF data variables for each variable name
        :param coord_axis: Axis, default 'T'
        :param standard_name: Coord standard name, default 'time'
        :return: Coordinate
        """
        from iris.coords import AuxCoord
        from six.moves import zip_longest
        from cis.time_util import convert_time_using_time_stamp_info_to_std_time as convert, cis_standard_time_unit
        from cis.utils import concatenate

        timestamps = listify(timestamp)
        time_variables = data_variables[time_variable_name]
        time_data = []
        # Create a coordinate for each separate file to account for differing timestamps
        for file_time_var, timestamp in zip_longest(time_variables, timestamps):
            metadata = get_metadata(file_time_var)
            if timestamp is not None:
                time_d = convert(file_time_var[:], metadata.units, timestamp)
            else:
                time_d = metadata.units.convert(file_time_var[:], cis_standard_time_unit)
            time_data.append(time_d)

        return AuxCoord(concatenate(time_data), standard_name=standard_name, units=cis_standard_time_unit)
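
The zip_longest pairing above is what makes the per-file timestamps optional: when fewer timestamps than time variables are supplied, the missing entries arrive as None and fall through to the plain unit-conversion branch. A minimal stand-alone sketch of that behaviour (variable names are illustrative):

    from itertools import zip_longest  # what six.moves.zip_longest resolves to on Python 3

    time_variables = ["time_file1", "time_file2", "time_file3"]
    timestamps = ["2008-01-01"]  # only the first file carries a timestamp

    for file_time_var, timestamp in zip_longest(time_variables, timestamps):
        # Pairs: ("time_file1", "2008-01-01"), ("time_file2", None), ("time_file3", None)
        print(file_time_var, timestamp)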
Example #6
    def create_coords(self, filenames, variable=None):
        """
        Reads the coordinates and data if required from the files
        :param filenames: List of filenames to read coordinates from
        :param variable: load a variable for the data
        :return: Coordinates
        """
        from iris.cube import Cube
        from iris.coords import DimCoord
        from cis.data_io.netcdf import read
        from cis.utils import concatenate

        data_variables, variable_selector = self._load_data(filenames, variable)

        aux_coords = self._create_coordinates_list(data_variables, variable_selector)
        dim_coords = [(DimCoord(np.arange(len(aux_coords[0].points)), var_name='obs'), (0,))]

        if variable is None:
            raise ValueError("Must specify variable")

        aux_coord_name = variable_selector.find_auxiliary_coordinate(variable)
        if aux_coord_name is not None:
            # We assume that the auxiliary coordinate is the same shape across files
            v = read(filenames[0], [aux_coord_name])[aux_coord_name]
            aux_meta = get_metadata(v)
            # We have to assume the shape here...
            dim_coords.append((DimCoord(v[:], var_name=aux_coord_name, units=aux_meta.units,
                                    long_name=aux_meta.long_name), (1,)))

        cube_meta = get_metadata(data_variables[variable][0])
        return Cube(concatenate([d[:] for d in data_variables[variable]]),
                    units=cube_meta.units, var_name=variable, long_name=cube_meta.long_name,
                    dim_coords_and_dims=dim_coords, aux_coords_and_dims=[(c, (0,)) for c in aux_coords])
Example #7
File: hdf.py Project: cedadev/cis
def read_data(data_list, read_function):
    """
    Wrapper for calling an HDF reading function for each dataset, and then concatenating the result.

    :param list data_list: A list of data objects to read
    :param callable or str read_function: A function for reading the data, or 'SD' or 'VD' for default reading routines.
    :return: A single numpy array of concatenated data values.
    """
    if callable(read_function):
        out = utils.concatenate([read_function(i) for i in data_list])
    elif read_function == 'VD':
        out = utils.concatenate([hdf_vd.get_data(i) for i in data_list])
    elif read_function == 'SD':
        out = utils.concatenate([hdf_sd.get_data(i) for i in data_list])
    else:
        raise ValueError("Invalid read-function: {}, please supply a callable read "
                         "function, 'VD' or 'SD' only".format(read_function))
    return out
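
Because the first branch accepts any callable, the wrapper above serves both the built-in readers and custom per-dataset logic. A hedged usage sketch (sd_datasets and the rescaling inside scaled_reader are hypothetical, not part of the CIS API):

    def scaled_reader(sds):
        # Hypothetical custom reader: fetch the raw SD values, then rescale
        return hdf_sd.get_data(sds) * 0.001

    raw = read_data(sd_datasets, 'SD')              # built-in SD routine
    scaled = read_data(sd_datasets, scaled_reader)  # callable branch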
Example #8
    def _create_coord_list(self, filenames):
        import numpy as np
        from cis.time_util import calculate_mid_time, cis_standard_time_unit

        variables = ["XDim", "YDim"]
        logging.info("Listing coordinates: " + str(variables))

        sdata, vdata = hdf.read(filenames, variables)

        lat = sdata["YDim"]
        lat_metadata = hdf.read_metadata(lat, "SD")

        lon = sdata["XDim"]
        lon_metadata = hdf.read_metadata(lon, "SD")

        # expand lat and lon data array so that they have the same shape
        lat_data = utils.expand_1d_to_2d_array(
            hdf.read_data(lat, "SD"), lon_metadata.shape, axis=1
        )  # expand latitude column wise
        lon_data = utils.expand_1d_to_2d_array(
            hdf.read_data(lon, "SD"), lat_metadata.shape, axis=0
        )  # expand longitude row wise

        lat_metadata.shape = lat_data.shape
        lon_metadata.shape = lon_data.shape

        # Make sure "Latitude" and "Longitude" (i.e. the standard_name) are displayed instead of "YDim" and "XDim"
        lat_metadata.standard_name = "latitude"
        lat_metadata._name = ""
        lon_metadata.standard_name = "longitude"
        lon_metadata._name = ""

        # create arrays for time coordinate using the midpoint of the time delta between the start date and the end date
        time_data_array = []
        for filename in filenames:
            mid_datetime = calculate_mid_time(self._get_start_date(filename), self._get_end_date(filename))
            logging.debug("Using " + str(mid_datetime) + " as datetime for file " + str(filename))
            # Only use part of the full lat shape as it has already been concatenated
            time_data = np.empty((lat_metadata.shape[0] // len(filenames), lat_metadata.shape[1]), dtype="float64")
            time_data.fill(mid_datetime)
            time_data_array.append(time_data)
        time_data = utils.concatenate(time_data_array)
        time_metadata = Metadata(
            name="DateTime",
            standard_name="time",
            shape=time_data.shape,
            units=str(cis_standard_time_unit),
            calendar=cis_standard_time_unit.calendar,
        )

        coords = CoordList()
        coords.append(Coord(lon_data, lon_metadata, "X"))
        coords.append(Coord(lat_data, lat_metadata, "Y"))
        coords.append(Coord(time_data, time_metadata, "T"))

        return coords
Example #9
    def from_many_coordinates(cls, coords):
        """
        Create a single coordinate object from the concatenation of all of the coordinate objects in the input list,
        updating the shape as appropriate

        :param coords: A list of coordinate objects to be combined
        :return: A single :class:`Coord` object
        """
        from cis.utils import concatenate
        data = concatenate([ug.data for ug in coords])
        metadata = coords[0].metadata  # Use the first file as a master for the metadata...
        metadata.shape = data.shape  # But update the shape
        return cls(data, metadata, coords[0].axis)
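
A short usage sketch for the classmethod above, building one coordinate from two per-file pieces (the sample arrays are illustrative):

    import numpy as np
    from cis.data_io.Coord import Coord
    from cis.data_io.ungridded_data import Metadata

    meta = Metadata(standard_name="latitude", units="degrees")
    per_file = [Coord(np.array([0., 1., 2.]), meta, "Y"),
                Coord(np.array([3., 4., 5.]), meta, "Y")]
    combined = Coord.from_many_coordinates(per_file)
    # combined.data holds all six values; combined.metadata.shape == (6,)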
Example #10
def hdf_read(filenames, variable, start=None, count=None, stride=None):
    """Returns variable, concatenated over a sequence of files."""
    from cis.data_io.hdf import read
    from cis.data_io.hdf_sd import get_metadata
    from cis.utils import concatenate

    sdata, _ = read(filenames, variable)
    var = sdata[variable]
    data = concatenate(
        [_get_MODIS_SDS_data(i, start, count, stride) for i in var])
    metadata = get_metadata(var[0])

    return data, metadata
Example #11
def ncdf_read(filenames, variable, start=None, count=None, stride=None):
    """Returns variable, concatenated over a sequence of files."""
    from cis.data_io.netcdf import read, get_metadata
    from cis.utils import concatenate, listify

    data = []
    for f in listify(filenames):
        sdata = read(f, variable)
        var = sdata[variable]
        data.append(_tidy_ncdf_data(var, start, count, stride))

    # Metadata is taken from the variable in the last file read
    metadata = get_metadata(var)

    return concatenate(data), metadata
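
A hedged call sketch (the file names and variable are placeholders; start, count and stride are simply forwarded to _tidy_ncdf_data):

    data, metadata = ncdf_read(['obs_day1.nc', 'obs_day2.nc'], 'temperature')
    # data is the concatenation across both files; metadata is read from the
    # last file, so the files are assumed to be mutually consistent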
Example #12
    def create_coords(self, filenames, variable=None):
        """
        Override the default read-in to also read in CCN quality flag data and apply the appropriate mask. We have
        to do this before creating the UngriddedData object so that the missing coords don't get fixed first
        """
        from cis.data_io.netcdf import read_many_files_individually, get_metadata, get_data
        from cis.utils import apply_mask_to_numpy_array, concatenate
        from cis.data_io.ungridded_data import UngriddedCoordinates, UngriddedData

        data_variables, variable_selector = self._load_data(filenames, variable)

        dim_coords = self._create_coordinates_list(data_variables, variable_selector)

        if variable is None:
            return UngriddedCoordinates(dim_coords)
        else:
            aux_coord_name = variable_selector.find_auxiliary_coordinate(variable)
            if aux_coord_name is not None:
                all_coords = self._add_aux_coordinate(dim_coords, filenames[0], aux_coord_name,
                                                      dim_coords.get_coord(standard_name='time').data.size)
            else:
                all_coords = dim_coords

            var_data = data_variables[variable]
            if variable and variable.startswith('CCN_COL'):
                # Work out the associated variable name for this column
                ccn_flag_var = "COL{}_FLAG".format(variable[-1])
                # Read in the flags
                flags = concatenate([get_data(v) for v in read_many_files_individually(filenames, ccn_flag_var)[
                    ccn_flag_var]])
                # 0 and 1 are both OK
                mask = flags > 1
                # If a variable was supplied then coords must be an ungridded data object, apply the mask to it
                var_data = apply_mask_to_numpy_array(concatenate([get_data(v) for v in var_data]), mask)

            return UngriddedData(var_data, get_metadata(data_variables[variable][0]), all_coords)
Example #13
    def _generate_time_array(self, vdata):
        import cis.data_io.hdf_vd as hdf_vd
        import datetime as dt
        from cis.time_util import convert_sec_since_to_std_time

        Cloudsat_start_time = dt.datetime(1993, 1, 1, 0, 0, 0)

        arrays = []
        for i, j in zip(vdata['Profile_time'], vdata['TAI_start']):
            time = hdf_vd.get_data(i)
            start = hdf_vd.get_data(j)
            time += start
            # Do the conversion to standard time here before we expand the time array...
            time = convert_sec_since_to_std_time(time, Cloudsat_start_time)
            arrays.append(time)
        return utils.concatenate(arrays)
Example #14
    def _create_coord(self, coord_axis, data_variable_name, data_variables, standard_name):
        """
        Create a coordinate for the co-ordinate list
        :param coord_axis: axis of the coordinate in the coords
        :param data_variable_name: the name of the variable in the data
        :param data_variables: the data variables
        :param standard_name: the standard name it should have
        :return: a coords object
        """
        from iris.coords import AuxCoord
        from cis.utils import concatenate
        data = concatenate([d[:] for d in data_variables[data_variable_name]])

        m = get_metadata(data_variables[data_variable_name][0])

        return AuxCoord(data, units=m.units, standard_name=standard_name)
Example #15
    def _create_coord_list(self, filenames, variable=None):
        import datetime as dt
        from cis.time_util import cis_standard_time_unit
        from cis.utils import concatenate
        from geotiepoints import modis5kmto1km

        variables = ['Latitude', 'Longitude', 'View_time']
        logging.info("Listing coordinates: " + str(variables))

        sdata, vdata = hdf.read(filenames, variables)

        apply_interpolation = False
        if variable is not None:
            scale = self.__get_data_scale(filenames[0], variable)
            apply_interpolation = scale == "1km"  # value comparison, not identity ("is")

        lat_data = hdf.read_data(sdata['Latitude'], _get_MODIS_SDS_data)
        lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")

        lon_data = hdf.read_data(sdata['Longitude'], _get_MODIS_SDS_data)
        lon_metadata = hdf.read_metadata(sdata['Longitude'], "SD")

        if apply_interpolation:
            lon_data, lat_data = modis5kmto1km(lon_data, lat_data)

        lat_coord = Coord(lat_data, lat_metadata, 'Y')
        lon_coord = Coord(lon_data, lon_metadata, 'X')

        time = sdata['View_time']
        time_metadata = hdf.read_metadata(time, "SD")
        # Ensure the standard name is set
        time_metadata.standard_name = 'time'
        time_metadata.units = cis_standard_time_unit

        t_arrays = []
        for f, d in zip(filenames, time):
            time_start = self._get_start_date(f)
            t_data = _get_MODIS_SDS_data(d) / 24.0  # Convert hours since to days since
            t_offset = time_start - dt.datetime(1600, 1, 1)  # Convert to CIS time
            t_arrays.append(t_data + t_offset.days)

        time_coord = Coord(concatenate(t_arrays), time_metadata, "T")

        return CoordList([lat_coord, lon_coord, time_coord])
Example #16
def load_multiple_aeronet(filenames, variables=None):
    from cis.utils import add_element_to_list_in_dict, concatenate

    adata = {}

    for filename in filenames:
        logging.debug("reading file: " + filename)

        # reading in all variables into a dictionary:
        # a_dict, key: variable name, value: list of masked arrays
        a_dict = load_aeronet(filename, variables)
        for var in list(a_dict.keys()):
            add_element_to_list_in_dict(adata, var, a_dict[var])

    for var in list(adata.keys()):
        adata[var] = concatenate(adata[var])

    return adata
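
A brief usage sketch (the AERONET file names and variable name are placeholders):

    files = ['station_jan.lev20', 'station_feb.lev20']
    adata = load_multiple_aeronet(files, variables=['AOT_500'])
    # adata['AOT_500'] is a single masked array spanning both files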
Example #17
    def _create_bounded_coord_list(self):
        import numpy as np
        from cis.data_io.Coord import Coord, CoordList
        from cis.data_io.ungridded_data import Metadata
        from cis.time_util import cis_standard_time_unit as cstu
        from cis.utils import concatenate

        # These implement a lot of what is necessary, but aren't in CIS style
        from acp_utils import rolling_window
        from orbit import MODIS

        lat_data = []
        lat_bounds = []
        lon_data = []
        lon_bounds = []
        time_data = []
        time_bounds = []
        for fname in self.filenames:
            prod = MODIS(fname)

            lat_data.append(prod.lat)
            lon_data.append(prod.lon)
            lat_c = rolling_window(prod.lat_corner, (2, 2))
            lat_bounds.append(lat_c.reshape(prod.shape + (4, )))
            lon_c = rolling_window(prod.lon_corner, (2, 2))
            lon_bounds.append(lon_c.reshape(prod.shape + (4, )))
            t = prod.get_time()
            time_data.append(t)
            b = np.stack([t, np.roll(t, -1)], axis=2)
            b[-1, :, 1] = 2 * t[-1, :] - t[-2, :]
            time_bounds.append(b)

        # TODO: Properly define metadata
        lat_meta = Metadata(standard_name="latitude", units="degrees")
        lon_meta = Metadata(standard_name="longitude", units="degrees")
        time_meta = Metadata(standard_name="time", units=cstu)

        lat = Coord(concatenate(lat_data), lat_meta, "Y")
        lat.update_shape()
        lat.update_range()
        lat.bounds = concatenate(lat_bounds).reshape(lat.shape + (4, ))
        lat.bounds[..., 2:4] = lat.bounds[..., [3, 2]]
        lon = Coord(concatenate(lon_data), lon_meta, "X")
        lon.update_shape()
        lon.update_range()
        lon.bounds = concatenate(lon_bounds).reshape(lon.shape + (4, ))
        lon.bounds[..., 2:4] = lon.bounds[..., [3, 2]]
        time = Coord(concatenate(time_data), time_meta, "T")
        time.update_shape()
        time.update_range()
        time.bounds = concatenate(time_bounds)

        return CoordList([lat, lon, time])
Example #18
    def create_coords(self, filenames, variable=None):
        from cis.data_io.ungridded_data import Metadata
        from numpy import genfromtxt, NaN
        from cis.exceptions import InvalidVariableError
        from cis.time_util import convert_datetime_to_std_time
        import dateutil.parser as du

        array_list = []

        for filename in filenames:
            try:
                array_list.append(genfromtxt(filename, dtype="f8,f8,f8,O,f8",
                                             names=['latitude', 'longitude', 'altitude', 'time', 'value'],
                                             delimiter=',', missing_values='', usemask=True, invalid_raise=True,
                                             converters={"time": du.parse}))
            except Exception:
                raise IOError('Unable to read file ' + filename)

        data_array = utils.concatenate(array_list)
        n_elements = len(data_array['latitude'])

        coords = CoordList()
        coords.append(Coord(data_array["latitude"],
                            Metadata(standard_name="latitude", shape=(n_elements,), units="degrees_north")))
        coords.append(Coord(data_array["longitude"],
                            Metadata(standard_name="longitude", shape=(n_elements,), units="degrees_east")))
        coords.append(
            Coord(data_array["altitude"], Metadata(standard_name="altitude", shape=(n_elements,), units="meters")))

        time_arr = convert_datetime_to_std_time(data_array["time"])
        time = Coord(time_arr,
                     Metadata(standard_name="time", shape=(n_elements,), units="days since 1600-01-01 00:00:00"))
        coords.append(time)

        if variable:
            try:
                data = UngriddedData(data_array['value'], Metadata(name="value", shape=(n_elements,), units="unknown",
                                                                   missing_value=NaN), coords)
            except Exception:
                raise InvalidVariableError("Value column does not exist in files " + str(filenames))
            return data
        else:
            return UngriddedCoordinates(coords)
Example #19
    def _create_coord(self, coord_axis, data_variable_name, data_variables,
                      standard_name):
        """
        Create a coordinate for the co-ordinate list
        :param coord_axis: axis of the coordinate in the coords
        :param data_variable_name: the name of the variable in the data
        :param data_variables: the data variables
        :param standard_name: the standard name it should have
        :return: a coords object
        """
        from cis.data_io.netcdf import get_metadata
        from iris.coords import AuxCoord
        from cis.utils import concatenate
        from cf_units import Unit
        import logging

        data = concatenate(
            [get_data(d) for d in data_variables[data_variable_name]])

        m = get_metadata(data_variables[data_variable_name][0])
        m._name = m._name.lower()
        m.standard_name = standard_name
        if standard_name == 'air_pressure':
            if not isinstance(m.units, Unit):
                if ',' in m.units:
                    # Try splitting any commas out
                    m.units = m.units.split(',')[0]
                if ' ' in m.units:
                    # Try splitting any spaces out
                    m.units = m.units.split()[0]
            if str(m.units) == 'mb' or str(m.units) == 'Mb':
                # Try converting to standard nomenclature
                m.units = 'mbar'
            if str(m.units) == 'hpa':
                m.units = 'hPa'

            logging.info("Parsed air pressure units {old}".format(old=m.units))
            logging.info('Converting to hPa')
            if not isinstance(m.units, str):
                data = m.units.convert(data, 'hPa')
                m.units = 'hPa'

        return AuxCoord(data, units=m.units, standard_name=standard_name)
Example #20
def load_multiple_hysplit(fnames, variables=None):
    from cis.utils import add_element_to_list_in_dict, concatenate

    hdata = {}

    for filename in fnames:
        logging.debug("reading file: " + filename)

        # read in all trajectories
        # h_dict, key: trajectory starting lat/lon/altm value: dict containing trajectory data
        h_dict = load_hysplit(filename, variables)
        for traj in list(h_dict.keys()):
            if traj in hdata:
                for var in list(h_dict[traj].keys()):
                    # TODO error appending masked array! add these manually
                    add_element_to_list_in_dict(hdata[traj], var, h_dict[traj][var])
                for var in list(hdata[traj].keys()):
                    hdata[traj][var] = concatenate(hdata[traj][var])
            else:
                hdata[traj] = h_dict[traj]

    return hdata
Example #21
    def create_data_object(self, filenames, variable):
        logging.debug("Creating data object for variable " + variable)

        variables = [("ER2_IMU/Longitude", "x"), ("ER2_IMU/Latitude", "y"),
                     ("ER2_IMU/gps_time", "t"), ("State/Pressure", "p"),
                     ("DataProducts/Altitude", "z"), ("header/date", ""),
                     (variable, '')]

        logging.info("Listing coordinates: " + str(variables))

        var_data = read_many_files_individually(filenames,
                                                [v[0] for v in variables])

        date_times = []
        for times, date in zip(var_data['ER2_IMU/gps_time'],
                               var_data['header/date']):
            # Date is stored as an array (of length 92??) of floats with format: yyyymmdd
            date_str = str(int(date[0]))
            t_unit = Unit('hours since {}-{}-{} 00:00:00'.format(
                date_str[0:4], date_str[4:6], date_str[6:8]))
            date_times.append(
                t_unit.convert(get_data(times), cis_standard_time_unit))

        # time_data = utils.concatenate([get_data(i) for i in var_data['ER2_IMU/gps_time']])
        # date_str = str(int(var_data['header/date'][0][0]))
        # Flatten the data by taking the 0th column of the transpose
        time_coord = DimCoord(utils.concatenate(date_times).T[0],
                              standard_name='time',
                              units=cis_standard_time_unit)

        # TODO This won't work for multiple files since the altitude bins are different for each flight...
        alt_data = utils.concatenate(
            [get_data(i) for i in var_data["DataProducts/Altitude"]])
        alt_coord = DimCoord(alt_data[0], standard_name='altitude', units='m')

        pres_data = utils.concatenate(
            [get_data(i) for i in var_data["State/Pressure"]])
        pres_coord = AuxCoord(pres_data,
                              standard_name='air_pressure',
                              units='atm')
        # Fix the air-pressure units
        pres_coord.convert_units('hPa')

        lat_data = utils.concatenate(
            [get_data(i) for i in var_data['ER2_IMU/Latitude']])
        lat_coord = AuxCoord(lat_data.T[0], standard_name='latitude')

        lon_data = utils.concatenate(
            [get_data(i) for i in var_data['ER2_IMU/Longitude']])
        lon_coord = AuxCoord(lon_data.T[0], standard_name='longitude')

        data = utils.concatenate([get_data(i) for i in var_data[variable]])
        metadata = get_metadata(var_data[variable][0])

        cube = Cube(np.ma.masked_invalid(data),
                    long_name=metadata.misc['Description'],
                    units=self.clean_units(metadata.units),
                    dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                    aux_coords_and_dims=[(lat_coord, (0, )),
                                         (lon_coord, (0, )),
                                         (pres_coord, (0, 1))])
        gd = GriddedData.make_from_cube(cube)
        return gd
Example #22
def load_hysplit(fname, variables=None):
    import numpy as np
    from numpy import ma
    from datetime import datetime, timedelta
    from cis.time_util import cis_standard_time_unit
    from cis.utils import add_element_to_list_in_dict, concatenate

    std_day = cis_standard_time_unit.num2date(0)

    fmetadata = get_file_metadata(fname)
    try:
        rawd = np.genfromtxt(fname,
                             skip_header=fmetadata['data_start'],
                             dtype=np.float64,
                             usemask=True)
    except (StopIteration, IndexError) as e:
        raise IOError(e)

    data_dict = {}
    # Get data for one trajectory at a time
    for t in range(1, fmetadata['n_trajectories']+1):
        tdata_dict = {}

        trajectory_data = rawd[rawd[:,hysplit_default_var.index('TRAJECTORY_NO')] == t]
        # Convert time from each row to standard time
        for trajectory in trajectory_data:
            day = datetime((int(trajectory[2]) + 2000), # TODO Dan: is it okay to assume this?
                            int(trajectory[3]),
                            int(trajectory[4]))
            sday = float((day - std_day).days)
            td = timedelta(hours=int(trajectory[5]), minutes=int(trajectory[6]))
            fractional_day = td.total_seconds()/(24.0*60.0*60.0)
            dt = sday + fractional_day
            add_element_to_list_in_dict(tdata_dict, 'DATETIMES', [dt])
        # Clean up data
        tdata_dict['DATETIMES'] = ma.array(concatenate(tdata_dict['DATETIMES'])) # TODO mask is only one value

        # Add other default data
        tdata_dict['LAT'] = trajectory_data[:,hysplit_default_var.index('LAT')]
        tdata_dict['LON'] = trajectory_data[:,hysplit_default_var.index('LON')]
        tdata_dict['ALT'] = trajectory_data[:,hysplit_default_var.index('ALT')]
        tdata_dict['PRESSURE'] = trajectory_data[:, hysplit_default_var.index('PRESSURE')]
        # TODO any other default variables to add?

        # If variables set, fetch only set variables
        if variables is not None:
            for key in variables:
                try:
                    tdata_dict[key] = trajectory_data[:,fmetadata['labels'].index(key)]
                except ValueError:
                    raise InvalidVariableError(key + " does not exist in " + fname)
        # Else, return all variables in file
        else:
            for label in fmetadata['custom_labels']:
                try:
                    tdata_dict[label] = trajectory_data[:,fmetadata['labels'].index(label)]
                except ValueError:
                    raise InvalidVariableError(label + " does not exist in " + fname)

        # TODO trajectory keys are tuples of lat/long/alt
        tkey = fmetadata['trajectories'][t]
        data_dict[tkey] = tdata_dict

    return data_dict
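
The date arithmetic above reduces each trajectory row to fractional days since the CIS standard epoch (1600-01-01, as seen elsewhere in these examples). A self-contained check of that step with illustrative values:

    from datetime import datetime, timedelta

    std_day = datetime(1600, 1, 1)  # stands in for cis_standard_time_unit.num2date(0)
    day = datetime(2019, 6, 15)     # year parsed as 2000 + YY, per the TODO above
    sday = float((day - std_day).days)
    td = timedelta(hours=6, minutes=30)
    dt = sday + td.total_seconds() / (24.0 * 60.0 * 60.0)
    # dt equals sday + 0.2708..., i.e. 06:30 expressed as a fraction of a day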
Example #23
    def _create_bounded_coord_list(self):
        """Adaptation of the CIS MODIS_L2 class version that isn't lazy."""
        from cis.time_util import convert_sec_since_to_std_time
        from pyhdf.error import HDF4Error
        from pyhdf.SD import SD

        def calc_latlon_bounds(base_data, nrows=10):
            """Interpolate 10-line MODIS scans to return pixel edges."""
            from acp_utils import rolling_window
            from itertools import product
            from scipy.interpolate import RegularGridInterpolator

            # Coordinates in file give cell centres
            nx, ny = base_data.shape
            assert nx % nrows == 0
            x0 = np.arange(0.5, nrows, 1)
            y0 = np.arange(0.5, ny, 1)

            # Aerosol pixels skip the outermost columns
            ystart = (ny % nrows) // 2
            x1 = np.array([0, nrows])
            y1 = np.arange(ystart, ny + 1, nrows)

            # Iterate over 10-line chunks
            bounds = []
            for chunk in np.split(base_data, nx // nrows, 0):
                if (chunk.max() - chunk.min()) > 180.:
                    # Sodding dateline
                    chunk[chunk < 0.] += 360.
                interp = RegularGridInterpolator((x0, y0), chunk, "linear",
                                                 False, None)
                tmp = interp(list(product(x1, y1))).reshape(2, len(y1))
                corners = rolling_window(tmp, (2, 2))
                bounds.append(corners.reshape(ny // nrows, 4))

            # Ensure corners are given in sequential order
            bounds = np.ma.masked_invalid(bounds)
            bounds[..., 2:4] = bounds[..., [3, 2]]

            return bounds

        lon_bounds = []
        lat_bounds = []
        for f in self._mod03_filenames:
            try:
                file_object = SD(f)
                lon_1kmdata = _get_hdf_data(file_object, "Longitude")
                lat_1kmdata = _get_hdf_data(file_object, "Latitude")
                file_object.end()
            except HDF4Error:
                raise IOError("Corrupted file " + f)

            tmp_bounds = calc_latlon_bounds(lon_1kmdata)
            tmp_bounds[tmp_bounds > 180.] -= 360.
            tmp_bounds[tmp_bounds <= -180.] += 360.
            lon_bounds.append(tmp_bounds)

            tmp_bounds = calc_latlon_bounds(lat_1kmdata)
            tmp_bounds[tmp_bounds >= 90.] = np.ma.masked
            tmp_bounds[tmp_bounds <= -90.] = np.ma.masked
            lat_bounds.append(tmp_bounds)

        coords = self._create_coord_list()

        coords[0].bounds = concatenate(lat_bounds)
        coords[1].bounds = concatenate(lon_bounds)

        unique_times = np.unique(coords[2].data.compressed())
        try:
            deltas = unique_times[1:] - unique_times[:-1]
            delta_map = {t: d / 2 for t, d in zip(unique_times, deltas)}
            delta_map[unique_times[-1]] = deltas[-1] / 2
            time_bounds = np.ma.array([
                [t - delta_map[t], t + delta_map[t]]
                if t is not np.ma.masked else [np.ma.masked, np.ma.masked]
                for t in coords[2].data.ravel()
            ]).reshape(coords[2].data.shape + (2, ))
        except IndexError:
            # File too small to have multiple time stamps; guess +-2.5min
            time_bounds = np.stack(
                [coords[2].data - 0.00174, coords[2].data + 0.00174], axis=2)
        coords[2].bounds = convert_sec_since_to_std_time(
            time_bounds, MODIS_REFERENCE_TIME)

        return coords
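
The time-bounds construction above pads each timestamp by half the gap to the next unique time, reusing the final gap for the last timestamp. A minimal stand-alone illustration with three scan times:

    import numpy as np

    unique_times = np.array([0.0, 10.0, 30.0])
    deltas = unique_times[1:] - unique_times[:-1]    # [10., 20.]
    delta_map = {t: d / 2 for t, d in zip(unique_times, deltas)}
    delta_map[unique_times[-1]] = deltas[-1] / 2     # reuse the last gap
    bounds = np.array([[t - delta_map[t], t + delta_map[t]] for t in unique_times])
    # bounds -> [[-5., 5.], [0., 20.], [20., 40.]]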
Example #24
    def _calculate_grid_time(self, var_name, lat_data, lon_data):
        """Approximate time from a pair of corresponding MOD03 files"""
        from osgeo.gdal import Open
        from scipy.interpolate import griddata

        def fetch_MOD03_coordinates(start_time, aqua=False):
            import os.path
            from glob import glob
            from pyhdf.SD import SD
            from pyhdf.error import HDF4Error

            # Locate MOD03 file
            search_path = start_time.strftime(
                os.path.join(self.mod03_path, "MOD03.A%Y%j.%H%M.061*hdf"))
            if aqua:
                # NOTE: System dependent approximation
                search_path = search_path.replace("MOD", "MYD")
            try:
                mod03_file = glob(search_path)[0]
            except IndexError:
                raise FileNotFoundError("MOD03: " + search_path)

            # Read space-time grid from that file
            try:
                file_object = SD(mod03_file)
                dims = file_object.datasets()["Longitude"][1]
                count = dims[0] // 10, dims[1] // 10
                mod_lon = _get_hdf_data(file_object,
                                        "Longitude",
                                        start=(0, 2),
                                        count=count,
                                        stride=(10, 10))
                mod_lat = _get_hdf_data(file_object,
                                        "Latitude",
                                        start=(0, 2),
                                        count=count,
                                        stride=(10, 10))
                mod_time = _get_hdf_data(file_object,
                                         "EV start time",
                                         count=count[:1])
                file_object.end()
            except HDF4Error:
                raise IOError("Corrupted file: " + mod03_file)

            return mod_lon, mod_lat, mod_time

        time_data = []
        variable = Open(var_name)
        meta = variable.GetMetadata_Dict()
        for timestamp in meta["Orbit_time_stamp"].split():
            # Parse time stamp
            start_time = dt.datetime.strptime(timestamp[:-1], "%Y%j%H%M")

            try:
                # Interpolate time from MOD03 files
                mod_lon0, mod_lat0, mod_time0 = fetch_MOD03_coordinates(
                    start_time - dt.timedelta(seconds=300),
                    timestamp[-1] == "A")
                mod_lon1, mod_lat1, mod_time1 = fetch_MOD03_coordinates(
                    start_time, timestamp[-1] == "A")
                mod_lon = concatenate([mod_lon0, mod_lon1])
                mod_lat = concatenate([mod_lat0, mod_lat1])
                mod_time = concatenate([mod_time0, mod_time1])
                if (mod_lon.max() - mod_lon.min()) > 180.:
                    # Sodding dateline
                    mod_lon[mod_lon < 0.] += 360.

                # Interpolate that grid onto the sinusoidal projection
                time = griddata((mod_lon.ravel(), mod_lat.ravel()),
                                np.tile(mod_time, mod_lon.shape[1]),
                                (lon_data, lat_data),
                                method="nearest")
            except (FileNotFoundError, TypeError):
                # Just use the orbit start time
                seconds = start_time - MODIS_REFERENCE_TIME
                time = np.full(lat_data.shape, seconds.total_seconds())

            time_data.append(time)

        return concatenate(time_data)