Example #1
    def process(self, cube):
        """
        Calculate extrema values for the diagnostic in the cube over
        successive periods, with the period length and start_hour both set
        at initialisation.

        Args:
            cube (iris.cube.Cube):
                Cube of diagnostic data with a utc_offset coordinate.

        Returns:
            period_cubes (iris.cube.CubeList):
                CubeList of diagnostic extrema cubes.

        """
        # Change to 64 bit to avoid the 2038 problem with any time
        # manipulations on units in seconds since the epoch.
        cube.coord('time').points = cube.coord('time').points.astype(np.int64)

        # Adjust times on cube to be local to each site.
        local_tz_cube = make_local_time_cube(cube)

        # Periods start at start_hour on the first available day and run
        # until start_hour on the final day covered by the data.
        start_time, end_time = get_datetime_limits(local_tz_cube.coord('time'),
                                                   self.start_hour)
        num_periods = int(
            np.ceil((end_time - start_time).total_seconds()/3600/self.period))
        starts = [start_time + datetime.timedelta(hours=i*self.period)
                  for i in range(num_periods)]
        ends = [time + datetime.timedelta(hours=self.period)
                for time in starts]
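        # Illustrative example (assumed values, not from the source): with
        # self.period=24 and self.start_hour=9, data covering three local
        # days gives starts at 09:00 on days 1-3 and ends at 09:00 on days
        # 2-4, i.e. three 24-hour extrema windows.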

        # Extract extrema values over desired time periods, producing a cube
        # for each period.
        period_cubes = CubeList()
        for period_start, period_end in zip(starts, ends):
            extrema_constraint = datetime_constraint(period_start, period_end)
            with iris.FUTURE.context(cell_datetime_objects=True):
                cube_over_period = local_tz_cube.extract(extrema_constraint)
            if cube_over_period is not None:
                # Ensure time dimension of resulting cube reflects period.
                mid_time = dt_to_utc_hours(period_start +
                                           (period_end - period_start)/2)
                bounds = [dt_to_utc_hours(period_start),
                          dt_to_utc_hours(period_end)]

                extremas = [['max', iris.analysis.MAX],
                            ['min', iris.analysis.MIN]]
                for name, method in extremas:
                    cube_out = cube_over_period.collapsed('time', method)
                    cube_out.long_name = cube_out.name() + '_' + name
                    cube_out.standard_name = None
                    cube_out.coord('time').convert_units(
                        'hours since 1970-01-01 00:00:00')
                    cube_out.coord('time').points = mid_time
                    cube_out.coord('time').bounds = bounds
                    period_cubes.append(cube_out)

        return period_cubes
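
A minimal usage sketch for this method, assuming it belongs to the ExtractExtrema plugin that Examples #2 and #3 instantiate, and assuming a hypothetical spot_cube holding diagnostic data with time and utc_offset coordinates (the import path is also an assumption):

    # Import path is assumed; ExtractExtrema(24, start_hour=9) matches the
    # instantiation in Examples #2 and #3 below.
    from improver.spotdata.extrema import ExtractExtrema

    plugin = ExtractExtrema(24, start_hour=9)
    extrema_cubes = plugin.process(spot_cube)  # spot_cube is hypothetical
    for extrema_cube in extrema_cubes.merge():
        print(extrema_cube.long_name)  # e.g. air_temperature_max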
Example #2
def process_diagnostic(diagnostic,
                       neighbours,
                       sites,
                       forecast_times,
                       data_path,
                       ancillary_data,
                       output_path=None):
    """
    Extract data and write output for a given diagnostic.

    Args:
        diagnostic (string):
            String naming the diagnostic to be processed.

        neighbours (numpy.array):
            Array of neighbouring grid points that are associated with sites
            in the SortedDictionary of sites.

        sites (dict):
            A dictionary containing the properties of spotdata sites.

        forecast_times (list of datetime.datetime):
            A list of datetimes representing forecast times for which data
            is required.

        data_path (string):
            Path to diagnostic data files.

        ancillary_data (dict):
            A dictionary containing additional model data that is needed.
            e.g. {'orography': <cube of orography>}

        output_path (string):
            Path to which the output file containing the processed
            diagnostic should be written.

    Returns:
        None

    Raises:
        IOError: If no relevant data files are found at the given path.
        Exception: If no data is available at any of the given forecast
            times.

    """
    # Search directory structure for all files relevant to current diagnostic.
    files_to_read = [
        os.path.join(dirpath, filename)
        for dirpath, _, files in os.walk(data_path) for filename in files
        if diagnostic['filepath'] in filename
    ]
    if not files_to_read:
        raise IOError('No relevant data files found in {}.'.format(data_path))

    # Load cubes into an iris.cube.CubeList.
    cubes = Load('multi_file').process(files_to_read,
                                       diagnostic['diagnostic_name'])

    # Grab the relevant set of grid point neighbours for the neighbour finding
    # method being used by this diagnostic.
    neighbour_hash = construct_neighbour_hash(diagnostic['neighbour_finding'])
    neighbour_list = neighbours[neighbour_hash]

    # Check if additional diagnostics are needed (e.g. multi-level data).
    # If required, load into the additional_diagnostics dictionary.
    additional_diagnostics = get_method_prerequisites(
        diagnostic['interpolation_method'], data_path)

    # Create empty iris.cube.CubeList to hold extracted data cubes.
    resulting_cubes = CubeList()

    # Get optional kwargs that may be set to override defaults.
    optionals = [
        'upper_level', 'lower_level', 'no_neighbours', 'dz_tolerance',
        'dthetadz_threshold', 'dz_max_adjustment'
    ]
    kwargs = {}
    if ancillary_data.get('config_constants') is not None:
        for optional in optionals:
            constant = ancillary_data.get('config_constants').get(optional)
            if constant is not None:
                kwargs[optional] = constant
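    # Illustrative example (assumed values): if ancillary_data contains
    # {'config_constants': {'no_neighbours': 25, 'dz_tolerance': 2.0}},
    # the loop above yields kwargs == {'no_neighbours': 25,
    # 'dz_tolerance': 2.0}.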

    # Loop over forecast times.
    for a_time in forecast_times:
        # Extract Cube from CubeList at current time.
        time_extract = datetime_constraint(a_time)
        cube = extract_cube_at_time(cubes, a_time, time_extract)
        if cube is None:
            # If no cube is available at given time, try the next time.
            continue

        ad = {}
        if additional_diagnostics is not None:
            # Extract additional diagnostics at current time.
            ad = extract_ad_at_time(additional_diagnostics, a_time,
                                    time_extract)

        args = (cube, sites, neighbour_list, ancillary_data, ad)

        # Extract diagnostic data using defined method.
        resulting_cubes.append(
            ExtractData(diagnostic['interpolation_method']).process(
                *args, **kwargs))

    # Concatenate CubeList into Cube, creating a time DimCoord, and write out.
    if resulting_cubes:
        cube_out, = resulting_cubes.concatenate()
        WriteOutput('as_netcdf', dir_path=output_path).process(cube_out)
    else:
        raise Exception('No data available at given forecast times.')

    # If set in the configuration, extract the diagnostic maxima and minima
    # values.
    if diagnostic['extrema']:
        extrema_cubes = ExtractExtrema(24, start_hour=9).process(cube_out)
        extrema_cubes = extrema_cubes.merge()
        for extrema_cube in extrema_cubes:
            WriteOutput('as_netcdf',
                        dir_path=output_path).process(extrema_cube)
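
A hedged sketch of a call to this function; the diagnostic dictionary follows the structure shown in Example #3's docstring, while neighbours, sites and ancillary_data are assumed to have been built by earlier pipeline steps (all values here are illustrative):

    import datetime

    diagnostic = {
        'diagnostic_name': 'air_temperature',
        'filepath': 'temperature_at_screen_level',
        'interpolation_method': 'model_level_temperature_lapse_rate',
        'neighbour_finding': {'method': 'fast_nearest_neighbour',
                              'land_constraint': False,
                              'vertical_bias': None},
        'extrema': True,
    }
    # Hypothetical three-hourly forecast times for a single day.
    forecast_times = [datetime.datetime(2017, 1, 1, hour)
                      for hour in range(0, 24, 3)]
    process_diagnostic(diagnostic, neighbours, sites, forecast_times,
                       '/path/to/diagnostic/data', ancillary_data,
                       output_path='/path/to/output')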
Example #3
def process_diagnostic(diagnostics, neighbours, sites,
                       ancillary_data, diagnostic_name):
    """
    Extract data and write output for a given diagnostic.

    Args:
        diagnostics (dict):
            Dictionary containing information regarding how the diagnostics
            are to be processed.

            For example::

              {
                  "temperature": {
                      "diagnostic_name": "air_temperature",
                      "extrema": true,
                      "filepath": "temperature_at_screen_level",
                      "interpolation_method":
                          "model_level_temperature_lapse_rate",
                      "neighbour_finding": {
                          "land_constraint": false,
                          "method": "fast_nearest_neighbour",
                          "vertical_bias": null
                      }
                  }
              }

        neighbours (numpy.array):
            Array of neighbouring grid points that are associated with sites
            in the SortedDictionary of sites.

        sites (dict):
            A dictionary containing the properties of spotdata sites.

        ancillary_data (dict):
            A dictionary containing additional model data that is needed.
            e.g. {'orography': <cube of orography>}

        diagnostic_name (string):
            A string matching the keys in the diagnostics dictionary that
            will be used to access information regarding how the diagnostic
            is to be processed.

    Returns:
        (tuple): tuple containing:
            **resulting_cube** (iris.cube.Cube or None):
                Cube after extracting the diagnostic requested using the
                desired extraction method.
                None is returned if the "resulting_cubes" is an empty CubeList
                after processing.
            **extrema_cubes** (iris.cube.CubeList or None):
                CubeList containing extrema values, if the 'extrema' diagnostic
                is requested.
                None is returned if diagnostic_dict["extrema"] is False,
                in which case the extrema calculation is not required.

    """
    diagnostic_dict = diagnostics[diagnostic_name]

    # Grab the relevant set of grid point neighbours for the neighbour finding
    # method being used by this diagnostic.
    neighbour_hash = (
        construct_neighbour_hash(diagnostic_dict['neighbour_finding']))
    neighbour_list = neighbours[neighbour_hash]

    # Get optional kwargs that may be set to override defaults.
    optionals = ['upper_level', 'lower_level', 'no_neighbours',
                 'dz_tolerance', 'dthetadz_threshold', 'dz_max_adjustment']
    kwargs = {}
    if ancillary_data.get('config_constants') is not None:
        for optional in optionals:
            constant = ancillary_data.get('config_constants').get(optional)
            if constant is not None:
                kwargs[optional] = constant

    # Create a list of datetimes to loop through.
    forecast_times = []
    for cube in diagnostic_dict["data"]:
        time = cube.coord("time")
        forecast_times.extend(time.units.num2date(time.points))

    # Create empty iris.cube.CubeList to hold extracted data cubes.
    resulting_cubes = CubeList()

    # Loop over forecast times.
    for a_time in forecast_times:
        # Extract Cube from CubeList at current time.
        time_extract = datetime_constraint(a_time)
        cube = extract_cube_at_time(
            diagnostic_dict["data"], a_time, time_extract)
        if cube is None:
            # If no cube is available at given time, try the next time.
            continue

        ad = {}
        if diagnostic_dict["additional_data"] is not None:
            # Extract additional diagnostics at current time.
            ad = extract_ad_at_time(diagnostic_dict["additional_data"], a_time,
                                    time_extract)

        args = (cube, sites, neighbour_list, ancillary_data, ad)

        # Extract diagnostic data using defined method.
        resulting_cubes.append(
            ExtractData(
                diagnostic_dict['interpolation_method']).process(
                    *args, **kwargs))

    if resulting_cubes:
        # Concatenate CubeList into Cube for cubes with different
        # forecast times.
        resulting_cube = resulting_cubes.concatenate_cube()
    else:
        resulting_cube = None

    # Extrema extraction requires a valid cube; skip it if no data was
    # extracted at any forecast time.
    if diagnostic_dict['extrema'] and resulting_cube is not None:
        extrema_cubes = (
            ExtractExtrema(24, start_hour=9).process(resulting_cube.copy()))
        extrema_cubes = extrema_cubes.merge()
    else:
        extrema_cubes = None

    return resulting_cube, extrema_cubes
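
A brief usage sketch for this variant, which returns the extracted cube and any extrema cubes rather than writing files; each entry in the diagnostics dictionary is assumed to also carry the preloaded "data" and "additional_data" entries accessed above:

    resulting_cube, extrema_cubes = process_diagnostic(
        diagnostics, neighbours, sites, ancillary_data, 'temperature')

    if resulting_cube is not None:
        print(resulting_cube.name())
    if extrema_cubes is not None:
        for extrema_cube in extrema_cubes:
            print(extrema_cube.long_name)  # e.g. air_temperature_min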