Example #1
    def test_monitor(self):
        """
        Test monitor integration
        """
        # The reference has a 12-step time dimension but no time coordinate
        ref = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90)
        })

        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        with create_tmp_file() as tmp_file:
            ref.to_netcdf(tmp_file, 'w')
            m = ConsoleMonitor()
            anomaly.anomaly_external(ds, tmp_file, monitor=m)
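            # The operation should have driven the monitor to completion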
            self.assertEqual(m._percentage, 100)
Example #2
    def test_partial(self):
        """
        Test situations where the given dataset does not correspond perfectly
        to the reference dataset.
        """
        # Test mismatching variable names
        ref = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90)
        })

        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        expected = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        with create_tmp_file() as tmp_file:
            ref.to_netcdf(tmp_file, 'w')
            actual = anomaly.anomaly_external(ds, tmp_file)
            assert_dataset_equal(actual, expected)

            # Test differing spatial extents
            ds = subset_spatial(ds, '-50, -50, 50, 50')
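            # On this 4-degree grid the subset keeps boundary cells, giving
            # 27 lat points (-52..52) and 26 lon points (-50..50).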
            expected = xr.Dataset({
                'first': (['lat', 'lon', 'time'], np.zeros([27, 26, 24])),
                'lat': np.linspace(-52, 52, 27),
                'lon': np.linspace(-50, 50, 26),
                'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                        [datetime(2001, x, 1) for x in range(1, 13)]
            })
            actual = anomaly.anomaly_external(ds, tmp_file)
            assert_dataset_equal(actual, expected)
Example #3
    def test_nominal(self):
        """
        Nominal execution test
        """
        # Test nominal
        ref = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90)
        })

        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        expected = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        with create_tmp_file() as tmp_file:
            ref.to_netcdf(tmp_file, 'w')
            actual = anomaly.anomaly_external(ds, tmp_file)
            assert_dataset_equal(actual, expected)

        # Test with a reference dataset that has an explicit time coordinate
        ref['time'] = [datetime(1700, x, 1) for x in range(1, 13)]
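        # The actual reference dates should not matter here; presumably only
        # the 12-step monthly climatology is used, so the result is unchanged.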
        with create_tmp_file() as tmp_file:
            ref.to_netcdf(tmp_file, 'w')
            actual = anomaly.anomaly_external(ds, tmp_file)
            assert_dataset_equal(actual, expected)
Example #4
    def test_validation(self):
        """
        Test input validation
        """
        # Test wrong dtype
        ref = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90)
        })

        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': list(range(24))  # integer times: the wrong dtype under test
        })

        with create_tmp_file() as tmp_file:
            ref.to_netcdf(tmp_file, 'w')
            with self.assertRaises(ValueError) as err:
                anomaly.anomaly_external(ds, tmp_file)
            self.assertIn('dtype datetime', str(err.exception))

            # Test missing time coordinate
            ds = xr.Dataset({
                'first': (['lat', 'lon'], np.ones([45, 90])),
                'second': (['lat', 'lon'], np.ones([45, 90])),
                'lat': np.linspace(-88, 88, 45),
                'lon': np.linspace(-178, 178, 90)
            })
            with self.assertRaises(ValueError) as err:
                anomaly.anomaly_external(ds, tmp_file)
            self.assertIn('time coordinate.', str(err.exception))
Example #5
    def test_dask(self):
        """
        Test if the operation works with xarray datasets with dask as the
        backend.
        """
        ref = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90)
        })

        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        expected = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        # Verify that ds is not dask-backed to begin with
        self.assertFalse(ds.chunks)
        with create_tmp_file() as tmp1:
            ref.to_netcdf(tmp1, 'w')
            with create_tmp_file() as tmp2:
                ds.to_netcdf(tmp2, 'w')
                # This makes ds a dask dataset in xarray backend
                ds = xr.open_dataset(tmp2, chunks={})
                # Verify that ds is now dask-backed
                self.assertTrue(ds.chunks)

                actual = anomaly.anomaly_external(ds, tmp1)
                assert_dataset_equal(actual, expected)
                # The result should be dask-backed, with the same chunking as ds
                self.assertEqual(actual.chunks, ds.chunks)
Example #6
File: index.py Project: whigg/cate
def _generic_index_calculation(
        ds: xr.Dataset,
        var: VarName.TYPE,
        region: PolygonLike.TYPE,
        window: int,
        file: str,
        name: str,
        threshold: float = None,
        monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    A generic index calculation. The index is defined as the anomaly of the
    given variable, averaged over the given region, taken against the given
    reference and smoothed with a moving average of the given window size.

    :param ds: Dataset from which to calculate the index
    :param var: Variable from which to calculate the index
    :param region: Spatial subset from which to calculate the index
    :param window: Window size for the moving average
    :param file: Path to the reference file
    :param name: Name of the index
    :param threshold: Absolute threshold that indicates an ENSO event
    :param monitor: A progress monitor
    :return: A pandas DataFrame containing the index timeseries
    """
    var = VarName.convert(var)
    region = PolygonLike.convert(region)

    with monitor.starting("Calculate the index", total_work=2):
        ds = select_var(ds, var)
        ds_subset = subset_spatial(ds, region)
        anom = anomaly_external(ds_subset, file, monitor=monitor.child(1))
        with monitor.child(1).observing("Calculate mean"):
            ts = anom.mean(dim=['lat', 'lon'])
        df = pd.DataFrame(data=ts[var].values,
                          columns=[name],
                          index=ts.time.values)
        retval = df.rolling(window=window, center=True).mean().dropna()

    if threshold is None:
        return retval

    retval['El Nino'] = pd.Series((retval[name] > threshold),
                                  index=retval.index)
    retval['La Nina'] = pd.Series((retval[name] < -threshold),
                                  index=retval.index)
    return retval
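
Given a dataset ds holding the variable of interest, a call could look like the sketch below. This is a minimal, hypothetical usage example: the variable name 'sst', the reference file 'sst_clim.nc', the Niño 3.4 bounds and the threshold are illustrative assumptions, not values taken from the cate project, and the region string is assumed to be ordered 'lon_min, lat_min, lon_max, lat_max' like the subset string in Example #2.

# Hypothetical usage sketch; all argument values below are assumptions.
enso = _generic_index_calculation(ds, var='sst',
                                  region='-170, -5, -120, 5',
                                  window=5, file='sst_clim.nc',
                                  name='ENSO N3.4', threshold=0.5)
# enso is a pandas DataFrame with the smoothed index in column 'ENSO N3.4'
# plus boolean 'El Nino' / 'La Nina' columns flagging threshold crossings.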
Example #7
    def test_transform(self):
        """
        Test the application of an arithmetic transformation to the dataset, as
        part of the anomaly calculation.
        """
        ref = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90)
        })

        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'second': (['lat', 'lon', 'time'], np.ones([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        expected = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 24])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)] +
                    [datetime(2001, x, 1) for x in range(1, 13)]
        })

        ds = ds * 10
        expected = expected + 3
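        # With ds scaled to 10 everywhere, 'log10' maps it back to the
        # all-ones reference, so the '+3' step should leave an anomaly of 3
        # everywhere, matching `expected`.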
        with create_tmp_file() as tmp_file:
            ref.to_netcdf(tmp_file, 'w')
            actual = anomaly.anomaly_external(ds,
                                              tmp_file,
                                              transform='log10, +3')
            assert_dataset_equal(actual, expected)