Example #1
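Exercises the required arguments of the `pcm` constructor: construction must
fail when `K` or `features` is missing or invalid, and succeed for each valid
feature specification.
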
# Imports assumed from the pyxpcm test suite; the PCM error classes are
# expected to be importable from pyxpcm.models in recent releases.
import numpy as np
import pytest
from pyxpcm.models import pcm, PCMClassError, PCMFeatureError
from sklearn.exceptions import NotFittedError


def test_pcm_init_req():
    """Test PCM default instantiation with required arguments"""
    pcm_features_list = ({
        'F1': np.arange(0, -500, 2.)
    }, {
        'F1': np.arange(0, -500, 2.),
        'F2': np.arange(0, -500, 2.)
    }, {
        'F1': np.arange(0, -500, 2.),
        'F2': None
    })

    with pytest.raises(TypeError):
        m = pcm()

    with pytest.raises(TypeError):
        m = pcm(K=0)

    with pytest.raises(TypeError):
        m = pcm(features=pcm_features_list[0])

    with pytest.raises(PCMClassError):
        m = pcm(K=0, features=pcm_features_list[0])

    with pytest.raises(PCMFeatureError):
        m = pcm(K=1, features=dict())

    for pcm_features in pcm_features_list:
        m = pcm(K=3, features=pcm_features)
        assert isinstance(m, pcm)

    with pytest.raises(NotFittedError):
        m = pcm(K=1, features=pcm_features_list[0])
        if not hasattr(m, 'fitted'):
            raise NotFittedError
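
Note that the last block raises NotFittedError by hand: a freshly constructed
model is treated as unfitted whenever it lacks a `fitted` attribute, so the
`pytest.raises` context is satisfied without calling any estimator method.
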
Example #2
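Round-trips a fitted model through netCDF for every available backend and
every scaling/reduction setting, checking that the reloaded model reproduces
the original predictions. Backends are detected by attempting the imports.
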
# Imports assumed from the pyxpcm test suite:
import os

import pyxpcm
from pyxpcm.models import pcm


def test_saveload_prediction():
    """Test that PCM predictions survive a netCDF save/load round-trip"""
    ds = pyxpcm.tutorial.open_dataset('dummy').load(Np=50, Nz=20)
    pcm_features = {'TEMP': ds['depth'], 'PSAL': ds['depth']}

    # Determine backends to test:
    backends = list()
    try:
        import sklearn
        backends.append('sklearn')
    except ModuleNotFoundError:
        pass
    try:
        import dask_ml
        backends.append('dask_ml')
    except ModuleNotFoundError:
        pass

    # Create a model, fit, predict, save, load, predict
    file = '.pyxpcm_dummy_file.nc'
    for backend in backends:
        for scaling in [0, 1, 2]:
            for reduction in [0, 1]:
                M = pcm(K=3,
                        features=pcm_features,
                        scaling=scaling,
                        reduction=reduction,
                        backend=backend)
                M.fit(ds)
                M.to_netcdf(file, mode='w')
                label_ref = M.predict(ds, inplace=False)
                M_loaded = pyxpcm.load_netcdf(file)
                label_new = M_loaded.predict(ds, inplace=False)
                assert label_ref.equals(
                    label_new), "netCDF I/O round-trip changed the predictions"

    # Delete file at the end of the test:
    os.remove(file)
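
Outside of a test, the same round-trip is only a handful of calls. A minimal
sketch using just the API exercised above (the file name is arbitrary):

import pyxpcm
from pyxpcm.models import pcm

ds = pyxpcm.tutorial.open_dataset('dummy').load(Np=50, Nz=20)
m = pcm(K=3, features={'TEMP': ds['depth'], 'PSAL': ds['depth']})
m.fit(ds)                                      # train on the dataset
m.to_netcdf('my_model.nc', mode='w')           # persist the fitted model
m_loaded = pyxpcm.load_netcdf('my_model.nc')   # restore it
labels = m_loaded.predict(ds, inplace=False)   # matches m.predict(ds, inplace=False)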
Example #3
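Sweeps the optional constructor arguments (`scaling`, `reduction`, `maxvar`,
`classif`, `covariance_type`, plus the verbosity, debug, and timing flags),
adding one keyword at a time.
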
# Imports assumed from the pyxpcm test suite:
import numpy as np
from pyxpcm.models import pcm


def test_pcm_init_opt():
    """Test PCM instantiation with optional arguments"""
    pcm_features_list = ({
        'F1': np.arange(0, -500, 2.)
    }, {
        'F1': np.arange(0, -500, 2.),
        'F2': np.arange(0, -500, 2.)
    }, {
        'F1': np.arange(0, -500, 2.),
        'F2': None
    })

    for pcm_features in pcm_features_list:

        for scaling in [0, 1, 2]:
            m = pcm(K=3, features=pcm_features, scaling=scaling)
            assert isinstance(m, pcm)

            for reduction in [0, 1]:
                m = pcm(K=3,
                        features=pcm_features,
                        scaling=scaling,
                        reduction=reduction)
                assert isinstance(m, pcm)

                m = pcm(K=3,
                        features=pcm_features,
                        scaling=scaling,
                        reduction=reduction,
                        maxvar=90.)
                assert isinstance(m, pcm)

                m = pcm(K=3,
                        features=pcm_features,
                        scaling=scaling,
                        reduction=reduction,
                        maxvar=90.,
                        classif='gmm')
                assert isinstance(m, pcm)

                for covariance_type in ['full', 'diag', 'spherical']:
                    m = pcm(K=3,
                            features=pcm_features,
                            scaling=scaling,
                            reduction=reduction,
                            maxvar=90.,
                            classif='gmm',
                            covariance_type=covariance_type)
                    assert isinstance(m, pcm)

                    m = pcm(K=3,
                            features=pcm_features,
                            scaling=scaling,
                            reduction=reduction,
                            maxvar=90.,
                            classif='gmm',
                            covariance_type=covariance_type,
                            verb=False)
                    assert isinstance(m, pcm)

                    m = pcm(K=3,
                            features=pcm_features,
                            scaling=scaling,
                            reduction=reduction,
                            maxvar=90.,
                            classif='gmm',
                            covariance_type=covariance_type,
                            verb=False,
                            debug=True)
                    assert isinstance(m, pcm)

                    m = pcm(K=3,
                            features=pcm_features,
                            scaling=scaling,
                            reduction=reduction,
                            maxvar=90.,
                            classif='gmm',
                            covariance_type=covariance_type,
                            verb=False,
                            debug=True,
                            timeit=True)
                    assert isinstance(m, pcm)

                    m = pcm(K=3,
                            features=pcm_features,
                            scaling=scaling,
                            reduction=reduction,
                            maxvar=90.,
                            classif='gmm',
                            covariance_type=covariance_type,
                            verb=False,
                            debug=True,
                            timeit=True,
                            timeit_verb=True)
                    assert isinstance(m, pcm)
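
Because each `pcm(...)` call adds exactly one keyword to the previous one, a
failure in this test points directly at the first optional argument that
breaks construction.
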
Example #4
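Fits models on several tutorial datasets under six feature configurations:
vertical axes taken from the dataset, new vertical axes that do or do not
trigger vertical mixing, and single-level ("slice") features. The PCM locates
dataset variables through their `feature_name` attributes rather than by
variable name.
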
# Imports assumed from the pyxpcm test suite:
import numpy as np
import xarray as xr
import pyxpcm
from pyxpcm.models import pcm

# `backends` is defined at module level in the original test suite; a
# minimal fallback so this example runs stand-alone:
backends = ['sklearn']


def test_fitting():
    dlist = ['dummy', 'argo', 'isas_snapshot', 'isas_series']

    for d in dlist:
        # Load and set-up the dataset:
        ds = pyxpcm.tutorial.open_dataset(d).load()
        ds['TEMP'].attrs['feature_name'] = 'temperature'
        ds['PSAL'].attrs['feature_name'] = 'salinity'
        if d == 'argo':
            ds = ds.rename({'DEPTH': 'depth'})
            ds['depth'] = xr.DataArray(
                np.linspace(-10, -1405., len(ds['depth'])),
                dims='depth')  # Modify vertical axis for test purposes
            ds['depth'].attrs['axis'] = 'Z'

        # Add single-depth level variables:
        ds['SST'] = ds['TEMP'].isel(depth=0).copy()
        ds['SST'].attrs['feature_name'] = 'sst'
        ds['OMT'] = ds['TEMP'].mean(dim='depth').copy()
        ds['OMT'].attrs['feature_name'] = 'omt'
        # print("Dataset surface depth level: ", ds['depth'].values[0])
        # print(ds)

        for config in range(0, 5 + 1):

            if config == 0:
                # Single feature, vertical axis from dataset
                z = ds['depth'].where(ds['depth'] >= -200, drop=True)
                pcm_features = {'temperature': z}

            elif config == 1:
                # Two features, vertical axis from dataset
                z = ds['depth'].where(ds['depth'] >= -200, drop=True)
                pcm_features = {'temperature': z, 'salinity': z}

            elif config == 2:
                # Single feature, new vertical axis (from -10. to avoid vertical mixing)
                z = np.arange(-10., -500, -10.1)
                pcm_features = {'temperature': z}

            elif config == 3:
                # Two features, new vertical axis (from surface to trigger vertical mixing)
                z = np.arange(0., -500, -10.1)
                pcm_features = {'temperature': z, 'salinity': z}

            elif config == 4:
                # Two features, new vertical axis (from surface to trigger vertical mixing) and a slice feature (single-level)
                z = np.arange(0., -500, -10.1)
                pcm_features = {'temperature': z, 'sst': None}

            elif config == 5:
                # Two slice features:
                pcm_features = {'omt': None, 'sst': None}

            #
            # default_opts = {'K': 3, 'features': pcm_features, 'reduction': 1, 'debug': 0, 'timeit': 0, 'chunk_size': 'auto'}
            default_opts = {
                'K': 3,
                'features': pcm_features
            }  # Default, no options specified
            for backend in backends:
                print("\n", "=" * 60,
                      "CONFIG %i / %s / %s" % (config, d, backend),
                      list(pcm_features.keys()))
                default_opts['backend'] = backend
                m = pcm(**default_opts)
                m.fit(ds)
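
If the dataset variables were not tagged with `feature_name` attributes, the
feature mapping could instead be passed explicitly at fit time, as Example #5
does with fit(ds, features=..., dim=...).
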
Example #5
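Unlike the test functions above, this is a training pipeline: it builds (or
reuses) an interpolated yearly dataset, fits a PCM on it, and attaches PCA
values, an i-metric, and class labels to the dataset before returning both.
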
import os
from typing import Tuple

import numpy as np
import xarray as xr

import pyxpcm
from pyxpcm.models import pcm

# NOTE: `cst` is a project-local constants module (file paths, coordinate
# names, defaults such as cst.K_CLUSTERS); its import is project-specific
# and left out here.


def train_on_interpolated_year(
    time_i: int = cst.EXAMPLE_TIME_INDEX,
    k_clusters: int = cst.K_CLUSTERS,
    maxvar: int = cst.D_PCS,
    min_depth: float = cst.MIN_DEPTH,
    max_depth: float = cst.MAX_DEPTH,
    remove_init_var: bool = True,
    separate_pca: bool = False,
    interp: bool = True,
    remake: bool = cst.REMAKE,
) -> Tuple[pyxpcm.pcm, xr.Dataset]:
    """Train on interpolated year.

    Args:
        time_i (int, optional): time index. Defaults to cst.EXAMPLE_TIME_INDEX.
        k_clusters (int, optional): clusters. Defaults to cst.K_CLUSTERS.
        maxvar (int, optional): num pca. Defaults to cst.D_PCS.
        min_depth (float, optional): minimum depth for column.
            Defaults to cst.MIN_DEPTH.
        max_depth (float, optional): maximum depth for column.
            Defaults to cst.MAX_DEPTH.
        remove_init_var (bool, optional): remove initial variables. Defaults to True.
        separate_pca (bool, optional): separate the pca. Defaults to False.
        interp (bool, optional): interpolate onto a finer horizontal grid.
            Defaults to True.
        remake (bool, optional): rebuild the interpolated file even if it
            already exists. Defaults to cst.REMAKE.

    Returns:
        Tuple[pyxpcm.pcm, xr.Dataset]: the fitted object and its corresponding dataset.

    """
    z = np.arange(-min_depth, -max_depth, -10.0)
    features_pcm = dict()
    for var in cst.VAR_NAME_LIST:
        features_pcm[var] = z
    features = cst.FEATURES_D
    fname = cst.INTERP_FILE_NAME
    if not os.path.isfile(fname) or remake:
        if os.path.isfile(fname):
            os.remove(fname)
        print("going to save to: ", fname)
        salt_nc = xr.open_dataset(
            cst.SALT_FILE).isel(time=slice(time_i, time_i + 12))
        theta_nc = xr.open_dataset(
            cst.THETA_FILE).isel(time=slice(time_i, time_i + 12))
        big_nc = xr.merge([salt_nc, theta_nc])
        both_nc = big_nc.where(big_nc.coords[cst.DEPTH_NAME] > max_depth).drop(
            cst.USELESS_LIST)
        if interp:
            mult_fact = 2
            lons_new = np.linspace(both_nc.XC.min(), both_nc.XC.max(),
                                   60 * 4 * mult_fact)
            lats_new = np.linspace(both_nc.YC.min(), both_nc.YC.max(),
                                   60 * mult_fact)
            ds = both_nc.interp(coords={
                cst.Y_COORD: lats_new,
                cst.X_COORD: lons_new
            })
        else:
            ds = both_nc
        ds.to_netcdf(fname)
    else:
        ds = xr.open_dataset(fname)

    pcm_object = pcm(
        K=k_clusters,
        features=features_pcm,
        separate_pca=separate_pca,
        maxvar=maxvar,
        timeit=True,
        timeit_verb=1,
    )

    pcm_object.fit(ds, features=features, dim=cst.Z_COORD)
    pcm_object.add_pca_to_xarray(ds,
                                 features=features,
                                 dim=cst.Z_COORD,
                                 inplace=True)
    pcm_object.find_i_metric(ds, inplace=True)
    pcm_object.predict(ds, features=features, dim=cst.Z_COORD, inplace=True)

    # Drop the "_pyXpcm_cleanable" flags so that pyxpcm's dataset clean-up
    # helpers do not remove these derived variables later on (assumed
    # behaviour of the flag):
    del ds.PCA_VALUES.attrs["_pyXpcm_cleanable"]
    del ds.IMETRIC.attrs["_pyXpcm_cleanable"]
    del ds.A_B.attrs["_pyXpcm_cleanable"]
    del ds.PCM_LABELS.attrs["_pyXpcm_cleanable"]

    if remove_init_var:
        ds = ds.drop(cst.VAR_NAME_LIST)

    return pcm_object, ds
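
A minimal usage sketch, assuming the project's `cst` constants module and the
files it points to are available (the arguments shown are illustrative):

model, ds = train_on_interpolated_year(time_i=0, interp=False)
print(model)             # summary of the fitted pyxpcm.pcm
print(ds['PCM_LABELS'])  # class labels attached by predict(..., inplace=True)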