import os
from typing import Tuple

import numpy as np
import pytest
import xarray as xr

import pyxpcm
# PCMClassError and PCMFeatureError are assumed importable alongside pcm:
from pyxpcm.models import pcm, PCMClassError, PCMFeatureError
from sklearn.exceptions import NotFittedError

# `cst` below refers to the project's constants module; its import is not
# shown in this section.

# Determine which classification backends are available (used by test_fitting):
backends = list()
try:
    import sklearn  # noqa: F401
    backends.append('sklearn')
except ModuleNotFoundError:
    pass
try:
    import dask_ml  # noqa: F401
    backends.append('dask_ml')
except ModuleNotFoundError:
    pass


def test_pcm_init_req():
    """Test PCM default instantiation with required arguments"""
    pcm_features_list = (
        {'F1': np.arange(0, -500, 2.)},
        {'F1': np.arange(0, -500, 2.), 'F2': np.arange(0, -500, 2.)},
        {'F1': np.arange(0, -500, 2.), 'F2': None},
    )

    # Both K and features are required:
    with pytest.raises(TypeError):
        m = pcm()
    with pytest.raises(TypeError):
        m = pcm(K=0)
    with pytest.raises(TypeError):
        m = pcm(features=pcm_features_list[0])

    # K must be a strictly positive number of classes:
    with pytest.raises(PCMClassError):
        m = pcm(K=0, features=pcm_features_list[0])

    # features must not be empty:
    with pytest.raises(PCMFeatureError):
        m = pcm(K=1, features=dict())

    for pcm_features in pcm_features_list:
        m = pcm(K=3, features=pcm_features)
        assert isinstance(m, pcm)

    # A freshly instantiated model must not look fitted:
    with pytest.raises(NotFittedError):
        m = pcm(K=1, features=pcm_features_list[0])
        if not hasattr(m, 'fitted'):
            raise NotFittedError

def test_saveload_prediction():
    """Test PCM save to / load from netcdf"""
    ds = pyxpcm.tutorial.open_dataset('dummy').load(Np=50, Nz=20)
    pcm_features = {'TEMP': ds['depth'], 'PSAL': ds['depth']}

    # Determine backends to test:
    backends = list()
    try:
        import sklearn  # noqa: F401
        backends.append('sklearn')
    except ModuleNotFoundError:
        pass
    try:
        import dask_ml  # noqa: F401
        backends.append('dask_ml')
    except ModuleNotFoundError:
        pass

    # Create a model, fit, predict, save, load, predict again:
    file = '.pyxpcm_dummy_file.nc'
    for backend in backends:
        for scaling in [0, 1, 2]:
            for reduction in [0, 1]:
                M = pcm(K=3, features=pcm_features,
                        scaling=scaling, reduction=reduction, backend=backend)
                M.fit(ds)
                M.to_netcdf(file, mode='w')
                label_ref = M.predict(ds, inplace=False)
                M_loaded = pyxpcm.load_netcdf(file)
                label_new = M_loaded.predict(ds, inplace=False)
                assert label_ref.equals(label_new), \
                    "netCDF I/O did not reproduce predictions"

    # Delete the file at the end of the test:
    os.remove(file)

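# The same round-trip expressed with pytest parametrization; a sketch, not
# part of the original suite (the test name is illustrative, and it reuses
# the module-level `backends` list defined above):
@pytest.mark.parametrize("backend", backends)
@pytest.mark.parametrize("scaling", [0, 1, 2])
@pytest.mark.parametrize("reduction", [0, 1])
def test_saveload_prediction_parametrized(backend, scaling, reduction):
    ds = pyxpcm.tutorial.open_dataset('dummy').load(Np=50, Nz=20)
    pcm_features = {'TEMP': ds['depth'], 'PSAL': ds['depth']}
    file = '.pyxpcm_dummy_file.nc'
    M = pcm(K=3, features=pcm_features,
            scaling=scaling, reduction=reduction, backend=backend)
    M.fit(ds)
    M.to_netcdf(file, mode='w')
    label_ref = M.predict(ds, inplace=False)
    label_new = pyxpcm.load_netcdf(file).predict(ds, inplace=False)
    os.remove(file)
    assert label_ref.equals(label_new)
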
def test_pcm_init_opt():
    """Test PCM instantiation with optional arguments"""
    pcm_features_list = (
        {'F1': np.arange(0, -500, 2.)},
        {'F1': np.arange(0, -500, 2.), 'F2': np.arange(0, -500, 2.)},
        {'F1': np.arange(0, -500, 2.), 'F2': None},
    )
    for pcm_features in pcm_features_list:
        for scaling in [0, 1, 2]:
            m = pcm(K=3, features=pcm_features, scaling=scaling)
            assert isinstance(m, pcm)
            for reduction in [0, 1]:
                m = pcm(K=3, features=pcm_features, scaling=scaling,
                        reduction=reduction)
                assert isinstance(m, pcm)
                m = pcm(K=3, features=pcm_features, scaling=scaling,
                        reduction=reduction, maxvar=90.)
                assert isinstance(m, pcm)
                m = pcm(K=3, features=pcm_features, scaling=scaling,
                        reduction=reduction, maxvar=90., classif='gmm')
                assert isinstance(m, pcm)
                for covariance_type in ['full', 'diag', 'spherical']:
                    # Grow the option set one argument at a time:
                    opts = dict(K=3, features=pcm_features, scaling=scaling,
                                reduction=reduction, maxvar=90., classif='gmm',
                                covariance_type=covariance_type)
                    m = pcm(**opts)
                    assert isinstance(m, pcm)
                    m = pcm(**opts, verb=False)
                    assert isinstance(m, pcm)
                    m = pcm(**opts, verb=False, debug=True)
                    assert isinstance(m, pcm)
                    m = pcm(**opts, verb=False, debug=True, timeit=True)
                    assert isinstance(m, pcm)
                    m = pcm(**opts, verb=False, debug=True, timeit=True,
                            timeit_verb=True)
                    assert isinstance(m, pcm)

def test_fitting():
    """Fit a default PCM on each tutorial dataset, feature configuration and backend"""
    dlist = ['dummy', 'argo', 'isas_snapshot', 'isas_series']
    for d in dlist:
        # Load and set up the dataset:
        ds = pyxpcm.tutorial.open_dataset(d).load()
        ds['TEMP'].attrs['feature_name'] = 'temperature'
        ds['PSAL'].attrs['feature_name'] = 'salinity'
        if d == 'argo':
            ds = ds.rename({'DEPTH': 'depth'})
            # Modify the vertical axis for test purposes:
            ds['depth'] = xr.DataArray(
                np.linspace(-10, -1405., len(ds['depth'])), dims='depth')
            ds['depth'].attrs['axis'] = 'Z'

        # Add single-depth-level variables:
        ds['SST'] = ds['TEMP'].isel(depth=0).copy()
        ds['SST'].attrs['feature_name'] = 'sst'
        ds['OMT'] = ds['TEMP'].mean(dim='depth').copy()
        ds['OMT'].attrs['feature_name'] = 'omt'
        # print("Dataset surface depth level: ", ds['depth'].values[0])
        # print(ds)

        for config in range(0, 5 + 1):
            if config == 0:
                # Single feature, vertical axis from the dataset:
                z = ds['depth'].where(ds['depth'] >= -200, drop=True)
                pcm_features = {'temperature': z}
            elif config == 1:
                # Two features, vertical axis from the dataset:
                z = ds['depth'].where(ds['depth'] >= -200, drop=True)
                pcm_features = {'temperature': z, 'salinity': z}
            elif config == 2:
                # Single feature, new vertical axis (starting at -10. to avoid vertical mixing):
                z = np.arange(-10., -500, -10.1)
                pcm_features = {'temperature': z}
            elif config == 3:
                # Two features, new vertical axis (from the surface to trigger vertical mixing):
                z = np.arange(0., -500, -10.1)
                pcm_features = {'temperature': z, 'salinity': z}
            elif config == 4:
                # Two features: new vertical axis (from the surface to trigger
                # vertical mixing) plus a slice (single-level) feature:
                z = np.arange(0., -500, -10.1)
                pcm_features = {'temperature': z, 'sst': None}
            elif config == 5:
                # Two slice features:
                pcm_features = {'omt': None, 'sst': None}

            # default_opts = {'K': 3, 'features': pcm_features, 'reduction': 1,
            #                 'debug': 0, 'timeit': 0, 'chunk_size': 'auto'}
            default_opts = {'K': 3, 'features': pcm_features}  # Default, no options specified
            for backend in backends:
                print("\n", "=" * 60,
                      "CONFIG %i / %s / %s" % (config, d, backend),
                      list(pcm_features.keys()))
                default_opts['backend'] = backend
                m = pcm(**default_opts)
                m.fit(ds)

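# Distilled from the grid above: a minimal single fit on the 'dummy' tutorial
# dataset (a usage sketch, not part of the original suite; the name avoids
# the `test_` prefix so pytest does not collect it):
def example_single_fit():
    ds = pyxpcm.tutorial.open_dataset('dummy').load()
    ds['TEMP'].attrs['feature_name'] = 'temperature'
    # Restrict the vertical axis to the upper 200 m, as in config 0 above:
    z = ds['depth'].where(ds['depth'] >= -200, drop=True)
    m = pcm(K=3, features={'temperature': z})
    m.fit(ds)
    return m
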
def train_on_interpolated_year(
    time_i: int = cst.EXAMPLE_TIME_INDEX,
    k_clusters: int = cst.K_CLUSTERS,
    maxvar: int = cst.D_PCS,
    min_depth: float = cst.MIN_DEPTH,
    max_depth: float = cst.MAX_DEPTH,
    remove_init_var: bool = True,
    separate_pca: bool = False,
    interp: bool = True,
    remake: bool = cst.REMAKE,
) -> Tuple[pyxpcm.pcm, xr.Dataset]:
    """Train a PCM on one year of interpolated data.

    Args:
        time_i (int, optional): Time index. Defaults to cst.EXAMPLE_TIME_INDEX.
        k_clusters (int, optional): Number of clusters. Defaults to cst.K_CLUSTERS.
        maxvar (int, optional): Number of principal components. Defaults to cst.D_PCS.
        min_depth (float, optional): Minimum depth of the column. Defaults to cst.MIN_DEPTH.
        max_depth (float, optional): Maximum depth of the column. Defaults to cst.MAX_DEPTH.
        remove_init_var (bool, optional): Remove the initial variables. Defaults to True.
        separate_pca (bool, optional): Run a separate PCA per feature. Defaults to False.
        interp (bool, optional): Interpolate onto a finer horizontal grid. Defaults to True.
        remake (bool, optional): Rebuild the interpolated file. Defaults to cst.REMAKE.

    Returns:
        Tuple[pyxpcm.pcm, xr.Dataset]: The fitted PCM object and its corresponding dataset.
    """
    z = np.arange(-min_depth, -max_depth, -10.0)
    features_pcm = dict()
    for var in cst.VAR_NAME_LIST:
        features_pcm[var] = z
    features = cst.FEATURES_D
    fname = cst.INTERP_FILE_NAME

    if not os.path.isfile(fname) or remake is True:
        if os.path.isfile(fname):
            os.remove(fname)
        print("going to save to: ", fname)
        salt_nc = xr.open_dataset(cst.SALT_FILE).isel(
            time=slice(time_i, time_i + 12))
        theta_nc = xr.open_dataset(cst.THETA_FILE).isel(
            time=slice(time_i, time_i + 12))
        big_nc = xr.merge([salt_nc, theta_nc])
        both_nc = big_nc.where(
            big_nc.coords[cst.DEPTH_NAME] > max_depth).drop(cst.USELESS_LIST)
        if interp:
            mult_fact = 2
            lons_new = np.linspace(
                both_nc.XC.min(), both_nc.XC.max(), 60 * 4 * mult_fact)
            lats_new = np.linspace(
                both_nc.YC.min(), both_nc.YC.max(), 60 * mult_fact)
            ds = both_nc.interp(
                coords={cst.Y_COORD: lats_new, cst.X_COORD: lons_new})
        else:
            ds = both_nc
        ds.to_netcdf(fname)
    else:
        ds = xr.open_dataset(fname)

    pcm_object = pcm(
        K=k_clusters,
        features=features_pcm,
        separate_pca=separate_pca,
        maxvar=maxvar,
        timeit=True,
        timeit_verb=1,
    )

    pcm_object.fit(ds, features=features, dim=cst.Z_COORD)
    pcm_object.add_pca_to_xarray(ds, features=features, dim=cst.Z_COORD, inplace=True)
    pcm_object.find_i_metric(ds, inplace=True)
    pcm_object.predict(ds, features=features, dim=cst.Z_COORD, inplace=True)

    # Keep the derived variables when the dataset is cleaned up by pyXpcm:
    del ds.PCA_VALUES.attrs["_pyXpcm_cleanable"]
    del ds.IMETRIC.attrs["_pyXpcm_cleanable"]
    del ds.A_B.attrs["_pyXpcm_cleanable"]
    del ds.PCM_LABELS.attrs["_pyXpcm_cleanable"]

    if remove_init_var:
        ds = ds.drop(cst.VAR_NAME_LIST)

    return pcm_object, ds

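# Example of calling the training helper (a sketch; assumes the `cst`
# constants module points at valid SALT/THETA input files):
if __name__ == "__main__":
    pcm_object, ds = train_on_interpolated_year()
    print(pcm_object)
    print(ds)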