def test_analyze_model_light(mock_analyze_resampled):
    """Check argument validation and output layout of ``analyze_model_light``.

    An empty ``ns`` and ``n_rep=0`` must each raise ``ValueError``.  For a
    valid call the ``mock_analyze_resampled`` fixture must have recorded at
    least one call, and the result must equal a dataset whose ``var1`` is
    ``arange(2)`` along ``dummy`` expanded over ``n`` and ``rep``.
    """
    analyze = gemmr.sample_analysis.analyze_model_light
    estimator = SVDPLS()
    covariance = np.eye(4)
    n_x_features = 2
    sample_sizes = np.array([3, 4])
    n_rep = 5

    # Invalid arguments: no sample sizes, then zero repetitions.
    assert_raises(ValueError, analyze, estimator, covariance, n_x_features,
                  [], n_rep=2)
    assert_raises(ValueError, analyze, estimator, covariance, n_x_features,
                  sample_sizes, n_rep=0)

    outcome = analyze(estimator, covariance, n_x_features, sample_sizes,
                      n_rep=n_rep)
    assert mock_analyze_resampled.call_count > 0

    # Expected variable: arange(2) along 'dummy', expanded over n and rep;
    # the 'rep' coordinate is removed before comparing.
    base = xr.DataArray(np.arange(2), dims=('dummy', ))
    expected_var1 = base.expand_dims(n=np.asarray(sample_sizes),
                                     rep=np.arange(n_rep))
    del expected_var1.coords['rep']
    expected = xr.Dataset({'var1': expected_var1})
    assert_xr_equal(outcome, expected)
def assert_qty_equal(a, b, check_attrs=True, **kwargs):
    """Assert that *a* and *b*, wrapped as ``Quantity``, are equal.

    Both arguments are passed through ``Quantity(...)`` first.  The comparison
    routine is chosen by what ``Quantity`` currently is: ``assert_series_equal``
    for ``AttrSeries``, ``assert_xr_equal`` for ``DataArray``.  Extra keyword
    arguments are forwarded to that routine.  With *check_attrs* true, the
    ``attrs`` of both objects must also compare equal.
    """
    lhs, rhs = Quantity(a), Quantity(b)

    # Dispatch on the concrete class backing Quantity.
    if Quantity is AttrSeries:
        assert_series_equal(lhs, rhs, **kwargs)
    elif Quantity is DataArray:
        assert_xr_equal(lhs, rhs, **kwargs)

    if not check_attrs:
        return
    assert lhs.attrs == rhs.attrs
def test_sda_accessor():
    """Exercise the ``_sda`` accessor: dense views, conversion, and products."""
    two_dim_index = pd.MultiIndex.from_product(
        [['a', 'b'], ['c', 'd']], names=['foo', 'bar'])
    x_series = pd.Series(data=[1., 2, 3, 4], index=two_dim_index)
    y_series = pd.Series(
        data=[5., 6], index=pd.Index(['e', 'f'], name='baz'))

    x = SparseDataArray.from_series(x_series)
    y = SparseDataArray.from_series(y_series)

    x_dense = x._sda.dense_super
    y_dense = y._sda.dense_super
    for dense in (x_dense, y_dense):
        assert not dense._sda.COO_data or dense._sda.nan_fill

    # As of sparse 0.10, sparse `y` is automatically broadcast to `x_dense`;
    # previously this raised ValueError. Only checks that no error occurs.
    _ = x_dense * y

    expected_dims = ('foo', 'bar', 'baz')
    reference = x_dense._sda.convert() * y
    sparse_times_converted = x * y_dense._sda.convert()
    assert reference.dims == expected_dims and \
        expected_dims == sparse_times_converted.dims
    assert_xr_equal(reference, sparse_times_converted)

    both_converted = x._sda.convert() * y._sda.convert()
    assert_xr_equal(reference, both_converted)

    left_converted = x._sda.convert() * y
    assert_xr_equal(reference, left_converted)

    fresh_from_series = SparseDataArray.from_series(x_series) * y
    assert_xr_equal(reference, fresh_from_series)
def test_as_sparse_xarray():
    """Check that ``as_sparse_xarray`` yields operands that multiply alike."""
    two_dim_index = pd.MultiIndex.from_product(
        [['a', 'b'], ['c', 'd']], names=['foo', 'bar'])
    x_series = pd.Series(data=[1., 2, 3, 4], index=two_dim_index)
    y_series = pd.Series(
        data=[5., 6], index=pd.Index(['e', 'f'], name='baz'))

    x = xr.DataArray.from_series(x_series, sparse=True)
    y = xr.DataArray.from_series(y_series, sparse=True)
    x_dense = xr.DataArray.from_series(x_series)
    y_dense = xr.DataArray.from_series(y_series)

    # A dense-times-sparse product is expected to fail here.
    with pytest.raises(ValueError,
                       match='make sure that the broadcast shape'):
        x_dense * y

    reference = as_sparse_xarray(x_dense) * y
    sparse_times_converted = x * as_sparse_xarray(y_dense)
    assert reference.dims == ('foo', 'bar', 'baz')
    assert_xr_equal(reference, sparse_times_converted)

    both_converted = as_sparse_xarray(x) * as_sparse_xarray(y)
    assert_xr_equal(reference, both_converted)

    left_converted = as_sparse_xarray(x) * y
    assert_xr_equal(reference, left_converted)

    from_series = as_sparse_xarray(x_series) * y
    assert_xr_equal(reference, from_series)
def test_analyze_subsampled(mock_analyze_resampled):
    """Check argument validation and output layout of ``analyze_subsampled``.

    ``ValueError`` is expected for an empty ``ns``, for ``n_rep=0``, and when
    the data are truncated to two rows while ``ns`` is ``[2, 3]``.  For a
    valid call the ``mock_analyze_resampled`` fixture must have recorded at
    least one call, and the result must equal a dataset whose ``var1`` is
    ``arange(2)`` along ``dummy`` expanded over ``n`` and ``rep``.
    """
    estr = SVDPLS()
    ns = np.array([2, 3])
    n_rep = 4
    X = np.arange(10).reshape(5, 2)
    Y = X

    assert_raises(ValueError, gemmr.sample_analysis.analyze_subsampled,
                  estr, X, Y, ns=[], n_rep=2)
    assert_raises(ValueError, gemmr.sample_analysis.analyze_subsampled,
                  estr, X, Y, ns=ns, n_rep=0)
    assert_raises(ValueError, gemmr.sample_analysis.analyze_subsampled,
                  estr, X[:2], Y[:2], ns=ns, n_rep=2)

    result = gemmr.sample_analysis.analyze_subsampled(
        estr, X, Y, ns=ns, n_rep=n_rep)
    assert mock_analyze_resampled.call_count > 0

    # Expected variable: arange(2) along 'dummy', expanded over n and rep;
    # the 'rep' coordinate is removed before comparing.
    tgt_var1 = xr.DataArray(np.arange(2), dims=('dummy', )).expand_dims(
        n=np.asarray(ns), rep=np.arange(n_rep))
    del tgt_var1.coords['rep']
    target_ds = xr.Dataset(dict(var1=tgt_var1))
    assert_xr_equal(result, target_ds)
def assert_qty_equal(a, b, check_attrs=True, **kwargs):
    """Assert that *a* and *b*, wrapped as ``Quantity``, are equal.

    Both arguments are passed through ``Quantity(...)`` first.  The comparison
    routine is chosen by what ``Quantity`` currently is: ``assert_series_equal``
    for ``AttrSeries``, xarray's ``assert_equal`` for ``DataArray``.  Extra
    keyword arguments are forwarded to that routine.  With *check_attrs* true,
    the ``attrs`` of both objects must also compare equal.
    """
    # py2 compat: import here instead of top of file
    from xarray import DataArray
    from xarray.testing import assert_equal as assert_xr_equal
    from .reporting.utils import Quantity, AttrSeries

    lhs, rhs = Quantity(a), Quantity(b)

    # Dispatch on the concrete class backing Quantity.
    if Quantity is AttrSeries:
        assert_series_equal(lhs, rhs, **kwargs)
    elif Quantity is DataArray:
        assert_xr_equal(lhs, rhs, **kwargs)

    if not check_attrs:
        return
    assert lhs.attrs == rhs.attrs
def test_reporting_file_formats(test_data_path, tmp_path):
    """Round-trip a CSV input through ``Reporter`` and write CSV and Excel."""
    reporter = Reporter()

    csv_in = test_data_path / 'report-input.csv'
    frame = pd.read_csv(test_data_path / 'report-input.csv',
                        index_col=['i', 'j'])
    expected = xr.DataArray.from_series(frame['value'])

    # Adding the CSV file gives a key whose value compares equal to the
    # DataArray built directly from the same file.
    key = reporter.add_file(csv_in)
    assert_xr_equal(reporter.get(key), expected)

    # Write the quantity back out as CSV.
    csv_out = tmp_path / 'report-output.csv'
    reporter.write(key, csv_out)

    # Output holds the same lines as the input; order may differ.
    lines_in = sorted(csv_in.read_text().split('\n'))
    lines_out = sorted(csv_out.read_text().split('\n'))
    assert lines_in == lines_out

    # Writing to Excel is only checked for not raising.
    xlsx_out = tmp_path / 'report-output.xlsx'
    reporter.write(key, xlsx_out)
def assert_qty_equal(a, b, check_attrs=True, **kwargs):
    """Assert that Quantity objects *a* and *b* are equal.

    When ``Quantity`` is ``AttrSeries``, any ``pd.Series`` or ``DataArray``
    argument is first converted with ``as_quantity`` and the comparison is
    made by ``assert_series_equal`` with ``check_dtype=False``.  When
    ``Quantity`` is ``DataArray``, the arguments are compared unchanged with
    xarray's ``assert_equal``.  Extra keyword arguments are forwarded to the
    chosen routine.  With *check_attrs* true, the ``attrs`` of both objects
    must also compare equal.
    """
    from xarray import DataArray
    from xarray.testing import assert_equal as assert_xr_equal
    from .reporting.quantity import AttrSeries, Quantity, as_quantity

    if Quantity is AttrSeries:
        # Convert pd.Series (and DataArray) inputs automatically; anything
        # else is compared as given.
        convertible = (pd.Series, DataArray)
        if isinstance(a, convertible):
            a = as_quantity(a)
        if isinstance(b, convertible):
            b = as_quantity(b)
        assert_series_equal(a, b, check_dtype=False, **kwargs)
    elif Quantity is DataArray:  # pragma: no cover
        assert_xr_equal(a, b, **kwargs)

    if not check_attrs:
        return
    assert a.attrs == b.attrs
def test_analyze_resampled(mock_analyze_dataset):
    """Check the extra ``output_*`` variables added by ``analyze_resampled``.

    Without resampling the result must equal the mocked dataset.  With
    permutations, bootstrap, or leave-one-out enabled, the result must
    additionally carry ``output_perm`` / ``output_bs`` / ``output_loo``: the
    mocked ``output`` expanded along the corresponding dimension.
    """
    # NOTE(review): `mocked_analyze_dataset` is not the fixture parameter
    # (`mock_analyze_dataset`); presumably it is defined at module level --
    # confirm it is in scope.

    def expected_with(dim, size):
        # Mocked dataset plus 'output_<dim>' expanded along `dim`; the
        # coordinate of `dim` is removed before comparing.
        ds = mocked_analyze_dataset.return_value.copy()
        ds['output_' + dim] = ds.output.expand_dims(**{dim: range(size)})
        del ds.coords[dim]
        return ds

    analyze = gemmr.sample_analysis.analyze_resampled
    estimator = SVDPLS()
    X = np.arange(10).reshape(5, 2)
    Y = X

    # No resampling at all.
    outcome = analyze(estimator, X, Y, perm=0)
    assert mock_analyze_dataset.call_count > 0
    assert_xr_equal(outcome, mocked_analyze_dataset.return_value)

    # Permutations.
    n_perm = 3
    outcome = analyze(estimator, X, Y, perm=n_perm)
    assert_xr_equal(outcome, expected_with('perm', n_perm))

    # Bootstrap.
    n_bs = 5
    outcome = analyze(estimator, X, Y, perm=0, n_bs=n_bs,
                      x_align_ref=np.arange(2), y_align_ref=np.arange(2))
    assert_xr_equal(outcome, expected_with('bs', n_bs))

    # Leave-one-out: one entry per row of X.
    outcome = analyze(estimator, X, Y, perm=0, loo=True,
                      x_align_ref=np.arange(2), y_align_ref=np.arange(2))
    assert_xr_equal(outcome, expected_with('loo', len(X)))
def test_analyze_model_parameters(
        # mock_setup_model,
        mock_analyze_resampled):
    """Check variables, dimensions, and warnings of ``analyze_model_parameters``.

    Runs the analyzer once with a small parameter grid, verifies the set of
    returned data variables and dimensions, their coordinate values, and the
    dimension order of each "true" quantity, then checks that ``rotate_XY=True``
    and ``n_test=1`` each trigger a ``UserWarning``.
    """

    def mk_test_stats(Xtest, Ytest, U_latent, V_latent):
        return {'test_stat1': 2.5}

    def postproc_test(ds):
        ds['postproc'] = 3.1

    analyze = gemmr.sample_analysis.analyzers.analyze_model_parameters

    rs = np.asarray((1. / 2, 1. / 4))
    pxs = np.asarray((8, 9))
    n_per_ftrs = np.asarray((6, 7))
    n_Sigmas = 10
    n_rep = 2
    ax, ay = -.5, -1.5

    kwargs = {
        'model': 'cca',
        'estr': None,
        'n_rep': n_rep,
        'n_bs': None,  # tested in analyze_resampled
        'n_perm': None,  # tested in analyze_resampled
        'n_per_ftrs': n_per_ftrs,
        'pxs': pxs,
        'pys': 'px',
        'rs': rs,
        'n_between_modes': 1,
        'n_Sigmas': n_Sigmas,
        'powerlaw_decay': (ax, ay),
        'n_test': 2 * 7 * 9 + 1,
        'mk_test_statistics': mk_test_stats,
        'addons': [],  # test in analyze_dataset
        'postprocessors': [postproc_test],
        'random_state': 0,
        'show_progress': False,
    }

    result = analyze(**kwargs)
    assert mock_analyze_resampled.call_count > 0

    # Names of returned variables and dimensions.
    assert set(result.data_vars) == {
        'var1', 'between_assocs_true', 'between_corrs_true',
        'x_weights_true', 'y_weights_true', 'ax', 'ay',
        'latent_expl_var_ratios_x', 'latent_expl_var_ratios_y',
        'weight_selection_algorithm', 'x_loadings_true',
        'x_crossloadings_true', 'y_loadings_true', 'y_crossloadings_true',
        'py', 'test_stat1', 'postproc',
    }
    assert set(result.dims) == {
        'Sigma_id', 'dummy', 'mode', 'n_per_ftr', 'px', 'r', 'rep',
        'x_feature', 'y_feature',
    }

    # Coordinate values and scalar-valued variables.
    assert np.allclose(result.postproc, 3.1)
    assert_allclose(result.r.values, np.sort(rs))
    assert np.all(result.px == pxs)
    assert np.all(result.n_per_ftr == n_per_ftrs)
    assert np.all(result.Sigma_id == np.arange(n_Sigmas))
    n_features = np.arange(np.max(pxs))
    assert np.all(result.x_feature.values == n_features)
    assert np.all(result.y_feature.values == n_features)
    assert np.all(result.py.values == pxs)

    # 'var1': arange(2) along 'dummy', expanded over the full grid; the
    # 'rep' coordinate is removed before comparing.
    target_var1 = xr.DataArray(np.arange(2), dims=('dummy', )).expand_dims(
        px=pxs,
        r=np.sort(rs),
        Sigma_id=np.arange(n_Sigmas),
        n_per_ftr=n_per_ftrs,
        rep=np.arange(n_rep))
    del target_var1.coords['rep']
    assert_xr_equal(result.var1, target_var1)

    # Dimension order of each per-model quantity.
    grid = ('px', 'r', 'Sigma_id')
    per_mode = grid + ('mode', )
    per_x_feature = grid + ('x_feature', 'mode')
    per_y_feature = grid + ('y_feature', 'mode')
    expected_dims = {
        'between_assocs_true': per_mode,
        'x_weights_true': per_x_feature,
        'y_weights_true': per_y_feature,
        'ax': grid,
        'ay': grid,
        'latent_expl_var_ratios_x': per_mode,
        'latent_expl_var_ratios_y': per_mode,
        'x_loadings_true': per_x_feature,
        'x_crossloadings_true': per_x_feature,
        'y_loadings_true': per_y_feature,
        'y_crossloadings_true': per_y_feature,
        'test_stat1': grid,
    }
    for var_name, dims in expected_dims.items():
        assert getattr(result, var_name).dims == dims

    assert_allclose(result.ax.values, ax)
    assert_allclose(result.ay.values, ay)
    assert np.allclose(result.test_stat1.values, 2.5)

    # Each of these argument variations must emit a UserWarning.
    assert_warns(UserWarning, analyze, **dict(kwargs, rotate_XY=True))
    assert_warns(UserWarning, analyze, **dict(kwargs, n_test=1))
def test_analyze_dataset():
    """Check ``analyze_dataset`` on identical X/Y data and on a failing fit.

    With ``Y`` equal to ``X`` the sample correlation must be 1, the addon's
    variable must be present, and weights and loadings must match fixed
    targets.  With an estimator whose ``fit`` raises ``ValueError``, every
    estimator-derived output must be NaN while the addon's variable is kept.
    """

    def addon_test(estr, X, Y, Xorig, Yorig, x_align_ref, y_align_ref,
                   results, **kwargs):
        results['addon_var'] = 3. / 8

    analyze = gemmr.sample_analysis.analyze_dataset
    estr = SVDPLS()
    X = np.arange(10).reshape(5, 2)
    Y = X

    result = analyze(estr, X, Y, addons=[addon_test])
    assert np.isclose(result.between_corrs_sample, 1.)
    assert np.isclose(result.addon_var, 3. / 8)

    # Compare against a direct fit of the same estimator.
    estr.fit(X, Y)
    assert np.isclose(estr.assocs_[0], result.between_assocs)
    score_cov = np.cov(estr.x_scores_[:, 0], estr.y_scores_[:, 0])[0, 1]
    assert np.isclose(score_cov, result.between_covs_sample)

    # Targets for the X side.
    x_weights_tgt = xr.DataArray(
        [[1. / np.sqrt(2)] * 2],
        dims=('mode', 'x_feature'),
        coords={'x_feature': np.arange(2)},
        name='x_weights').T
    x_loadings_tgt = xr.DataArray(
        np.ones((1, 2), dtype=float),
        dims=('mode', 'x_orig_feature'),
        coords={'x_orig_feature': np.arange(2)},
        name='x_loadings').T
    assert_xr_allclose(result.x_weights, x_weights_tgt)
    assert_xr_allclose(result.x_loadings, x_loadings_tgt)

    # The Y-side targets are the X-side ones renamed.
    y_weights_tgt = x_weights_tgt.rename('y_weights').rename(
        x_feature='y_feature')
    y_loadings_tgt = x_loadings_tgt.rename('y_loadings').rename(
        x_orig_feature='y_orig_feature')
    assert_xr_allclose(result.y_weights, y_weights_tgt)
    assert_xr_allclose(result.y_loadings, y_loadings_tgt)

    ###

    assert np.all(result.x_weights.values > 0)
    assert np.all(result.y_weights.values > 0)
    # i.e. if we now rerun analyze_dataset with ``?_align_ref`` the weights
    # should be negative
    # NOTE(review): the re-run below is not followed by any assertion, so
    # the sign flip is not actually checked -- confirm whether one is missing.
    result = analyze(estr, X, Y,
                     x_align_ref=-np.eye(2), y_align_ref=-np.eye(2))

    ###

    class MockEstr():
        def fit(self, X, Y):
            raise ValueError()

    result = analyze(MockEstr(), X, Y, addons=[addon_test])

    # All-NaN (x_feature, mode) template, renamed for the other outputs.
    nan_template = xr.DataArray(
        np.full((2, 1), np.nan),
        dims=('x_feature', 'mode'),
        coords={'x_feature': np.arange(2)})
    expected = xr.Dataset({
        'between_assocs': np.nan,
        'between_covs_sample': np.nan,
        'between_corrs_sample': np.nan,
        'addon_var': 3. / 8,
        'x_weights': nan_template,
        'y_weights': nan_template.rename(x_feature='y_feature'),
        'x_loadings': nan_template.rename(x_feature='x_orig_feature'),
        'y_loadings': nan_template.rename(x_feature='y_orig_feature'),
    })
    assert_xr_equal(result, expected)