示例#1
0
def test_analyze_model_light(mock_analyze_resampled):
    """Check argument validation and output layout of ``analyze_model_light``.

    An empty list of sample sizes and ``n_rep=0`` must each raise
    ``ValueError``.  A valid call must invoke the ``mock_analyze_resampled``
    fixture at least once and return a dataset equal to the expected ``var1``
    variable broadcast over the ``n`` and ``rep`` dimensions.
    """
    estr = SVDPLS()
    Sigma = np.eye(4)
    px = 2
    ns = np.array([3, 4])
    # Single source of truth for the number of repetitions: used both for the
    # call under test and for building the expected dataset.
    n_rep = 5

    # Empty list of sample sizes is rejected.
    assert_raises(ValueError,
                  gemmr.sample_analysis.analyze_model_light,
                  estr,
                  Sigma,
                  px, [],
                  n_rep=2)
    # Zero repetitions are rejected.
    assert_raises(ValueError,
                  gemmr.sample_analysis.analyze_model_light,
                  estr,
                  Sigma,
                  px,
                  ns,
                  n_rep=0)

    result = gemmr.sample_analysis.analyze_model_light(estr,
                                                       Sigma,
                                                       px,
                                                       ns,
                                                       n_rep=n_rep)
    assert mock_analyze_resampled.call_count > 0

    # Expected: ``np.arange(2)`` along ``dummy``, broadcast over ``n`` and
    # ``rep``; the ``rep`` coordinate is dropped before comparing.
    tgt_var1 = xr.DataArray(np.arange(2),
                            dims=('dummy', )).expand_dims(n=np.asarray(ns),
                                                          rep=np.arange(n_rep))
    del tgt_var1.coords['rep']
    target_ds = xr.Dataset(dict(var1=tgt_var1))
    assert_xr_equal(result, target_ds)
示例#2
0
文件: testing.py 项目: zhouwenji/ixmp
def assert_qty_equal(a, b, check_attrs=True, **kwargs):
    """Assert that *a* and *b* are equal once converted to ``Quantity``.

    Both arguments are passed through ``Quantity(...)``.  The comparison
    function is chosen by the class ``Quantity`` is bound to; extra *kwargs*
    are forwarded to it.  When *check_attrs* is true, the ``attrs`` of the two
    converted objects must also compare equal.
    """
    left, right = Quantity(a), Quantity(b)

    # Dispatch on the concrete class behind Quantity.
    if Quantity is AttrSeries:
        assert_series_equal(left, right, **kwargs)
    elif Quantity is DataArray:
        assert_xr_equal(left, right, **kwargs)

    if not check_attrs:
        return

    # Attributes must match as well.
    assert left.attrs == right.attrs
示例#3
0
def test_sda_accessor():
    """Test conversion to sparse.COO-backed xr.DataArray via ``_sda``."""
    foo_bar_index = pd.MultiIndex.from_product([['a', 'b'], ['c', 'd']],
                                               names=['foo', 'bar'])
    x_series = pd.Series(data=[1., 2, 3, 4], index=foo_bar_index)
    baz_index = pd.Index(['e', 'f'], name='baz')
    y_series = pd.Series(data=[5., 6], index=baz_index)

    x = SparseDataArray.from_series(x_series)
    y = SparseDataArray.from_series(y_series)

    x_dense = x._sda.dense_super
    y_dense = y._sda.dense_super
    # Each dense object must either not hold COO data or be NaN-filled.
    for dense in (x_dense, y_dense):
        assert not dense._sda.COO_data or dense._sda.nan_fill

    # As of sparse 0.10, sparse `y` is automatically broadcast to `x_dense`
    # Previously, this raised ValueError.
    x_dense * y

    # Reference product: converted dense `x` times sparse `y`.
    reference = x_dense._sda.convert() * y

    sparse_times_converted = x * y_dense._sda.convert()
    assert reference.dims == ('foo', 'bar', 'baz') == \
        sparse_times_converted.dims
    assert_xr_equal(reference, sparse_times_converted)

    # Converting already-sparse operands gives the same product.
    both_converted = x._sda.convert() * y._sda.convert()
    assert_xr_equal(reference, both_converted)

    left_converted = x._sda.convert() * y
    assert_xr_equal(reference, left_converted)

    # Building the left operand afresh from the series gives the same product.
    from_series_product = SparseDataArray.from_series(x_series) * y
    assert_xr_equal(reference, from_series_product)
示例#4
0
def test_as_sparse_xarray():
    """Test conversion to sparse.COO-backed xr.DataArray."""
    foo_bar_index = pd.MultiIndex.from_product([['a', 'b'], ['c', 'd']],
                                               names=['foo', 'bar'])
    x_series = pd.Series(data=[1., 2, 3, 4], index=foo_bar_index)
    baz_index = pd.Index(['e', 'f'], name='baz')
    y_series = pd.Series(data=[5., 6], index=baz_index)

    # Sparse-backed operands
    x = xr.DataArray.from_series(x_series, sparse=True)
    y = xr.DataArray.from_series(y_series, sparse=True)

    # Dense counterparts
    x_dense = xr.DataArray.from_series(x_series)
    y_dense = xr.DataArray.from_series(y_series)

    # Multiplying dense by sparse directly is expected to fail.
    with pytest.raises(ValueError, match='make sure that the broadcast shape'):
        x_dense * y

    # Reference product: dense `x` converted to sparse, times sparse `y`.
    reference = as_sparse_xarray(x_dense) * y
    sparse_times_converted = x * as_sparse_xarray(y_dense)
    assert reference.dims == ('foo', 'bar', 'baz')
    assert_xr_equal(reference, sparse_times_converted)

    # Converting already-sparse operands gives the same product.
    both_converted = as_sparse_xarray(x) * as_sparse_xarray(y)
    assert_xr_equal(reference, both_converted)

    left_converted = as_sparse_xarray(x) * y
    assert_xr_equal(reference, left_converted)

    # A pd.Series is accepted directly by as_sparse_xarray.
    from_series_product = as_sparse_xarray(x_series) * y
    assert_xr_equal(reference, from_series_product)
示例#5
0
def test_analyze_subsampled(mock_analyze_resampled):
    """Check argument validation and output layout of ``analyze_subsampled``.

    An empty ``ns``, ``n_rep=0`` and data truncated to two rows must each
    raise ``ValueError``.  A valid call must invoke the
    ``mock_analyze_resampled`` fixture at least once and return a dataset
    equal to the expected ``var1`` variable broadcast over ``n`` and ``rep``.
    """
    estr = SVDPLS()
    ns = np.array([2, 3])
    n_rep = 4

    X = np.arange(10).reshape(5, 2)
    Y = X

    # Empty list of sample sizes is rejected.
    assert_raises(ValueError,
                  gemmr.sample_analysis.analyze_subsampled,
                  estr,
                  X,
                  Y,
                  ns=[],
                  n_rep=2)
    # Zero repetitions are rejected.
    assert_raises(ValueError,
                  gemmr.sample_analysis.analyze_subsampled,
                  estr,
                  X,
                  Y,
                  ns=ns,
                  n_rep=0)
    # Data truncated to 2 rows while ``ns`` requests up to 3 samples.
    assert_raises(ValueError,
                  gemmr.sample_analysis.analyze_subsampled,
                  estr,
                  X[:2],
                  Y[:2],
                  ns=ns,
                  n_rep=2)

    result = gemmr.sample_analysis.analyze_subsampled(estr,
                                                      X,
                                                      Y,
                                                      ns=ns,
                                                      n_rep=n_rep)
    assert mock_analyze_resampled.call_count > 0

    # Expected: ``np.arange(2)`` along ``dummy``, broadcast over ``n`` and
    # ``rep``; the ``rep`` coordinate is dropped before comparing.
    tgt_var1 = xr.DataArray(np.arange(2),
                            dims=('dummy', )).expand_dims(n=np.asarray(ns),
                                                          rep=np.arange(n_rep))
    del tgt_var1.coords['rep']
    target_ds = xr.Dataset(dict(var1=tgt_var1))
    assert_xr_equal(result, target_ds)
示例#6
0
def assert_qty_equal(a, b, check_attrs=True, **kwargs):
    """Assert that *a* and *b* are equal once converted to ``Quantity``.

    Both arguments are passed through ``Quantity(...)``.  The comparison
    function is chosen by the class ``Quantity`` is bound to; extra *kwargs*
    are forwarded to it.  When *check_attrs* is true, the ``attrs`` of the two
    converted objects must also compare equal.
    """
    # py2 compat: import here instead of top of file
    from xarray import DataArray
    from xarray.testing import assert_equal as assert_xr_equal

    from .reporting.utils import Quantity, AttrSeries

    left, right = Quantity(a), Quantity(b)

    # Dispatch on the concrete class behind Quantity.
    if Quantity is AttrSeries:
        assert_series_equal(left, right, **kwargs)
    elif Quantity is DataArray:
        assert_xr_equal(left, right, **kwargs)

    if not check_attrs:
        return

    # Attributes must match as well.
    assert left.attrs == right.attrs
示例#7
0
def test_reporting_file_formats(test_data_path, tmp_path):
    """Round-trip a CSV file through ``Reporter`` and write CSV/Excel output."""
    r = Reporter()

    input_csv = test_data_path / 'report-input.csv'
    input_frame = pd.read_csv(input_csv, index_col=['i', 'j'])
    expected = xr.DataArray.from_series(input_frame['value'])

    # CSV file is automatically parsed to xr.DataArray
    key = r.add_file(input_csv)
    assert_xr_equal(r.get(key), expected)

    # Write to CSV
    output_csv = tmp_path / 'report-output.csv'
    r.write(key, output_csv)

    # Output is identical to input file, except for order
    input_lines = sorted(input_csv.read_text().split('\n'))
    output_lines = sorted(output_csv.read_text().split('\n'))
    assert input_lines == output_lines

    # Write to Excel
    output_xlsx = tmp_path / 'report-output.xlsx'
    r.write(key, output_xlsx)
示例#8
0
def assert_qty_equal(a, b, check_attrs=True, **kwargs):
    """Assert that Quantity objects *a* and *b* are equal.

    When Quantity is AttrSeries, any of *a* and *b* that is a
    :class:`pandas.Series` or :class:`xarray.DataArray` is first passed
    through :meth:`as_quantity`, and the comparison ignores dtype.  When
    *check_attrs* is true, ``a.attrs`` and ``b.attrs`` must also be equal.
    """
    from xarray import DataArray
    from xarray.testing import assert_equal as assert_xr_equal

    from .reporting.quantity import AttrSeries, Quantity, as_quantity

    if Quantity is AttrSeries:
        # Types that are converted automatically before comparing
        convertible = (pd.Series, DataArray)
        if isinstance(a, convertible):
            a = as_quantity(a)
        if isinstance(b, convertible):
            b = as_quantity(b)

        assert_series_equal(a, b, check_dtype=False, **kwargs)
    elif Quantity is DataArray:  # pragma: no cover
        assert_xr_equal(a, b, **kwargs)

    if not check_attrs:
        return

    # Attributes must match as well.
    assert a.attrs == b.attrs
示例#9
0
def test_analyze_resampled(mock_analyze_dataset):
    """Check the datasets returned by ``analyze_resampled``.

    Four scenarios are exercised: no resampling, permutations, bootstrap
    and leave-one-out.  In each resampling scenario the expected result is
    the mocked ``analyze_dataset`` return value plus one ``output_<kind>``
    variable expanded along the matching dimension.
    """
    # NOTE(review): the fixture is named ``mock_analyze_dataset`` but the body
    # reads ``mocked_analyze_dataset`` -- presumably a module-level mock
    # defined outside this view; confirm the two refer to the same object.

    def expected_with(kind, index):
        # Fresh copy of the mocked output with ``output_<kind>`` added along
        # dimension ``kind``; the coordinate of that dimension is dropped.
        expected = mocked_analyze_dataset.return_value.copy()
        expected['output_' + kind] = expected.output.expand_dims(
            **{kind: index})
        del expected.coords[kind]
        return expected

    estr = SVDPLS()
    X = np.arange(10).reshape(5, 2)
    Y = X

    # No resampling: the mocked output is passed through unchanged.
    result = gemmr.sample_analysis.analyze_resampled(estr, X, Y, perm=0)
    assert mock_analyze_dataset.call_count > 0
    assert_xr_equal(result, mocked_analyze_dataset.return_value)

    # Permutations
    n_perm = 3
    result = gemmr.sample_analysis.analyze_resampled(estr, X, Y, perm=n_perm)
    assert_xr_equal(result, expected_with('perm', range(n_perm)))

    # Bootstrap
    n_bs = 5
    result = gemmr.sample_analysis.analyze_resampled(
        estr, X, Y,
        perm=0,
        n_bs=n_bs,
        x_align_ref=np.arange(2),
        y_align_ref=np.arange(2),
    )
    assert_xr_equal(result, expected_with('bs', range(n_bs)))

    # Leave-one-out: one entry per row of X
    result = gemmr.sample_analysis.analyze_resampled(
        estr, X, Y,
        perm=0,
        loo=True,
        x_align_ref=np.arange(2),
        y_align_ref=np.arange(2),
    )
    assert_xr_equal(result, expected_with('loo', range(len(X))))
示例#10
0
def test_analyze_model_parameters(
        # mock_setup_model,
        mock_analyze_resampled):
    """Check the dataset produced by ``analyze_model_parameters``.

    The function is called with a small parameter grid; the test then
    verifies the set of data variables and dimensions, the coordinate values,
    the dimension order of each "true" variable, and that the custom test
    statistic and postprocessor results are present.  Finally it checks that
    passing ``rotate_XY=True`` or ``n_test=1`` emits a ``UserWarning``.
    """
    def mk_test_stats(Xtest, Ytest, U_latent, V_latent):
        # Constant test statistic so its value can be asserted below.
        return dict(test_stat1=2.5, )

    def postproc_test(ds):
        # Postprocessor that adds a constant variable to the dataset in place.
        ds['postproc'] = 3.1

    rs = np.asarray((1. / 2, 1. / 4))
    pxs = np.asarray((8, 9))
    n_per_ftrs = np.asarray((6, 7))
    n_Sigmas = 10
    n_rep = 2
    ax, ay = -.5, -1.5
    kwargs = dict(
        model='cca',
        estr=None,
        n_rep=n_rep,
        n_bs=None,  # tested in analyze_resampled
        n_perm=None,  # tested in analyze_resampled
        n_per_ftrs=n_per_ftrs,
        pxs=pxs,
        pys='px',
        rs=rs,
        n_between_modes=1,
        n_Sigmas=n_Sigmas,
        powerlaw_decay=(ax, ay),
        # NOTE(review): 7 and 9 match max(n_per_ftrs) and max(pxs); presumably
        # sized so that no warning is raised (cf. ``n_test = 1`` at the end)
        # -- confirm against the implementation.
        n_test=2 * 7 * 9 + 1,
        mk_test_statistics=mk_test_stats,
        addons=[],  # test in analyze_dataset
        postprocessors=[postproc_test],
        random_state=0,
        show_progress=False,
    )
    result = gemmr.sample_analysis.analyzers.analyze_model_parameters(**kwargs)

    assert mock_analyze_resampled.call_count > 0

    # Exactly these data variables and dimensions are expected.
    assert set(result.data_vars.keys()) == set([
        'var1', 'between_assocs_true', 'between_corrs_true', 'x_weights_true',
        'y_weights_true', 'ax', 'ay', 'latent_expl_var_ratios_x',
        'latent_expl_var_ratios_y', 'weight_selection_algorithm',
        'x_loadings_true', 'x_crossloadings_true', 'y_loadings_true',
        'y_crossloadings_true', 'py', 'test_stat1', 'postproc'
    ])
    assert set(result.dims) == set([
        'Sigma_id', 'dummy', 'mode', 'n_per_ftr', 'px', 'r', 'rep',
        'x_feature', 'y_feature'
    ])

    # Coordinates mirror the inputs; ``r`` is compared against sorted ``rs``.
    assert np.allclose(result.postproc, 3.1)
    assert_allclose(result.r.values, np.sort(rs))
    assert np.all(result.px == pxs)
    assert np.all(result.n_per_ftr == n_per_ftrs)
    assert np.all(result.Sigma_id == np.arange(n_Sigmas))
    assert np.all(result.x_feature.values == np.arange(np.max(pxs)))
    assert np.all(result.y_feature.values == np.arange(np.max(pxs)))

    # With ``pys='px'`` the resulting ``py`` values equal ``pxs``.
    assert np.all(result.py.values == pxs)

    # Expected ``var1``: ``np.arange(2)`` along ``dummy`` broadcast over the
    # parameter grid; the ``rep`` coordinate is dropped before comparing.
    target_var1 = xr.DataArray(np.arange(2), dims=('dummy', )).expand_dims(
        px=pxs,
        r=np.sort(rs),
        Sigma_id=np.arange(n_Sigmas),
        n_per_ftr=n_per_ftrs,
        rep=np.arange(n_rep))
    del target_var1.coords['rep']
    assert_xr_equal(result.var1, target_var1)

    # Dimension order of the "true" variables.
    assert result.between_assocs_true.dims == ('px', 'r', 'Sigma_id', 'mode')

    assert result.x_weights_true.dims == ('px', 'r', 'Sigma_id', 'x_feature',
                                          'mode')
    assert result.y_weights_true.dims == ('px', 'r', 'Sigma_id', 'y_feature',
                                          'mode')

    assert result.ax.dims == (
        'px',
        'r',
        'Sigma_id',
    )
    assert result.ay.dims == (
        'px',
        'r',
        'Sigma_id',
    )
    # ``ax`` / ``ay`` carry the values passed in ``powerlaw_decay``.
    assert_allclose(result.ax.values, ax)
    assert_allclose(result.ay.values, ay)

    assert result.latent_expl_var_ratios_x.dims == ('px', 'r', 'Sigma_id',
                                                    'mode')
    assert result.latent_expl_var_ratios_y.dims == ('px', 'r', 'Sigma_id',
                                                    'mode')

    assert result.x_loadings_true.dims == ('px', 'r', 'Sigma_id', 'x_feature',
                                           'mode')
    assert result.x_crossloadings_true.dims == ('px', 'r', 'Sigma_id',
                                                'x_feature', 'mode')
    assert result.y_loadings_true.dims == ('px', 'r', 'Sigma_id', 'y_feature',
                                           'mode')
    assert result.y_crossloadings_true.dims == ('px', 'r', 'Sigma_id',
                                                'y_feature', 'mode')

    # The value returned by ``mk_test_stats`` ends up in ``test_stat1``.
    assert result.test_stat1.dims == ('px', 'r', 'Sigma_id')
    assert np.allclose(result.test_stat1.values, 2.5)

    # ``rotate_XY=True`` is expected to emit a UserWarning; the key is removed
    # again so it does not affect the next call.
    kwargs['rotate_XY'] = True
    assert_warns(UserWarning,
                 gemmr.sample_analysis.analyzers.analyze_model_parameters,
                 **kwargs)
    del kwargs['rotate_XY']

    # ``n_test=1`` is expected to emit a UserWarning as well.
    kwargs['n_test'] = 1
    assert_warns(UserWarning,
                 gemmr.sample_analysis.analyzers.analyze_model_parameters,
                 **kwargs)
示例#11
0
def test_analyze_dataset():
    """Check the outputs of ``analyze_dataset`` on a small dataset.

    Covers three things: the association/weight/loading outputs for a fitted
    ``SVDPLS`` estimator (including a custom addon), a rerun with
    ``?_align_ref`` arguments, and the all-NaN result expected when the
    estimator's ``fit`` raises ``ValueError``.
    """
    def addon_test(estr, X, Y, Xorig, Yorig, x_align_ref, y_align_ref, results,
                   **kwargs):
        # Addon that writes a constant into the results so it can be asserted.
        results['addon_var'] = 3. / 8

    estr = SVDPLS()
    X = np.arange(10).reshape(5, 2)
    Y = X  # identical datasets

    result = gemmr.sample_analysis.analyze_dataset(estr,
                                                   X,
                                                   Y,
                                                   addons=[addon_test])

    assert np.isclose(result.between_corrs_sample, 1.)
    assert np.isclose(result.addon_var, 3. / 8)

    # Fit the estimator directly and compare against the analysis output.
    estr.fit(X, Y)
    assert np.isclose(estr.assocs_[0], result.between_assocs)

    assert np.isclose(
        np.cov(estr.x_scores_[:, 0], estr.y_scores_[:, 0])[0, 1],
        result.between_covs_sample)

    # Expected weights: 1/sqrt(2) for both features; expected loadings: 1.
    tgt_weights = xr.DataArray([[1. / np.sqrt(2)] * 2],
                               dims=('mode', 'x_feature'),
                               coords=dict(x_feature=np.arange(2)),
                               name='x_weights').T
    tgt_loadings = xr.DataArray(np.ones((1, 2), dtype=float),
                                dims=('mode', 'x_orig_feature'),
                                coords=dict(x_orig_feature=np.arange(2)),
                                name='x_loadings').T
    assert_xr_allclose(result.x_weights, tgt_weights)
    assert_xr_allclose(result.x_loadings, tgt_loadings)

    # The Y targets are the X targets with names and dimensions renamed.
    tgt_weights = tgt_weights.rename('y_weights').rename(x_feature='y_feature')
    tgt_loadings = tgt_loadings.rename('y_loadings').rename(
        x_orig_feature='y_orig_feature')

    assert_xr_allclose(result.y_weights, tgt_weights)
    assert_xr_allclose(result.y_loadings, tgt_loadings)

    # --- Rerun with alignment references ---

    assert np.all(result.x_weights.values > 0)
    assert np.all(result.y_weights.values > 0)
    # i.e. if we now rerun analyze_dataset with ``?_align_ref`` the weights
    # should be negative
    # NOTE(review): the result of this rerun is never asserted below --
    # consider adding an explicit check of the weight signs.

    result = gemmr.sample_analysis.analyze_dataset(estr,
                                                   X,
                                                   Y,
                                                   x_align_ref=-np.eye(2),
                                                   y_align_ref=-np.eye(2))

    # --- Estimator whose fit fails ---

    class MockEstr():
        def fit(self, X, Y):
            # Always fail, to exercise the error path of analyze_dataset.
            raise ValueError()

    mock_estr = MockEstr()
    result = gemmr.sample_analysis.analyze_dataset(mock_estr,
                                                   X,
                                                   Y,
                                                   addons=[addon_test])

    # Expected: NaN everywhere except the addon variable, which is still set.
    da_nan = xr.DataArray(np.nan * np.empty((2, 1)),
                          dims=('x_feature', 'mode'),
                          coords=dict(x_feature=np.arange(2)))
    target_result = xr.Dataset(
        dict(between_assocs=np.nan,
             between_covs_sample=np.nan,
             between_corrs_sample=np.nan,
             addon_var=3. / 8,
             x_weights=da_nan,
             y_weights=da_nan.rename(x_feature='y_feature'),
             x_loadings=da_nan.rename(x_feature='x_orig_feature'),
             y_loadings=da_nan.rename(x_feature='y_orig_feature')))
    assert_xr_equal(result, target_result)