def test_multivariate_input_shape():
    """Check learned kernel shapes and the rejection of a second signal set.

    For each learner entry point (batch ``fit``, mini-batch ``fit`` and
    mini-batch ``partial_fit``) the estimator is trained on signals of shape
    ``(n_features, n_dims)`` and every one of the ``n_kernels`` kernels must
    have that same shape.  A fresh, unfitted estimator is then handed signals
    of shape ``(n_features, n_dims_w)`` and must raise ``ValueError``.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 7
    n_dims_w = 6
    expected_shape = (n_features, n_dims)

    # Both signal sets are drawn up front, valid set first, so the shared
    # generator is consumed in a fixed order.
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    Xw = [rng_global.randn(n_features, n_dims_w) for _ in range(n_samples)]

    entry_points = (
        (MultivariateDictLearning, "fit"),
        (MiniBatchMultivariateDictLearning, "fit"),
        (MiniBatchMultivariateDictLearning, "partial_fit"),
    )
    for estimator_cls, method in entry_points:
        learner = getattr(estimator_cls(n_kernels=n_kernels), method)(X)
        for idx in range(n_kernels):
            assert learner.kernels_[idx].shape == expected_shape

        unfitted = estimator_cls(n_kernels=n_kernels)
        assert_raises(ValueError, getattr(unfitted, method), Xw)
def test_dict_init():
    """Check that ``dict_init`` is returned unchanged when ``learning_rate`` is 0.

    Both the batch and the mini-batch learner are fitted twice on the same
    signals with a zero learning rate; afterwards every learned kernel must be
    almost equal to the corresponding unit-norm kernel that was supplied.
    """
    # NOTE(review): another ``test_dict_init`` appears later in this file; the
    # later definition wins at import time - confirm which one should run.
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    d = [rng_global.randn(n_features, n_dims) for _ in range(n_kernels)]
    # Scale each initial kernel to unit Frobenius norm, in place.
    for kernel in d:
        kernel /= np.linalg.norm(kernel, "fro")

    shared = {
        "n_kernels": n_kernels,
        "random_state": 0,
        "n_nonzero_coefs": 1,
        "learning_rate": 0.0,
        "dict_init": d,
    }

    batch_learner = MultivariateDictLearning(max_iter=1, verbose=5, **shared)
    batch_learner = batch_learner.fit(X)
    batch_learner = batch_learner.fit(X)
    for idx in range(n_kernels):
        assert_array_almost_equal(batch_learner.kernels_[idx], d[idx])

    # A sparsity check on ``transform`` output used to follow here; it is
    # disabled in the original and stays disabled.

    online_learner = MiniBatchMultivariateDictLearning(n_iter=1, verbose=1, **shared)
    online_learner = online_learner.fit(X)
    online_learner = online_learner.fit(X)
    for idx in range(n_kernels):
        assert_array_almost_equal(online_learner.kernels_[idx], d[idx])
def test_callback():
    """Check that both learners run with a callback and still honour sparsity.

    The callback only looks up ``"dict_obj"`` in the mapping it receives.
    After fitting, the code computed for the first signal must hold at most
    one entry, matching ``n_nonzero_coefs=1``.
    """
    # NOTE(review): another ``test_callback`` appears later in this file; the
    # later definition wins at import time - confirm which one should run.
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    def my_callback(loc):
        # The lookup itself is the check: a missing key raises KeyError.
        _ = loc["dict_obj"]

    learners = (
        (MultivariateDictLearning, {"max_iter": 2}),
        (MiniBatchMultivariateDictLearning, {"n_iter": 2}),
    )
    for estimator_cls, iteration_kw in learners:
        learner = estimator_cls(
            n_kernels=n_kernels,
            random_state=0,
            n_nonzero_coefs=1,
            callback=my_callback,
            **iteration_kw,
        )
        code = learner.fit(X).transform(X[0])
        assert len(code[0]) <= 1
def test_sparse_encode():
    """Check that ``multivariate_sparse_encode`` respects ``n_nonzero_coefs=1``.

    A batch learner is fitted for two iterations, then the fitted dictionary
    is used to encode the training signals; the code returned for the first
    signal must hold at most one entry.

    The signals are generated locally rather than read from a module-level
    ``X``, so the test does not depend on state defined elsewhere, and a plain
    ``assert`` replaces the legacy ``assert_true`` helper.
    """
    # NOTE(review): another ``test_sparse_encode`` appears later in this file;
    # the later definition wins at import time - confirm which one should run.
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8
    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=2, n_nonzero_coefs=1
    )
    dico = dico.fit(X)
    # Only the second element of the returned pair (the codes) is inspected.
    _, code = multivariate_sparse_encode(
        X, dico, n_nonzero_coefs=1, n_jobs=-1, verbose=3
    )
    assert len(code[0]) <= 1
def test_sparse_encode():
    """Check the sparsity of codes produced by ``multivariate_sparse_encode``.

    Ten random signals of shape ``(5, 3)`` train an 8-kernel batch learner for
    two iterations.  The same signals are then encoded with
    ``n_nonzero_coefs=1`` and the code of the first signal must contain at
    most one entry.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    learner = MultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        max_iter=2,
        n_nonzero_coefs=1,
    ).fit(X)

    # The encoder returns a pair; only its second element is checked.
    _unused, code = multivariate_sparse_encode(
        X,
        learner,
        n_nonzero_coefs=1,
        n_jobs=-1,
        verbose=3,
    )
    first_code = code[0]
    assert len(first_code) <= 1
def test_mdla_nonzero_coefs():
    """Check that ``transform`` yields at most ``n_nonzero_coefs`` entries.

    Both the batch and the mini-batch learner are fitted for three iterations
    with ``n_nonzero_coefs=3``; the code of the first signal must then hold at
    most three entries.

    The signals are generated locally rather than read from a module-level
    ``X``, and plain ``assert`` statements replace the legacy ``assert_true``
    helper.
    """
    # NOTE(review): another ``test_mdla_nonzero_coefs`` appears later in this
    # file; the later definition wins at import time - confirm which should run.
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8

    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=3, n_nonzero_coefs=3, verbose=5
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 3

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=3, n_nonzero_coefs=3, verbose=5
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 3
def test_X_array():
    """Check that both learners accept a single 3-D array as training input.

    ``X`` is one array of shape ``(n_samples, n_features, n_dims)`` instead of
    a list of 2-D signals.  After three iterations with ``n_nonzero_coefs=3``
    the code of ``X[0]`` must hold at most three entries.

    The dimensions are declared locally rather than read from module-level
    names, and plain ``assert`` statements replace the legacy ``assert_true``
    helper.
    """
    # NOTE(review): another ``test_X_array`` appears later in this file; the
    # later definition wins at import time - confirm which one should run.
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    X = rng_global.randn(n_samples, n_features, n_dims)

    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=3, n_nonzero_coefs=3, verbose=5
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 3

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=3, n_nonzero_coefs=3, verbose=5
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 3
def test_X_array():
    """Check that a 3-D array is accepted wherever a list of signals is.

    The training data is one ``(10, 5, 3)`` array.  Each learner is fitted for
    three iterations with ``n_nonzero_coefs=3`` and the code computed for the
    first slice of the array must contain at most three entries.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    max_coefs = 3
    X = rng_global.randn(n_samples, n_features, n_dims)

    learners = (
        (MultivariateDictLearning, {"max_iter": 3}),
        (MiniBatchMultivariateDictLearning, {"n_iter": 3}),
    )
    # Each estimator is built only when its turn comes, so construction and
    # fitting stay interleaved.
    for estimator_cls, iteration_kw in learners:
        learner = estimator_cls(
            n_kernels=n_kernels,
            random_state=0,
            n_nonzero_coefs=3,
            verbose=5,
            **iteration_kw,
        )
        code = learner.fit(X).transform(X[0])
        assert len(code[0]) <= max_coefs
def test_callback():
    """Check that both learners run with a callback and still honour sparsity.

    The callback only looks up ``'dict_obj'`` in the mapping it receives.
    After two iterations with ``n_nonzero_coefs=1`` the code of the first
    signal must hold at most one entry.

    The signals are generated locally rather than read from a module-level
    ``X``, the callback no longer binds an unused local, and plain ``assert``
    statements replace the legacy ``assert_true`` helper.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8

    def my_callback(loc):
        # The lookup itself is the check: a missing key raises KeyError.
        _ = loc['dict_obj']

    dico = MultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        max_iter=2,
        n_nonzero_coefs=1,
        callback=my_callback,
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 1

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=2,
        n_nonzero_coefs=1,
        callback=my_callback,
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 1
def test_mdla_dict_update():
    """Check that a second training pass changes every kernel.

    For batch ``fit``, mini-batch ``fit`` and mini-batch ``partial_fit``, the
    kernels are snapshotted after one call and again after a second call on
    the same estimator; for each kernel pair the summed difference must be
    non-zero.
    """
    # NOTE(review): another ``test_mdla_dict_update`` appears later in this
    # file; the later definition wins at import time - confirm which should run.
    n_kernels = 10
    # 80 samples are used; a 100-sample setting is left disabled in the original.
    n_samples, n_features, n_dims = 80, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    scenarios = (
        (MultivariateDictLearning, {"max_iter": 10}, "fit"),
        (MiniBatchMultivariateDictLearning, {"n_iter": 10}, "fit"),
        (MiniBatchMultivariateDictLearning, {"n_iter": 10}, "partial_fit"),
    )
    for estimator_cls, iteration_kw, method in scenarios:
        learner = estimator_cls(
            n_kernels=n_kernels, random_state=0, n_jobs=-1, **iteration_kw
        )
        learner = getattr(learner, method)(X)
        kernels_before = list(learner.kernels_)
        learner = getattr(learner, method)(X)
        kernels_after = list(learner.kernels_)
        for previous, current in zip(kernels_before, kernels_after):
            assert (previous - current).sum() != 0.0
def test_mdla_nonzero_coefs():
    """Check that ``transform`` yields at most ``n_nonzero_coefs`` entries.

    Ten random signals of shape ``(5, 3)`` train each learner for three
    iterations with ``n_nonzero_coefs=3``; the code of the first signal must
    then hold at most three entries.

    Plain ``assert`` statements replace the legacy ``assert_true`` helper so
    the test matches the other tests in this file and needs no extra import.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8

    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=3, n_nonzero_coefs=3, verbose=5
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 3

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=3, n_nonzero_coefs=3, verbose=5
    )
    code = dico.fit(X).transform(X[0])
    assert len(code[0]) <= 3
def test_dict_init():
    """Check that ``dict_init`` is returned unchanged when ``learning_rate`` is 0.

    Both learners are fitted twice on the same signals with a zero learning
    rate; afterwards every learned kernel must be almost equal to the
    corresponding unit-norm kernel that was supplied.

    The signals and their dimensions are declared locally rather than read
    from module-level names, so the test does not depend on state defined
    elsewhere.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    n_kernels = 8
    d = [rng_global.randn(n_features, n_dims) for _ in range(n_kernels)]
    # Scale each initial kernel to unit Frobenius norm, in place.
    for kernel in d:
        kernel /= np.linalg.norm(kernel, 'fro')

    dico = MultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        max_iter=1,
        n_nonzero_coefs=1,
        learning_rate=0.,
        dict_init=d,
        verbose=5,
    ).fit(X)
    dico = dico.fit(X)
    for i in range(n_kernels):
        assert_array_almost_equal(dico.kernels_[i], d[i])

    # A sparsity check on ``transform`` output used to follow here; it is
    # disabled in the original and stays disabled.

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=1,
        n_nonzero_coefs=1,
        dict_init=d,
        verbose=1,
        learning_rate=0.,
    ).fit(X)
    dico = dico.fit(X)
    for i in range(n_kernels):
        assert_array_almost_equal(dico.kernels_[i], d[i])
def test_mdla_dict_init():
    """Check that training moves the dictionary away from ``dict_init``.

    A batch learner starts from ten random kernels and runs ten iterations on
    twenty random signals.  The differences between learned and initial
    kernels are summed over all kernels and the total must be non-zero.
    """
    n_kernels = 10
    n_samples, n_features, n_dims = 20, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]
    # Draw the initial kernels from the same shared generator as the signals
    # (and as every other test here) instead of the process-global
    # ``np.random``, so the whole test depends on a single random source.
    # NOTE(review): presumably ``rng_global`` is seeded at module level - confirm.
    dict_init = [rng_global.randn(n_features, n_dims) for _ in range(n_kernels)]

    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=10, dict_init=dict_init
    ).fit(X)

    # Signed differences are accumulated exactly as before; only the source
    # of the initial kernels changed.
    diff = sum(
        ((dico.kernels_[i] - dict_init[i]).sum() for i in range(n_kernels)),
        0.0,
    )
    assert diff != 0
def test_mdla_dict_update():
    """Check that a second training pass changes every kernel.

    For batch ``fit``, mini-batch ``fit`` and mini-batch ``partial_fit``, the
    kernels are snapshotted after one call and again after a second call on
    the same estimator; for each kernel pair the summed difference must be
    non-zero.

    Plain ``assert`` statements replace the legacy ``assert_true`` helper so
    the test needs no extra import and matches the other tests in this file.
    """
    n_kernels = 10
    # 80 samples are used; a 100-sample setting is left disabled in the original.
    n_samples, n_features, n_dims = 80, 5, 3
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    dico = MultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, max_iter=10, n_jobs=-1
    ).fit(X)
    first_epoch = list(dico.kernels_)
    dico = dico.fit(X)
    second_epoch = list(dico.kernels_)
    for k, c in zip(first_epoch, second_epoch):
        assert (k - c).sum() != 0.

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=10, n_jobs=-1
    ).fit(X)
    first_epoch = list(dico.kernels_)
    dico = dico.fit(X)
    second_epoch = list(dico.kernels_)
    for k, c in zip(first_epoch, second_epoch):
        assert (k - c).sum() != 0.

    dico = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels, random_state=0, n_iter=10, n_jobs=-1
    ).partial_fit(X)
    first_epoch = list(dico.kernels_)
    dico = dico.partial_fit(X)
    second_epoch = list(dico.kernels_)
    for k, c in zip(first_epoch, second_epoch):
        assert (k - c).sum() != 0.
def test_mdla_nonzero_coef_errors():
    """Check that out-of-range ``n_nonzero_coefs`` values make ``fit`` raise.

    The batch learner is configured with ``n_nonzero_coefs=0`` and the
    mini-batch learner with ``n_nonzero_coefs=n_kernels + 1``; calling ``fit``
    on either must raise ``ValueError``.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    zero_coefs = MultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        max_iter=2,
        n_nonzero_coefs=0,
    )
    assert_raises(ValueError, zero_coefs.fit, X)

    excess_coefs = MiniBatchMultivariateDictLearning(
        n_kernels=n_kernels,
        random_state=0,
        n_iter=2,
        n_nonzero_coefs=n_kernels + 1,
    )
    assert_raises(ValueError, excess_coefs.fit, X)
def test_mdla_normalization():
    """Check that every learned kernel has unit Frobenius norm.

    Both the batch and the mini-batch learner are fitted for two iterations
    and the norm of each resulting kernel is compared with ``1.0``.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    learners = (
        (MultivariateDictLearning, {"max_iter": 2}),
        (MiniBatchMultivariateDictLearning, {"n_iter": 2}),
    )
    for estimator_cls, iteration_kw in learners:
        fitted = estimator_cls(
            n_kernels=n_kernels, random_state=0, verbose=1, **iteration_kw
        ).fit(X)
        for kernel in fitted.kernels_:
            frobenius = np.linalg.norm(kernel, "fro")
            assert_almost_equal(frobenius, 1.0)
def test_mdla_shapes():
    """Check that learned kernels have shape ``(n_features, n_dims)``.

    Both the batch and the mini-batch learner are fitted for ten iterations
    on ten random signals and each of the ``n_kernels`` kernels is checked.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    n_kernels = 8
    signal_shape = (n_features, n_dims)
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    learners = (
        (MultivariateDictLearning, {"max_iter": 10}),
        (MiniBatchMultivariateDictLearning, {"n_iter": 10}),
    )
    for estimator_cls, iteration_kw in learners:
        fitted = estimator_cls(
            n_kernels=n_kernels, random_state=0, verbose=5, **iteration_kw
        ).fit(X)
        # Index explicitly so that a dictionary shorter than n_kernels fails.
        for idx in range(n_kernels):
            assert fitted.kernels_[idx].shape == signal_shape
def test_n_kernels():
    """Check the dictionary size obtained when ``n_kernels`` is not given.

    For batch ``fit``, mini-batch ``fit`` and mini-batch ``partial_fit`` the
    learned dictionary must contain ``2 * n_features`` kernels.
    """
    n_samples, n_features, n_dims = 10, 5, 3
    expected_count = 2 * n_features
    X = [rng_global.randn(n_features, n_dims) for _ in range(n_samples)]

    scenarios = (
        (MultivariateDictLearning, {"max_iter": 2}, "fit"),
        (MiniBatchMultivariateDictLearning, {"n_iter": 2}, "fit"),
        (MiniBatchMultivariateDictLearning, {"n_iter": 2}, "partial_fit"),
    )
    for estimator_cls, iteration_kw, method in scenarios:
        learner = estimator_cls(
            random_state=0, n_nonzero_coefs=1, verbose=5, **iteration_kw
        )
        fitted = getattr(learner, method)(X)
        assert len(fitted.kernels_) == expected_count
# Batch-mode section of a benchmark script.
#
# What the statements below do, as far as this fragment shows:
#   * ``mp_range`` covers process counts 1 .. cpu_count() inclusive.
#   * For each count ``p`` a MultivariateDictLearning estimator is built with
#     ``n_jobs=p`` and ``random_state=rng_global``; ``fit(X)`` is bracketed by
#     two ``time()`` calls and the elapsed value divided by ``max_iter`` is
#     appended to ``iter_time``.
#   * ``it_separator`` is incremented and appended to ``plot_separator``.
#   * The collected values are handed to ``benchmarking_plot`` under the
#     figure name 'minibatch-performance'.
#
# NOTE(review): the line breaks and indentation of this section have been
# lost, so it cannot be determined from here whether ``it_separator += 1``
# and ``plot_separator.append(it_separator)`` belong inside the ``for`` loop
# or after it - confirm against the original script before reformatting.
# NOTE(review): ``max_iter``, ``n_kernels``, ``n_nonzero_coefs``,
# ``learning_rate``, ``kernel_init_len``, ``X``, ``iter_time``,
# ``it_separator``, ``plot_separator`` and ``minibatch_range`` are defined
# outside this fragment.
# Batch learning mp_range = range(1, cpu_count() + 1) for p in mp_range: print('\nProcessing ', max_iter, 'iterations in batch mode, with', p, 'processes:', end='') n_jobs = p learned_dict = MultivariateDictLearning(n_kernels=n_kernels, max_iter=max_iter, verbose=1, n_nonzero_coefs=n_nonzero_coefs, n_jobs=n_jobs, learning_rate=learning_rate, kernel_init_len=kernel_init_len, dict_init=None, random_state=rng_global) ts = time() learned_dict = learned_dict.fit(X) iter_time.append((time() - ts) / max_iter) it_separator += 1 plot_separator.append(it_separator) print('Done benchmarking') figname = 'minibatch-performance' print('Plotting results in', figname) benchmarking_plot(figname, iter_time, plot_separator, minibatch_range, mp_range)