Example No. 1
    def test_finds_elements_of_4_point_convex_hull(self):
        """Test finds archetypes in convex hull for 3D example."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 3
        n_samples = 123
        n_components = 4
        max_iter = 500
        tolerance = 1e-12

        basis = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])

        expected_S = right_stochastic_matrix((n_samples, n_components),
                                             random_state=random_state)

        assignments = np.array([8, 9, 56, 90])
        for i in range(n_components):
            expected_S[assignments[i]] = np.zeros(n_components)
            expected_S[assignments[i], i] = 1

        expected_C = np.zeros((n_components, n_samples), dtype='f8')
        for i in range(n_components):
            expected_C[i, assignments[i]] = 1

        X = expected_S.dot(basis)

        self.assertTrue(
            np.linalg.norm(X - expected_S.dot(expected_C.dot(X))) < tolerance)

        K = X.dot(X.T)

        C = right_stochastic_matrix((n_components, n_samples),
                                    random_state=random_state)
        S = right_stochastic_matrix((n_samples, n_components),
                                    random_state=random_state)

        self.assertTrue(np.allclose(C.sum(axis=1), 1, 1e-12))
        self.assertTrue(np.allclose(S.sum(axis=1), 1, 1e-12))

        delta = 0
        aa = KernelAA(n_components=n_components,
                      delta=delta,
                      init='custom',
                      max_iterations=max_iter,
                      tolerance=tolerance)

        solution_S = aa.fit_transform(K, dictionary=C, weights=S)
        solution_C = aa.dictionary_

        self.assertTrue(aa.n_iter_ < max_iter)

        self.assertTrue(np.allclose(solution_C.sum(axis=1), 1, 1e-12))
        self.assertTrue(np.allclose(solution_S.sum(axis=1), 1, 1e-12))

        main_components = solution_C.argmax(axis=1)
        main_components = sorted(main_components)
        for i in range(n_components):
            self.assertEqual(main_components[i], assignments[i])
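The snippets in this listing come from a single Python test suite and share the same setup. A minimal sketch of the imports they appear to assume: numpy and a check_random_state helper with scikit-learn's semantics are clear from usage, while the module providing KernelAA, GPNHConvexCoding, MTDConvexCoding and the stochastic-matrix helpers is never named here, so the import path below is a hypothetical placeholder.

import numpy as np
from sklearn.utils import check_random_state

# Hypothetical import path: the package under test is not named in these snippets.
from convex_coding import (KernelAA, GPNHConvexCoding, MTDConvexCoding,
                           left_stochastic_matrix, right_stochastic_matrix)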
Example No. 2
    def test_analytic_gradient_matches_numerical_gradient_with_zero_delta(
            self):
        """Test analytical gradient matches finite difference approximation."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 3
        n_components = 5
        n_samples = 10

        X = random_state.uniform(size=(n_samples, n_features))
        K = X.dot(X.T)

        C = right_stochastic_matrix((n_components, n_samples),
                                    random_state=random_state)
        S = right_stochastic_matrix((n_samples, n_components),
                                    random_state=random_state)

        self.assertTrue(np.allclose(C.sum(axis=1), 1, 1e-12))
        self.assertTrue(np.allclose(S.sum(axis=1), 1, 1e-12))

        delta = 0
        aa = KernelAA(n_components=n_components, delta=delta)

        aa.K = K
        aa.C = C
        aa.S = S

        aa._initialize_workspace()
        aa._update_weights_gradient()

        analytic_grad_S = aa.grad_S

        def central_difference_deriv(i, j, h=1e-4):
            old_x = S[i, j]

            xmh = old_x - h
            aa.S[i, j] = xmh
            aa._initialize_workspace()
            fmh = aa._evaluate_cost()

            xph = old_x + h
            aa.S[i, j] = xph
            aa._initialize_workspace()
            fph = aa._evaluate_cost()

            aa.S[i, j] = old_x

            return (fph - fmh) / (2 * h)

        numeric_grad_S = np.zeros((n_samples, n_components))
        for i in range(n_samples):
            for j in range(n_components):
                numeric_grad_S[i, j] = central_difference_deriv(i, j)

        self.assertTrue(np.allclose(analytic_grad_S, numeric_grad_S, 1e-4))
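The finite-difference helper above (and its counterparts in Example No. 17) implements the standard second-order central difference, (f(x + h) - f(x - h)) / (2h), whose truncation error is O(h^2). A generic sketch of the same pattern, written against an arbitrary zero-argument cost callable and a parameter array instead of the model internals:

def central_difference_gradient(cost, param, h=1e-4):
    """Entrywise central-difference approximation of d(cost)/d(param).

    `cost` is a zero-argument callable returning a scalar; `param` is the
    float array it depends on, perturbed in place and restored afterwards.
    """
    grad = np.zeros_like(param)
    for idx in np.ndindex(*param.shape):
        old = param[idx]
        param[idx] = old - h
        f_minus = cost()
        param[idx] = old + h
        f_plus = cost()
        param[idx] = old  # restore the perturbed entry
        grad[idx] = (f_plus - f_minus) / (2 * h)
    return grad

In the test above, cost would re-run aa._initialize_workspace() and return aa._evaluate_cost(), with param bound to aa.S.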
Example No. 3
    def test_repeated_updates_converge_with_nonzero_delta(self):
        """Test repeated updates converge to a fixed point with non-zero delta."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 30
        n_components = 11
        n_samples = 320
        max_iter = 1000
        tolerance = 1e-4

        X = random_state.uniform(size=(n_samples, n_features))
        K = X.dot(X.T)

        C = right_stochastic_matrix((n_components, n_samples),
                                    random_state=random_state)
        S = right_stochastic_matrix((n_samples, n_components),
                                    random_state=random_state)

        self.assertTrue(np.allclose(C.sum(axis=1), 1, tolerance))
        self.assertTrue(np.allclose(S.sum(axis=1), 1, tolerance))

        delta = 3.2
        aa = KernelAA(n_components=n_components, delta=delta)

        aa.K = K
        aa.C = C
        aa.S = S

        aa._initialize_workspace()

        cost_delta = 1 + tolerance
        old_cost = aa._evaluate_cost()
        new_cost = old_cost
        n_iter = 0
        while abs(cost_delta) > tolerance and n_iter < max_iter:
            old_cost = new_cost
            error = aa._update_dictionary()
            self.assertEqual(error, 0)
            new_cost = aa._evaluate_cost()

            cost_delta = new_cost - old_cost

            self.assertTrue(cost_delta <= 0)

            n_iter += 1

        self.assertTrue(n_iter < max_iter)

        updated_C = aa.C
        self.assertTrue(np.allclose(updated_C.sum(axis=1), 1, 1e-12))

        updated_alpha = aa.alpha
        for i in range(n_components):
            self.assertTrue(1 - delta <= updated_alpha[i] <= 1 + delta)
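The final assertion documents the role of delta: with delta > 0 each dictionary element is allowed an extra scale factor alpha_i constrained to [1 - delta, 1 + delta], which appears to be the usual relaxed archetypal-analysis formulation in which archetypes may lie slightly outside the convex hull of the data; delta = 0, used in the other KernelAA tests, recovers the strict constraint.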
Example No. 4
    def test_repeated_updates_converge_with_zero_delta(self):
        """Test repeated updates converge to a fixed point with delta = 0."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 10
        n_components = 3
        n_samples = 600
        max_iter = 100
        tolerance = 1e-6

        X = random_state.uniform(size=(n_samples, n_features))
        K = X.dot(X.T)

        C = right_stochastic_matrix((n_components, n_samples),
                                    random_state=random_state)
        S = right_stochastic_matrix((n_samples, n_components),
                                    random_state=random_state)

        self.assertTrue(np.allclose(C.sum(axis=1), 1, tolerance))
        self.assertTrue(np.allclose(S.sum(axis=1), 1, tolerance))

        delta = 0
        aa = KernelAA(n_components=n_components, delta=delta)

        aa.K = K
        aa.C = C
        aa.S = S

        aa._initialize_workspace()

        cost_delta = 1 + tolerance
        old_cost = aa._evaluate_cost()
        new_cost = old_cost
        n_iter = 0
        while abs(cost_delta) > tolerance and n_iter < max_iter:
            old_cost = new_cost
            error = aa._update_weights()
            self.assertEqual(error, 0)
            new_cost = aa._evaluate_cost()

            cost_delta = new_cost - old_cost

            self.assertTrue(cost_delta <= 0)

            n_iter += 1

        self.assertTrue(n_iter < max_iter)

        updated_S = aa.S
        self.assertTrue(np.allclose(updated_S.sum(axis=1), 1, 1e-12))
Example No. 5
    def test_single_update_reduces_cost_function_with_nonzero_epsilon(self):
        """Test single dictionary update reduces cost function."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 11
        n_components = 6
        n_samples = 230

        X = random_state.uniform(size=(n_samples, n_features))
        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, 1e-14))

        model = GPNHConvexCoding(n_components=n_components, epsilon_states=3.2)

        model.X = X
        model.Gamma = Gamma
        model.S = S

        model._initialize_workspace()

        initial_cost = model._evaluate_cost()

        error = model._update_dictionary()

        self.assertEqual(error, 0)

        final_cost = model._evaluate_cost()

        self.assertTrue(final_cost <= initial_cost)
Example No. 6
    def test_returns_zero_for_perfect_reconstruction(self):
        """Test cost is zero for perfect factorization."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 5
        n_components = 3
        n_samples = 30
        tolerance = 1e-14

        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, tolerance))

        X = Gamma.dot(S)

        model = GPNHConvexCoding(n_components=n_components, epsilon_states=0)

        model.X = X
        model.Gamma = Gamma
        model.S = S

        model._initialize_workspace()

        cost = model._evaluate_cost()
        expected_cost = 0

        self.assertEqual(cost, expected_cost)
Example No. 7
def _initialize_kernel_aa_weights_random(kernel,
                                         n_components,
                                         random_state=None):
    rng = check_random_state(random_state)

    n_samples = kernel.shape[0]

    return right_stochastic_matrix((n_samples, n_components), random_state=rng)
Example No. 8
def _initialize_gpnh_convex_coding_weights_random(data,
                                                  n_components,
                                                  random_state=None):
    rng = check_random_state(random_state)

    n_samples = data.shape[0]

    return right_stochastic_matrix((n_samples, n_components), random_state=rng)
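right_stochastic_matrix and left_stochastic_matrix are used throughout but never defined in these snippets. A minimal sketch consistent with how they are called (uniform draws normalised so that rows, respectively columns, sum to one):

def right_stochastic_matrix(shape, random_state=None):
    """Random non-negative matrix whose rows each sum to 1."""
    rng = check_random_state(random_state)
    weights = rng.uniform(size=shape)
    return weights / weights.sum(axis=1, keepdims=True)


def left_stochastic_matrix(shape, random_state=None):
    """Random non-negative matrix whose columns each sum to 1."""
    rng = check_random_state(random_state)
    weights = rng.uniform(size=shape)
    return weights / weights.sum(axis=0, keepdims=True)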
Example No. 9
    def test_exact_solution_is_fixed_point_with_zero_epsilon(self):
        """Test exact solution is a fixed point of update step."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 5
        n_components = 6
        n_samples = 40
        tolerance = 1e-12

        S = random_state.uniform(size=(n_components, n_features))

        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, tolerance))

        X = Gamma.dot(S)

        lags = [1, 12]
        n_lags = len(lags)

        order_weights = random_state.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for i in range(n_lags):
            transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=0,
                                epsilon_weights=0,
                                lags=lags)

        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        initial_cost = model._evaluate_cost()

        error = model._update_weights()

        self.assertEqual(error, 0)

        final_cost = model._evaluate_cost()

        updated_Gamma = model.Gamma

        self.assertTrue(final_cost <= initial_cost)
        self.assertTrue(np.allclose(Gamma, updated_Gamma, tolerance))
Example No. 10
    def test_single_dictionary_update_reduces_cost_with_nonzero_delta(self):
        """Test single update step reduces cost function."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 10
        n_components = 5
        n_samples = 400

        X = random_state.uniform(size=(n_samples, n_features))
        K = X.dot(X.T)

        C = right_stochastic_matrix((n_components, n_samples),
                                    random_state=random_state)
        S = right_stochastic_matrix((n_samples, n_components),
                                    random_state=random_state)

        self.assertTrue(np.allclose(C.sum(axis=1), 1, 1e-12))
        self.assertTrue(np.allclose(S.sum(axis=1), 1, 1e-12))

        delta = 1.2
        aa = KernelAA(n_components=n_components, delta=delta)

        aa.K = K
        aa.C = C
        aa.S = S

        aa._initialize_workspace()

        initial_cost = aa._evaluate_cost()

        error = aa._update_dictionary()

        self.assertEqual(error, 0)

        final_cost = aa._evaluate_cost()

        self.assertTrue(final_cost <= initial_cost)

        updated_C = aa.C

        self.assertTrue(np.allclose(updated_C.sum(axis=1), 1, 1e-12))
Example No. 11
    def test_single_update_reduces_cost_function_with_zero_epsilon(self):
        """Test single weights update reduces cost function."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 25
        n_components = 6
        n_samples = 300

        X = random_state.uniform(size=(n_samples, n_features))
        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, 1e-14))

        lags = [1, 12]
        n_lags = len(lags)

        order_weights = random_state.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for i in range(n_lags):
            transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=2.3,
                                epsilon_weights=0,
                                lags=lags)

        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        initial_cost = model._evaluate_cost()

        error = model._update_weights()

        self.assertEqual(error, 0)

        final_cost = model._evaluate_cost()

        self.assertTrue(final_cost <= initial_cost)
Example No. 12
    def test_repeated_updates_converge_with_nonzero_epsilon(self):
        """Test repeated updates converge to fixed point."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 35
        n_components = 4
        n_samples = 500
        max_iter = 100
        tolerance = 1e-6

        X = random_state.uniform(size=(n_samples, n_features))
        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, 1e-12))

        model = GPNHConvexCoding(n_components=n_components,
                                 epsilon_states=1.3,
                                 tolerance=tolerance,
                                 max_iterations=max_iter)

        model.X = X
        model.Gamma = Gamma
        model.S = S

        model._initialize_workspace()

        cost_delta = 1 + tolerance
        old_cost = model._evaluate_cost()
        new_cost = old_cost
        n_iter = 0

        while abs(cost_delta) > tolerance and n_iter < max_iter:
            old_cost = new_cost
            error = model._update_weights()
            self.assertEqual(error, 0)
            new_cost = model._evaluate_cost()

            cost_delta = new_cost - old_cost

            self.assertTrue(cost_delta <= 0)

            n_iter += 1

        self.assertTrue(n_iter < max_iter)
Example No. 13
    def test_returns_zero_for_perfect_reconstruction(self):
        """Test cost is zero for perfect factorization."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 19
        n_components = 5
        n_samples = 30
        tolerance = 1e-14

        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, tolerance))

        X = Gamma.dot(S)

        lags = np.arange(1, 3)
        n_lags = lags.size
        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=0,
                                epsilon_weights=0,
                                lags=lags)

        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = random_state.uniform(size=(n_lags, ))
        model.order_weights /= model.order_weights.sum()

        model.transition_matrices = np.empty(
            (n_lags, n_components, n_components))
        for i in range(n_lags):
            model.transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model._initialize_workspace()

        cost = model._evaluate_cost()
        expected_cost = 0

        self.assertEqual(cost, expected_cost)
Example No. 14
    def test_exact_solution_is_fixed_point_with_nonzero_epsilon(self):
        """Test exact solution is a fixed point of update step."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 10
        n_components = 7
        n_samples = 40
        tolerance = 1e-12

        S = random_state.uniform(size=(n_components, n_features))

        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, tolerance))

        X = Gamma.dot(S)

        model = GPNHConvexCoding(n_components=n_components, epsilon_states=4.0)

        model.X = X
        model.Gamma = Gamma
        model.S = S

        model._initialize_workspace()

        initial_cost = model._evaluate_cost()

        error = model._update_weights()

        self.assertEqual(error, 0)

        final_cost = model._evaluate_cost()

        updated_Gamma = model.Gamma

        self.assertTrue(final_cost <= initial_cost)
        self.assertTrue(np.allclose(Gamma, updated_Gamma, tolerance))
Example No. 15
def _initialize_fembv_weights_random(n_samples,
                                     n_components,
                                     random_state=None):
    rng = check_random_state(random_state)

    return right_stochastic_matrix((n_samples, n_components), random_state=rng)
Example No. 16
    def test_repeated_updates_converge_with_zero_epsilon(self):
        """Test repeated updates converge to fixed point."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 10
        n_components = 9
        n_samples = 500
        max_iter = 100
        tolerance = 1e-6

        X = random_state.uniform(size=(n_samples, n_features))
        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, 1e-12))

        lags = [1, 12]
        n_lags = len(lags)

        order_weights = random_state.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for i in range(n_lags):
            transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=5.3,
                                epsilon_weights=0,
                                lags=lags,
                                tolerance=tolerance,
                                max_iterations=max_iter)

        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        cost_delta = 1 + tolerance
        old_cost = model._evaluate_cost()
        new_cost = old_cost
        n_iter = 0

        while abs(cost_delta) > tolerance and n_iter < max_iter:
            old_cost = new_cost
            error = model._update_weights()
            self.assertEqual(error, 0)
            new_cost = model._evaluate_cost()

            cost_delta = new_cost - old_cost

            self.assertTrue(cost_delta <= 0)

            n_iter += 1

        self.assertTrue(n_iter < max_iter)
Example No. 17
    def test_analytic_gradient_matches_numerical_gradient(self):
        """Test analytical gradient matches finite difference approximation."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 3
        n_components = 2
        n_samples = 10

        X = random_state.uniform(size=(n_samples, n_features))
        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, 1e-12))

        lags = [1, 3]
        n_lags = len(lags)

        order_weights = random_state.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for i in range(n_lags):
            transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=1.0,
                                epsilon_weights=1.2,
                                lags=lags)

        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        model._update_parameters_gradient()

        analytic_grad_order_weights = model.grad_order_weights
        analytic_grad_transition_matrices = model.grad_transition_matrices

        def central_difference_order_weights_deriv(i, h=1e-4):
            old_x = model.order_weights[i]

            xmh = old_x - h
            model.order_weights[i] = xmh
            model._initialize_workspace()
            fmh = model._evaluate_cost()

            xph = old_x + h
            model.order_weights[i] = xph
            model._initialize_workspace()
            fph = model._evaluate_cost()

            model.order_weights[i] = old_x

            return (fph - fmh) / (2 * h)

        def central_difference_transition_matrices_deriv(i, j, k, h=1e-4):
            old_x = model.transition_matrices[i, j, k]

            xmh = old_x - h
            model.transition_matrices[i, j, k] = xmh
            model._initialize_workspace()
            fmh = model._evaluate_cost()

            xph = old_x + h
            model.transition_matrices[i, j, k] = xph
            model._initialize_workspace()
            fph = model._evaluate_cost()

            model.transition_matrices[i, j, k] = old_x

            return (fph - fmh) / (2 * h)

        numeric_grad_order_weights = np.zeros((n_lags, ))
        for i in range(n_lags):
            numeric_grad_order_weights[i] = (
                central_difference_order_weights_deriv(i))

        self.assertTrue(
            np.allclose(analytic_grad_order_weights,
                        numeric_grad_order_weights, 1e-4))

        numeric_grad_transition_matrices = np.zeros(
            (n_lags, n_components, n_components))
        for i in range(n_lags):
            for j in range(n_components):
                for k in range(n_components):
                    numeric_grad_transition_matrices[i, j, k] = \
                        central_difference_transition_matrices_deriv(i, j, k)

        self.assertTrue(
            np.allclose(analytic_grad_transition_matrices,
                        numeric_grad_transition_matrices, 1e-4))
Example No. 18
    def test_exact_solution_with_zero_delta_is_fixed_point(self):
        """Test exact solution for weights is fixed point of update step."""

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 30
        n_components = 10
        n_samples = 130
        tolerance = 1e-12

        basis = random_state.uniform(size=(n_components, n_features))

        S = right_stochastic_matrix((n_samples, n_components),
                                    random_state=random_state)

        archetype_indices = np.zeros(n_components, dtype='i8')
        for i in range(n_components):
            new_index = False
            current_index = 0

            while not new_index:
                new_index = True

                current_index = random_state.randint(low=0, high=n_samples)

                for index in archetype_indices:
                    if current_index == index:
                        new_index = False

            archetype_indices[i] = current_index

        C = np.zeros((n_components, n_samples))
        component = 0
        for index in archetype_indices:
            C[component, index] = 1.0
            for i in range(n_components):
                if i == component:
                    S[index, i] = 1.0
                else:
                    S[index, i] = 0.0
            component += 1

        X = S.dot(basis)
        basis_projection = C.dot(X)

        self.assertTrue(np.allclose(basis_projection, basis, tolerance))
        self.assertTrue(np.linalg.norm(X - S.dot(C.dot(X))) < tolerance)

        K = X.dot(X.T)

        delta = 0
        aa = KernelAA(n_components=n_components, delta=delta)

        aa.K = K
        aa.C = C
        aa.S = S

        aa._initialize_workspace()

        initial_cost = aa._evaluate_cost()

        error = aa._update_weights()

        self.assertEqual(error, 0)

        final_cost = aa._evaluate_cost()

        self.assertTrue(abs(final_cost - initial_cost) < tolerance)

        updated_S = aa.S

        self.assertTrue(np.allclose(updated_S.sum(axis=1), 1, 1e-12))
        self.assertTrue(np.allclose(updated_S, S, tolerance))
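For reference, the KernelAA API exercised in Example No. 1 composes into the following end-to-end usage sketch; only parameters and attributes that appear in the snippets above are used, and the data here are arbitrary.

rng = check_random_state(0)
X = rng.uniform(size=(100, 5))   # samples in feature space
K = X.dot(X.T)                   # Gram matrix handed to KernelAA

C0 = right_stochastic_matrix((4, 100), random_state=rng)  # initial dictionary
S0 = right_stochastic_matrix((100, 4), random_state=rng)  # initial weights

aa = KernelAA(n_components=4, delta=0, init='custom',
              max_iterations=500, tolerance=1e-6)
S = aa.fit_transform(K, dictionary=C0, weights=S0)  # (n_samples, n_components) weights
C = aa.dictionary_                                  # (n_components, n_samples) dictionary

print(aa.n_iter_, np.allclose(S.sum(axis=1), 1), np.allclose(C.sum(axis=1), 1))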