Exemplo n.º 1
0
    def test_returns_zero_for_perfect_reconstruction_and_exact_weights(self):
        """Test cost is zero for perfect factorization and Markov weights.

        Gamma is generated by iterating the two-lag mixture recursion with
        the same order weights and transition matrices that are assigned to
        the model, and X is built as exactly ``Gamma.dot(S)``.
        """

        rng = check_random_state(0)

        n_features = 19
        n_components = 5
        n_samples = 30
        tolerance = 1e-14

        lags = [1, 3]
        n_lags = len(lags)

        # Random lag weights, normalized to unit sum.
        order_weights = rng.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        # One transition matrix per lag.
        transition_matrices = np.empty((n_lags, n_components, n_components))
        for lag_index in range(n_lags):
            transition_matrices[lag_index] = left_stochastic_matrix(
                (n_components, n_components), random_state=rng)

        S = rng.uniform(size=(n_components, n_features))

        Gamma = np.zeros((n_samples, n_components), dtype='f8')

        # The recursion below reaches back three rows, so the first three
        # rows are seeded with random draws normalized to unit sum.
        for t in range(3):
            Gamma[t] = rng.uniform(size=(n_components, ))
            Gamma[t] /= Gamma[t].sum()

        for t in range(3, n_samples):
            lag_one_term = (
                order_weights[0] * transition_matrices[0].dot(Gamma[t - 1]))
            lag_three_term = (
                order_weights[1] * transition_matrices[1].dot(Gamma[t - 3]))
            Gamma[t] = lag_one_term + lag_three_term

        # Sanity checks on the generated inputs. The third positional
        # argument of np.allclose is rtol, spelled out here by keyword.
        self.assertTrue(np.allclose(order_weights.sum(), 1, rtol=tolerance))
        for matrix in transition_matrices:
            self.assertTrue(
                np.allclose(matrix.sum(axis=0), 1, rtol=tolerance))
        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, rtol=tolerance))

        X = Gamma.dot(S)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=0,
                                epsilon_weights=2.3,
                                lags=lags)

        # Assign the fitted attributes directly instead of fitting.
        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()
        cost = model._evaluate_cost()
        expected_cost = 0

        self.assertLess(abs(cost - expected_cost), tolerance)
Exemplo n.º 2
0
    def test_selects_elements_in_convex_hull(self):
        """Test routine correctly selects points in convex hull.

        Four samples are placed exactly on the corners of the unit square by
        giving them one-hot weights. The sorted result of ``furthest_sum``
        must equal the indices of those samples.
        """

        n_samples = 10

        basis = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]],
                         dtype='f8')
        n_basis = basis.shape[0]

        # NOTE(review): no random_state is passed here, so the remaining
        # weights presumably differ from run to run - confirm whether the
        # test should be seeded.
        weights = left_stochastic_matrix((n_samples, n_basis))

        # Sample assignments[k] is made to coincide with basis vector k.
        assignments = [0, 4, 6, 9]
        for vertex, sample in enumerate(assignments):
            weights[sample] = np.zeros(n_basis)
            weights[sample, vertex] = 1

        X = weights.dot(basis)

        # Pairwise Euclidean distances between all samples.
        K = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                K[i, j] = np.linalg.norm(X[i] - X[j])

        n_components = n_basis

        result = sorted(furthest_sum(K, n_components, 1))

        # assertEqual reports the offending values on failure.
        self.assertEqual(len(result), n_components)
        for selected, expected in zip(result, assignments):
            self.assertEqual(selected, expected)
Exemplo n.º 3
0
    def test_exact_solution_is_fixed_point_with_zero_epsilon(self):
        """Test exact solution is a fixed point of update step.

        X is constructed as exactly ``Gamma.dot(S)`` and both regularization
        parameters are zero. A weights update must report no error, must not
        increase the cost, and must leave Gamma unchanged.
        """

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 5
        n_components = 6
        n_samples = 40
        tolerance = 1e-12

        S = random_state.uniform(size=(n_components, n_features))

        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        # The third positional argument of np.allclose is rtol.
        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, rtol=tolerance))

        X = Gamma.dot(S)

        lags = [1, 12]
        n_lags = len(lags)

        # One weight per lag (previously sized by n_components).
        order_weights = random_state.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for i in range(n_lags):
            transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=0,
                                epsilon_weights=0,
                                lags=lags)

        # model.Gamma is bound to the same array object as Gamma below. If
        # the update modifies it in place, comparing the two afterwards
        # would be trivially true, so keep an independent snapshot.
        expected_Gamma = Gamma.copy()

        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        initial_cost = model._evaluate_cost()

        error = model._update_weights()

        self.assertEqual(error, 0)

        final_cost = model._evaluate_cost()

        updated_Gamma = model.Gamma

        self.assertLessEqual(final_cost, initial_cost)
        self.assertTrue(
            np.allclose(expected_Gamma, updated_Gamma, rtol=tolerance))
Exemplo n.º 4
0
    def _fit_parameters(self):
        """Fit Markov chain parameters for weights.

        Draws random initial order weights (normalized to unit sum) and one
        transition matrix per lag, then repeatedly calls
        ``_update_parameters`` until the magnitude of the change in the fit
        residual falls below ``self.tolerance`` or ``self.max_iterations``
        updates have been performed.

        Raises
        ------
        RuntimeError
            If an update increases the fit residual while
            ``self.require_monotonic_cost_decrease`` is set.
        """

        if self.verbose:
            banner = "*** GPNH convex coding CROM: n_components = {:d} ***"
            print(banner.format(self.Gamma.shape[1]))
            header = '{:<12s} | {:<13s} | {:<13s} | {:<12s}'
            print(header.format('Iteration', 'Cost', 'Cost delta', 'Time'))
            print(60 * '-')

        n_lags = self.lags.size

        # Random initial guess for the lag weights.
        initial_weights = self.random_state.uniform(size=(n_lags, ))
        self.order_weights = initial_weights / initial_weights.sum()

        # Random initial guess for each lag's transition matrix, stored with
        # the same dtype as the data.
        matrix_shape = (self.n_components, self.n_components)
        self.transition_matrices = np.empty((n_lags, ) + matrix_shape,
                                            dtype=self.X.dtype)
        for lag_index in range(n_lags):
            self.transition_matrices[lag_index] = left_stochastic_matrix(
                matrix_shape, random_state=self.random_state)

        self._initialize_parameters_workspace()

        previous_cost = self._evaluate_fit_residual()

        for iteration in range(1, self.max_iterations + 1):
            tic = time.perf_counter()

            self._update_parameters()
            current_cost = self._evaluate_fit_residual()

            cost_increased = current_cost > previous_cost
            if cost_increased and self.require_monotonic_cost_decrease:
                raise RuntimeError(
                    'factorization cost increased after parameters update')

            cost_delta = current_cost - previous_cost
            previous_cost = current_cost

            elapsed = time.perf_counter() - tic

            if self.verbose:
                row = '{:12d} | {: 12.6e} | {: 12.6e} | {: 12.6e}'
                print(row.format(iteration, current_cost, cost_delta,
                                 elapsed))

            if abs(cost_delta) >= self.tolerance:
                continue

            # Change in cost is below tolerance: stop iterating.
            if self.verbose:
                print('*** Converged at iteration {:d} ***'.format(iteration))
            break
Exemplo n.º 5
0
    def test_single_update_reduces_cost_function_with_zero_epsilon(self):
        """Test single weights update reduces cost function.

        Starting from random X, S and Gamma, one call to ``_update_weights``
        must report no error and must not increase the cost.
        """

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 25
        n_components = 6
        n_samples = 300

        X = random_state.uniform(size=(n_samples, n_features))
        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        # The third positional argument of np.allclose is rtol.
        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, rtol=1e-14))

        lags = [1, 12]
        n_lags = len(lags)

        # One weight per lag (previously sized by n_components).
        order_weights = random_state.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for i in range(n_lags):
            transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=2.3,
                                epsilon_weights=0,
                                lags=lags)

        # Assign the fitted attributes directly instead of fitting.
        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        initial_cost = model._evaluate_cost()

        error = model._update_weights()

        self.assertEqual(error, 0)

        final_cost = model._evaluate_cost()

        self.assertLessEqual(final_cost, initial_cost)
Exemplo n.º 6
0
def _initialize_mtd_convex_coding_parameters_random(data,
                                                    n_components,
                                                    order=1,
                                                    random_state=None):
    """Draw random initial order weights and transition matrices.

    Parameters
    ----------
    data : array-like
        Only its ``dtype`` is read; it sets the dtype of the returned
        transition matrices.
    n_components : int
        Each transition matrix has shape (n_components, n_components).
    order : int, default: 1
        Number of order weights and transition matrices to generate.
    random_state : optional
        Passed to ``check_random_state`` to obtain the generator used for
        all draws.

    Returns
    -------
    order_weights : array, shape (order,)
        Uniform random draws divided by their sum.
    transition_matrices : array, shape (order, n_components, n_components)
        One matrix from ``left_stochastic_matrix`` per order.
    """
    rng = check_random_state(random_state)

    raw_weights = rng.uniform(size=(order, ))
    order_weights = raw_weights / raw_weights.sum()

    matrix_shape = (n_components, n_components)
    transition_matrices = np.empty((order, ) + matrix_shape,
                                   dtype=data.dtype)
    for lag_index in range(order):
        transition_matrices[lag_index] = left_stochastic_matrix(
            matrix_shape, random_state=rng)

    return order_weights, transition_matrices
Exemplo n.º 7
0
    def test_returns_zero_for_perfect_reconstruction(self):
        """Test cost is zero for perfect factorization.

        X is constructed as exactly ``Gamma.dot(S)`` and both regularization
        parameters are zero, so the evaluated cost must vanish to within
        ``tolerance``.
        """

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 19
        n_components = 5
        n_samples = 30
        tolerance = 1e-14

        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        # The third positional argument of np.allclose is rtol.
        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, rtol=tolerance))

        X = Gamma.dot(S)

        lags = np.arange(1, 3)
        n_lags = lags.size
        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=0,
                                epsilon_weights=0,
                                lags=lags)

        model.X = X
        model.Gamma = Gamma
        model.S = S

        # One weight per lag (previously sized by n_components).
        model.order_weights = random_state.uniform(size=(n_lags, ))
        model.order_weights /= model.order_weights.sum()

        model.transition_matrices = np.empty(
            (n_lags, n_components, n_components))
        for i in range(n_lags):
            model.transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model._initialize_workspace()

        cost = model._evaluate_cost()
        expected_cost = 0

        # Compare within tolerance rather than demanding bit-exact equality
        # of a floating-point result.
        self.assertLess(abs(cost - expected_cost), tolerance)
Exemplo n.º 8
0
    def test_analytic_gradient_matches_numerical_gradient(self):
        """Test analytical gradient matches finite difference approximation.

        The gradients stored on the model by ``_update_parameters_gradient``
        are compared against central differences of ``_evaluate_cost``, taken
        one entry at a time for the order weights and transition matrices.
        """

        rng = check_random_state(0)

        n_features = 3
        n_components = 2
        n_samples = 10

        X = rng.uniform(size=(n_samples, n_features))
        S = rng.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=rng)

        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, rtol=1e-12))

        lags = [1, 3]
        n_lags = len(lags)

        order_weights = rng.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for lag_index in range(n_lags):
            transition_matrices[lag_index] = left_stochastic_matrix(
                (n_components, n_components), random_state=rng)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=1.0,
                                epsilon_weights=1.2,
                                lags=lags)

        # Assign the fitted attributes directly instead of fitting.
        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        model._update_parameters_gradient()

        analytic_grad_order_weights = model.grad_order_weights
        analytic_grad_transition_matrices = model.grad_transition_matrices

        def cost_with_entry(attribute, index, value):
            """Set one parameter entry and return the re-evaluated cost."""
            getattr(model, attribute)[index] = value
            model._initialize_workspace()
            return model._evaluate_cost()

        def central_difference(attribute, index, h=1e-4):
            """Central difference of the cost in one parameter entry."""
            original = getattr(model, attribute)[index]

            # Evaluate at x - h first, then at x + h.
            cost_minus = cost_with_entry(attribute, index, original - h)
            cost_plus = cost_with_entry(attribute, index, original + h)

            # Put the unperturbed value back before returning.
            getattr(model, attribute)[index] = original

            return (cost_plus - cost_minus) / (2 * h)

        numeric_grad_order_weights = np.zeros((n_lags, ))
        for lag_index in range(n_lags):
            numeric_grad_order_weights[lag_index] = central_difference(
                'order_weights', lag_index)

        self.assertTrue(
            np.allclose(analytic_grad_order_weights,
                        numeric_grad_order_weights,
                        rtol=1e-4))

        numeric_grad_transition_matrices = np.zeros(
            (n_lags, n_components, n_components))
        # np.ndindex walks the entries with the last index varying fastest.
        for entry in np.ndindex(n_lags, n_components, n_components):
            numeric_grad_transition_matrices[entry] = central_difference(
                'transition_matrices', entry)

        self.assertTrue(
            np.allclose(analytic_grad_transition_matrices,
                        numeric_grad_transition_matrices,
                        rtol=1e-4))
Exemplo n.º 9
0
    def test_repeated_updates_converge_with_nonzero_epsilon(self):
        """Test repeated updates converge to fixed point.

        ``_update_weights`` is applied repeatedly. Every update must report
        no error and must not increase the cost, and the change in cost must
        drop to ``tolerance`` or below before ``max_iter`` updates are used.
        """

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 35
        n_components = 4
        n_samples = 500
        max_iter = 100
        tolerance = 1e-6

        X = random_state.uniform(size=(n_samples, n_features))
        S = random_state.uniform(size=(n_components, n_features))
        Gamma = right_stochastic_matrix((n_samples, n_components),
                                        random_state=random_state)

        # The third positional argument of np.allclose is rtol.
        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, rtol=1e-12))

        lags = [1, 12]
        n_lags = len(lags)

        # One weight per lag (previously sized by n_components).
        order_weights = random_state.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for i in range(n_lags):
            transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=1.3,
                                epsilon_weights=3.4,
                                lags=lags,
                                tolerance=tolerance,
                                max_iterations=max_iter)

        # Assign the fitted attributes directly instead of fitting.
        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        # Start above tolerance so that the loop body runs at least once.
        cost_delta = 1 + tolerance
        old_cost = model._evaluate_cost()
        new_cost = old_cost
        n_iter = 0

        while abs(cost_delta) > tolerance and n_iter < max_iter:
            old_cost = new_cost
            error = model._update_weights()
            self.assertEqual(error, 0)
            new_cost = model._evaluate_cost()

            cost_delta = new_cost - old_cost

            self.assertLessEqual(cost_delta, 0)

            n_iter += 1

        # Leaving the loop with n_iter == max_iter means it ran out of
        # iterations, which this test treats as a failure.
        self.assertLess(n_iter, max_iter)
Exemplo n.º 10
0
    def test_prediction_matches_example_in_small_example(self):
        """Test result of calling predict matches expected value.

        The model is given the first ``n_samples`` rows of a generated
        sequence and is asked to predict from the remaining ``n_extra`` rows.
        Each non-NaN prediction is compared against the two-lag mixture
        formula applied to the weights returned by ``transform``.
        """

        rng = check_random_state(0)

        n_features = 3
        n_components = 2
        n_samples = 10
        n_extra = 10
        tolerance = 1e-12

        lags = [1, 3]
        n_lags = len(lags)
        n_total = n_samples + n_extra

        order_weights = rng.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for lag_index in range(n_lags):
            transition_matrices[lag_index] = left_stochastic_matrix(
                (n_components, n_components), random_state=rng)

        S = rng.uniform(size=(n_components, n_features))

        Gamma = np.zeros((n_total, n_components), dtype='f8')

        # The recursion below reaches back three rows, so the first three
        # rows are seeded with random draws normalized to unit sum.
        for t in range(3):
            Gamma[t] = rng.uniform(size=(n_components, ))
            Gamma[t] /= Gamma[t].sum()

        for t in range(3, n_total):
            lag_one_term = (
                order_weights[0] * transition_matrices[0].dot(Gamma[t - 1]))
            lag_three_term = (
                order_weights[1] * transition_matrices[1].dot(Gamma[t - 3]))
            Gamma[t] = lag_one_term + lag_three_term

        # Sanity checks on the generated inputs. The third positional
        # argument of np.allclose is rtol, spelled out here by keyword.
        self.assertTrue(np.allclose(order_weights.sum(), 1, rtol=tolerance))
        for matrix in transition_matrices:
            self.assertTrue(
                np.allclose(matrix.sum(axis=0), 1, rtol=tolerance))
        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, rtol=tolerance))

        train_Gamma = Gamma[:n_samples]
        X = train_Gamma.dot(S)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=4.0,
                                epsilon_weights=2.2,
                                lags=lags,
                                tolerance=1e-12)

        # Assign the fitted attributes directly instead of fitting.
        model.X = X
        model.Gamma = train_Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        test_X = Gamma[n_samples:].dot(S)
        transformed_weights = model.transform(test_X)
        predicted_X = model.predict(test_X, horizon=1)

        # Rows before index max(lags) - 1 are expected to be all NaN.
        first_valid = np.max(lags) - 1
        self.assertTrue(np.all(np.isnan(predicted_X[:first_valid])))

        for i in range(first_valid, n_extra):
            # The lag-1 term uses row i and the lag-3 term uses row i - 2.
            lag_one_term = (
                model.order_weights[0] *
                model.transition_matrices[0].dot(transformed_weights[i]))
            lag_three_term = (
                model.order_weights[1] *
                model.transition_matrices[1].dot(transformed_weights[i - 2]))

            expected = model.S.T.dot(lag_one_term + lag_three_term)

            self.assertTrue(np.allclose(expected, predicted_X[i], rtol=1e-3))
Exemplo n.º 11
0
    def test_exact_solution_is_fixed_point_with_nonzero_epsilon(self):
        """Test exact solution is a fixed point of update step.

        Gamma is generated by iterating the two-lag mixture recursion with
        the same order weights and transition matrices that are assigned to
        the model. A parameters update must report no error, must not
        increase the cost, must keep the normalization of the parameters,
        and must leave the parameters unchanged.
        """

        random_seed = 0
        random_state = check_random_state(random_seed)

        n_features = 10
        n_components = 7
        n_samples = 40
        tolerance = 1e-12

        lags = [
            1,
            2,
        ]
        n_lags = len(lags)

        order_weights = random_state.uniform(size=(n_lags, ))
        order_weights /= order_weights.sum()

        transition_matrices = np.empty((n_lags, n_components, n_components))
        for i in range(n_lags):
            transition_matrices[i] = left_stochastic_matrix(
                (n_components, n_components), random_state=random_state)

        S = random_state.uniform(size=(n_components, n_features))

        Gamma = np.zeros((n_samples, n_components), dtype='f8')

        # The recursion below reaches back two rows, so the first two rows
        # are seeded with random draws normalized to unit sum.
        Gamma[0] = random_state.uniform(size=(n_components, ))
        Gamma[0] /= Gamma[0].sum()
        Gamma[1] = random_state.uniform(size=(n_components, ))
        Gamma[1] /= Gamma[1].sum()
        for t in range(2, n_samples):
            Gamma[t] = (
                order_weights[0] * transition_matrices[0].dot(Gamma[t - 1]) +
                order_weights[1] * transition_matrices[1].dot(Gamma[t - 2]))

        # The third positional argument of np.allclose is rtol.
        self.assertTrue(np.allclose(order_weights.sum(), 1, rtol=tolerance))
        for i in range(n_lags):
            self.assertTrue(
                np.allclose(transition_matrices[i].sum(axis=0), 1,
                            rtol=tolerance))
        self.assertTrue(np.allclose(Gamma.sum(axis=1), 1, rtol=tolerance))

        X = Gamma.dot(S)

        model = MTDConvexCoding(n_components=n_components,
                                epsilon_states=4.0,
                                epsilon_weights=2.2,
                                lags=lags)

        # The model attributes below are bound to the same array objects as
        # the local variables. If the update modifies them in place,
        # comparing the two afterwards would be trivially true, so keep
        # independent reference copies.
        expected_order_weights = order_weights.copy()
        expected_transition_matrices = transition_matrices.copy()

        model.X = X
        model.Gamma = Gamma
        model.S = S
        model.order_weights = order_weights
        model.transition_matrices = transition_matrices

        model._initialize_workspace()

        initial_cost = model._evaluate_cost()

        error = model._update_parameters()

        self.assertEqual(error, 0)

        final_cost = model._evaluate_cost()

        updated_order_weights = model.order_weights
        updated_transition_matrices = model.transition_matrices

        self.assertLessEqual(final_cost, initial_cost)

        # The update must preserve the normalization of the parameters.
        self.assertTrue(
            np.allclose(updated_order_weights.sum(), 1, rtol=tolerance))
        for i in range(n_lags):
            self.assertTrue(
                np.allclose(updated_transition_matrices[i].sum(axis=0), 1,
                            rtol=tolerance))

        self.assertTrue(
            np.allclose(expected_order_weights, updated_order_weights,
                        rtol=tolerance))
        self.assertTrue(
            np.allclose(expected_transition_matrices,
                        updated_transition_matrices,
                        rtol=tolerance))