def test_matrix_output(self, y_values, epsilon, metric, metric_params):
    """
    Compare the Gaussian kernel matrix with a directly computed reference.

    The reference is exp(-d**2 / epsilon), where d holds the pairwise
    distances (via cdist) between the query points and the fitted points.
    When y_values is None the fitted points double as the query points.

    NOTE(review): x_values is not a parameter of this test; it is read from
    an enclosing scope (presumably a module-level array) - confirm.
    """
    # Points the reference distances are measured from.
    query_points = x_values if y_values is None else y_values

    # cdist only needs the exponent 'p' for the Minkowski metric.
    cdist_kwargs = {'metric': metric}
    if metric == 'minkowski':
        cdist_kwargs['p'] = metric_params['p']
    distances = cdist(query_points, x_values, **cdist_kwargs)
    expected = np.exp(-1. * distances**2 / epsilon)

    # Build the kernel on every point (k = number of points) and evaluate it.
    gaussian_kernel = kernel.Kernel(
        kernel_type='gaussian',
        metric=metric,
        metric_params=metric_params,
        epsilon=epsilon,
        k=len(x_values),
    )
    gaussian_kernel.fit(x_values)
    computed = gaussian_kernel.compute(y_values).toarray()

    # The norm of the flattened residual must stay below tolerance.
    residual_norm = np.linalg.norm((computed - expected).ravel())
    assert residual_norm < 1E-8
def test_auto_epsilon_selection(self):
    """
    Check the values fitted when epsilon is requested as 'bgh'.

    Fits a Gaussian kernel (euclidean metric, k=10) to the integers 0..99
    arranged as a single column, then checks the fitted epsilon and the
    attribute d.

    NOTE(review): another method with this exact name (taking eps_method)
    is visible nearby; if both live in the same class, the later definition
    replaces the earlier one and only one of them is collected - confirm.
    """
    data = np.arange(100).reshape(-1, 1)

    kernel_options = {
        'kernel_type': 'gaussian',
        'metric': 'euclidean',
        'epsilon': 'bgh',
        'k': 10,
    }
    auto_kernel = kernel.Kernel(**kernel_options)
    auto_kernel.fit(data)

    assert auto_kernel.epsilon_fitted == 1.0
    assert auto_kernel.d == 1.0
def test_auto_epsilon_selection(self, eps_method):
    """
    Check the fitted epsilon for a given automatic selection method.

    Fits a Gaussian kernel (euclidean metric, k=10) to the integers 0..99
    arranged as a single column. The expected fitted epsilon is 0.25 when
    eps_method is 'bgh' and 0.50 for any other value; d is expected to be
    1.0 in both cases.
    """
    data = np.arange(100).reshape(-1, 1)

    auto_kernel = kernel.Kernel(
        kernel_type='gaussian',
        metric='euclidean',
        epsilon=eps_method,
        k=10,
    )
    auto_kernel.fit(data)

    expected_epsilon = 0.25 if eps_method == 'bgh' else 0.50
    assert auto_kernel.epsilon_fitted == expected_epsilon
    assert auto_kernel.d == 1.0
def test_sparse_input(self, x_values, y_values, metric, metric_params, use_sparse):
    """
    Check kernel values when the fitted data may be a sparse matrix.

    The reference is exp(-d**2 / (4 * epsilon)) with epsilon fixed at 10 and
    unit bandwidths, where d holds the pairwise distances (via cdist) between
    the query points and x_values. When y_values is None, x_values doubles as
    the query points. When use_sparse is true, x_values is converted to CSR
    before being handed to the kernel.

    The reference distances are always computed from the inputs as received,
    before any sparse conversion.
    """
    epsilon = 10.
    y_values_ref = x_values if y_values is None else y_values

    # Reference distances; cdist only needs 'p' for the Minkowski metric.
    if metric == 'minkowski':
        pw_distance = cdist(y_values_ref, x_values, metric='minkowski',
                            p=metric_params['p'])
    else:
        pw_distance = cdist(y_values_ref, x_values, metric=metric)

    if use_sparse:
        x_values = sps.csr_matrix(x_values)
        # NOTE(review): only the row count of this sparse copy is used below;
        # compute() still receives the original y_values, so sparse query
        # points are not exercised by this test - confirm that is intended.
        y_values_ref = sps.csr_matrix(y_values_ref)

    # No bandwidth function is used in this test, so every point has
    # bandwidth one and the scaling below leaves the squared distances as is.
    x_bandwidth = np.ones(x_values.shape[0])
    y_bandwidth = np.ones(y_values_ref.shape[0]).reshape(-1, 1)
    scaled_sq_dists = pw_distance**2 / (x_bandwidth * y_bandwidth)
    true_values = np.exp(-1. * scaled_sq_dists / (4. * epsilon))

    # Construct the kernel and fit to the (possibly sparse) data.
    mykernel = kernel.Kernel(kernel_type='gaussian',
                             metric=metric,
                             metric_params=metric_params,
                             epsilon=epsilon,
                             k=x_values.shape[0],
                             bandwidth_type=None)
    mykernel.fit(x_values)
    K_matrix = mykernel.compute(y_values).toarray()

    # Check that the norm of the flattened error is beneath tolerance.
    error_values = (K_matrix - true_values).ravel()
    total_error = np.linalg.norm(error_values)
    assert total_error < 1E-8
def test_neighborlists(self, x_values, k, neighbor_params):
    """
    Check the number of stored entries in a k-nearest-neighbor kernel matrix.

    The neighbor count is capped at the number of data points. The test then
    verifies that the total number of stored entries (nnz) of the kernel
    matrix equals that count multiplied by the number of points. Only the
    total is checked, not the count in each individual row.
    """
    n_points = x_values.shape[0]
    # Cannot ask for more neighbors than there are points.
    n_neighbors = min(k, n_points)

    nbr_kernel = kernel.Kernel(
        kernel_type='gaussian',
        metric='euclidean',
        epsilon=1.,
        k=n_neighbors,
        neighbor_params=neighbor_params,
    )
    nbr_kernel.fit(x_values)
    kernel_matrix = nbr_kernel.compute(x_values)

    # Total stored entries should be n_neighbors for each of the n_points rows.
    expected_nnz = n_neighbors * n_points
    assert kernel_matrix.nnz == expected_nnz