def _preprocess_sklearn(self, X, y=None, fit_time=False):
    """Validate time series input and convert it to the representation
    expected by the wrapped sklearn SVM.

    At fit time (``fit_time=True``) this also records the training set in
    ``self._X_fit``, resolves ``gamma`` (``"auto"`` -> ``gamma_soft_dtw``)
    into ``self.gamma_`` and stores ``self.classes_``.  For variable-length
    metrics (GAK) the data is turned into a precomputed kernel matrix;
    otherwise it is flattened to a 2-D sklearn dataset.

    Returns ``sklearn_X`` or ``(sklearn_X, y)`` depending on whether a
    target was supplied.
    """
    is_variable_length = self.kernel in VARIABLE_LENGTH_METRICS
    # Variable-length datasets are NaN-padded, so NaNs must be tolerated.
    if y is not None:
        X, y = check_X_y(X, y, allow_nd=True,
                         force_all_finite=not is_variable_length)
    else:
        X = check_array(X, allow_nd=True,
                        force_all_finite=not is_variable_length)
    X = to_time_series_dataset(X)

    if fit_time:
        # Keep the training set around: GAK needs it at predict time.
        self._X_fit = X
        self.gamma_ = (gamma_soft_dtw(X) if self.gamma == "auto"
                       else self.gamma)
        self.classes_ = numpy.unique(y)
    else:
        check_is_fitted(self, ['svm_estimator_', '_X_fit'])
        X = check_dims(X, X_fit_dims=self._X_fit.shape, extend=True,
                       check_n_features_only=is_variable_length)

    if is_variable_length:
        assert self.kernel == "gak"
        self.estimator_kernel_ = "precomputed"
        sigma = numpy.sqrt(self.gamma_ / 2.)
        if fit_time:
            sklearn_X = cdist_gak(X, sigma=sigma,
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
        else:
            sklearn_X = cdist_gak(X, self._X_fit, sigma=sigma,
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
    else:
        self.estimator_kernel_ = self.kernel
        sklearn_X = to_sklearn_dataset(X)

    return sklearn_X if y is None else (sklearn_X, y)
def merge_crossover(ind1, ind2):
    """Merge shapelets from one set with shapelets from the other.

    Each shapelet of ``ind1`` is averaged (Euclidean barycenter) with its
    most similar, non-identical shapelet from ``ind2``, and vice versa.
    Similarity is measured with the Global Alignment Kernel (GAK).
    Both individuals are modified in place and returned.
    """
    # Construct a pairwise similarity matrix using GAK
    _all = list(ind1) + list(ind2)
    similarity_matrix = cdist_gak(ind1, ind2, sigma=sigma_gak(_all))

    # Iterate over shapelets in `ind1` and merge them with shapelets
    # from `ind2`
    for row_idx in range(similarity_matrix.shape[0]):
        # Ignore entries equal to 1.0 (identical shapelets)
        mask = similarity_matrix[row_idx, :] != 1.0
        non_equals = similarity_matrix[row_idx, :][mask]
        if len(non_equals):
            # Get the timeseries most similar to the one at row_idx.
            # BUG FIX: argmax over the masked vector indexes the *filtered*
            # array; map it back to a column index of the full matrix before
            # using it to index ind2.
            max_col_idx = np.where(mask)[0][np.argmax(non_equals)]
            ts1 = list(ind1[row_idx]).copy()
            ts2 = list(ind2[max_col_idx]).copy()
            # Merge them and remove nans (barycenter of NaN-padded series)
            ind1[row_idx] = euclidean_barycenter([ts1, ts2])
            ind1[row_idx] = ind1[row_idx][~np.isnan(ind1[row_idx])]

    # Apply the same for the elements in ind2
    for col_idx in range(similarity_matrix.shape[1]):
        mask = similarity_matrix[:, col_idx] != 1.0
        non_equals = similarity_matrix[:, col_idx][mask]
        if len(non_equals):
            # Same masked-index fix as above, for rows of the matrix.
            max_row_idx = np.where(mask)[0][np.argmax(non_equals)]
            ts1 = list(ind1[max_row_idx]).copy()
            ts2 = list(ind2[col_idx]).copy()
            ind2[col_idx] = euclidean_barycenter([ts1, ts2])
            ind2[col_idx] = ind2[col_idx][~np.isnan(ind2[col_idx])]

    return ind1, ind2
def sparse_gak(X, X_fit):
    """Return GAK similarities between ``X`` and ``X_fit``.

    Both inputs are flattened 2-D arrays; they are reshaped to
    ``(-1, sz, d)`` (``sz`` and ``d`` come from the enclosing scope,
    as do ``gamma`` and ``slice_support_vectors``).

    When ``slice_support_vectors`` is set, only the columns that
    correspond to support vectors are actually computed; the rest of the
    returned matrix is deliberately left uninitialized, since the SVM
    decision function never reads those entries.
    """
    sigma = numpy.sqrt(gamma / 2.)
    X_3d = X.reshape((-1, sz, d))

    if X_fit is X:
        # Self-similarity matrix of the (training) set.
        return cdist_gak(X_3d, None, sigma=sigma)

    if slice_support_vectors is None:
        return cdist_gak(X_3d, X_fit.reshape((-1, sz, d)), sigma=sigma)

    # Slice out support vectors and compute only against those.
    support = X_fit[slice_support_vectors]
    dense_sim = cdist_gak(X_3d, support.reshape((-1, sz, d)), sigma=sigma)
    # Scatter into a full-size matrix so callers see the expected shape;
    # non-support columns stay uninitialized on purpose (never read).
    gak_sim = numpy.empty((len(X), len(X_fit)))
    gak_sim[:, slice_support_vectors] = dense_sim
    return gak_sim
def _kernel_func_gak(self, x, y):
    """Evaluate the GAK kernel between two flattened series collections.

    ``x`` and ``y`` are 2-D sklearn-style arrays; they are reshaped back
    to 3-D time series of shape ``(-1, sz, d)`` using the sizes stored on
    the wrapped estimator.  A gamma of ``"auto"`` falls back to 1.0.
    """
    gamma = self.estimator.gamma
    if gamma == "auto":
        gamma = 1.0
    ts_shape = (-1, self.estimator.sz, self.estimator.d)
    return cdist_gak(x.reshape(ts_shape),
                     y.reshape(ts_shape),
                     sigma=np.sqrt(gamma / 2.0))
def _preprocess_sklearn(self, X, y=None, fit_time=False):
    """Validate inputs and convert them to the format the wrapped sklearn
    SVM expects.

    At fit time the training set is stored in ``self._X_fit``,
    ``self.gamma_`` is derived from the data via ``gamma_soft_dtw`` and
    ``self.classes_`` is recorded.  For variable-length metrics (GAK) a
    precomputed kernel matrix is built; otherwise the series are
    flattened into a 2-D sklearn dataset.

    Returns ``sklearn_X`` when ``y`` is None, else ``(sklearn_X, y)``.
    """
    # NaN padding is only legal for variable-length metrics.
    force_all_finite = self.kernel not in VARIABLE_LENGTH_METRICS
    if y is None:
        X = check_array(X, allow_nd=True,
                        force_all_finite=force_all_finite)
    else:
        X, y = check_X_y(X, y, allow_nd=True,
                         force_all_finite=force_all_finite)
    X = check_dims(X, X_fit=None)
    X = to_time_series_dataset(X)
    if fit_time:
        # Training set is kept: GAK needs it again at predict time.
        self._X_fit = X
        # NOTE(review): gamma is always derived from the data here; a
        # user-supplied gamma is not consulted in this version.
        self.gamma_ = gamma_soft_dtw(X)
        self.classes_ = numpy.unique(y)
    if self.kernel in VARIABLE_LENGTH_METRICS:
        assert self.kernel == "gak"
        self.estimator_kernel_ = "precomputed"
        if fit_time:
            # Square self-similarity matrix of the training set.
            sklearn_X = cdist_gak(X, sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
        else:
            # Cross-similarities between new data and the training set.
            sklearn_X = cdist_gak(X, self._X_fit,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
    else:
        self.estimator_kernel_ = self.kernel
        sklearn_X = _prepare_ts_datasets_sklearn(X)
    if y is None:
        return sklearn_X
    else:
        return sklearn_X, y
def _get_kernel(self, X, Y=None):
    """Compute the kernel matrix between ``X`` and ``Y``.

    Uses GAK directly for ``kernel == "gak"``; any other kernel is
    delegated to sklearn's ``pairwise_kernels`` after flattening the
    time series to 2-D arrays.  ``Y=None`` yields a self-similarity
    matrix in both paths.
    """
    params = self._get_kernel_params()
    if self.kernel == "gak":
        return cdist_gak(X, Y, n_jobs=self.n_jobs,
                         verbose=self.verbose, **params)
    X_2d = to_sklearn_dataset(X)
    Y_2d = None if Y is None else to_sklearn_dataset(Y)
    return pairwise_kernels(X_2d, Y_2d, metric=self.kernel,
                            n_jobs=self.n_jobs, **params)
def test_gamma_value_svm():
    """A user-supplied gamma must be forwarded to the GAK kernel matrix
    built by ``_preprocess_sklearn`` (no auto-derivation)."""
    n, sz, d = 5, 10, 3
    rng = np.random.RandomState(0)
    X = rng.randn(n, sz, d)
    y = rng.randint(low=0, high=2, size=n)
    gamma = 10.

    # Expected precomputed kernel for the explicit gamma.
    expected = cdist_gak(X, sigma=np.sqrt(gamma / 2.))

    for model_cls in [TimeSeriesSVC, TimeSeriesSVR]:
        model = model_cls(kernel="gak", gamma=gamma)
        sklearn_X, _ = model._preprocess_sklearn(X, y, fit_time=True)
        np.testing.assert_allclose(sklearn_X, expected)
def _get_kernel(self, X, Y=None):
    """GAK similarities between ``X`` and ``Y`` (self-similarity matrix
    of ``X`` when ``Y`` is None), using the configured bandwidth."""
    bandwidth = self.sigma
    return cdist_gak(X, Y, sigma=bandwidth)
def _get_kernel(self, X, Y=None):
    """GAK similarities between ``X`` and ``Y`` (self-similarity matrix
    of ``X`` when ``Y`` is None), computed with the configured bandwidth
    and parallelism settings."""
    return cdist_gak(X, Y,
                     sigma=self.sigma,
                     n_jobs=self.n_jobs,
                     verbose=self.verbose)
def _kernel_func_gak(sz, d, gamma):
    """Build a GAK kernel callable for flattened time series.

    The returned function reshapes its 2-D inputs to ``(-1, sz, d)``
    before evaluating the kernel.  ``gamma == "auto"`` falls back to 1.
    """
    if gamma == "auto":
        gamma = 1.
    # gamma is fixed from here on, so the bandwidth can be precomputed.
    sigma = numpy.sqrt(gamma / 2.)

    def gak_kernel(x, y):
        return cdist_gak(x.reshape((-1, sz, d)),
                         y.reshape((-1, sz, d)),
                         sigma=sigma)

    return gak_kernel