def fit(self, X, y, sample_weight=None):
    """Fit the classifier on a time series dataset.

    Parameters
    ----------
    X
        Training time series; converted with
        ``_prepare_ts_datasets_sklearn`` before being handed to the parent
        estimator.
    y
        Training targets, forwarded unchanged to the parent ``fit``.
    sample_weight : optional
        Forwarded unchanged to the parent ``fit``.

    Returns
    -------
    self
        The fitted estimator.

    Notes
    -----
    When ``kernel == "gak"`` and ``gamma == "auto"``, ``self.gamma`` is
    replaced by ``gamma_soft_dtw`` computed on ``X`` and ``self.kernel`` is
    replaced by the callable built by ``_sparse_kernel_func_gak``.

    NOTE(review): after the parent ``fit`` returns, ``self.kernel`` is
    rebuilt with ``slice_support_vectors=self.support_`` regardless of the
    kernel that was configured -- confirm this is intended for non-GAK
    kernels.
    """
    flattened_X = _prepare_ts_datasets_sklearn(X)

    if self.kernel == "gak" and self.gamma == "auto":
        # Estimate gamma from the data, then build the matching GAK kernel.
        estimated_gamma = gamma_soft_dtw(to_time_series_dataset(X))
        self.gamma = estimated_gamma
        self.kernel = _sparse_kernel_func_gak(
            sz=self.sz, d=self.d, gamma=estimated_gamma
        )

    parent = super(TimeSeriesSVC, self)
    parent.fit(flattened_X, y, sample_weight=sample_weight)

    # Rebuild the kernel now that the support vector indices are known.
    self.kernel = _sparse_kernel_func_gak(
        sz=self.sz,
        d=self.d,
        gamma=self.gamma,
        slice_support_vectors=self.support_,
    )
    return self
def _preprocess_sklearn(self, X, y=None, fit_time=False):
    """Validate ``X`` (and ``y``) and turn it into input for the wrapped SVM.

    Parameters
    ----------
    X
        Time series dataset to validate and convert.
    y : optional
        Targets. When given, they are validated together with ``X`` and
        returned alongside the converted data.
    fit_time : bool, default False
        ``True`` when called from ``fit``: the converted dataset is stored
        in ``_X_fit`` and ``gamma_`` / ``classes_`` are set. ``False``
        otherwise: the estimator must already be fitted and ``X`` is
        checked against the dimensions of ``_X_fit``.

    Returns
    -------
    sklearn_X, or (sklearn_X, y) when ``y`` is not None
        For kernels listed in ``VARIABLE_LENGTH_METRICS`` this is the
        matrix returned by ``cdist_gak`` (and ``estimator_kernel_`` is set
        to ``"precomputed"``); otherwise it is ``to_sklearn_dataset(X)``
        and ``estimator_kernel_`` is the configured kernel.
    """
    is_variable_length = self.kernel in VARIABLE_LENGTH_METRICS
    # Non-finite values are only tolerated for variable-length kernels.
    require_finite = not is_variable_length

    if y is not None:
        X, y = check_X_y(X, y, allow_nd=True,
                         force_all_finite=require_finite)
    else:
        X = check_array(X, allow_nd=True, force_all_finite=require_finite)
    X = to_time_series_dataset(X)

    if not fit_time:
        check_is_fitted(self, ['svm_estimator_', '_X_fit'])
        X = check_dims(X,
                       X_fit_dims=self._X_fit.shape,
                       extend=True,
                       check_n_features_only=is_variable_length)
    else:
        self._X_fit = X
        # "auto" means: estimate gamma from the training data.
        self.gamma_ = gamma_soft_dtw(X) if self.gamma == "auto" else self.gamma
        self.classes_ = numpy.unique(y)

    if is_variable_length:
        assert self.kernel == "gak"
        self.estimator_kernel_ = "precomputed"
        # At fit time the Gram matrix is X against itself; afterwards it is
        # X against the stored training set.
        reference = () if fit_time else (self._X_fit,)
        sklearn_X = cdist_gak(X,
                              *reference,
                              sigma=numpy.sqrt(self.gamma_ / 2.),
                              n_jobs=self.n_jobs,
                              verbose=self.verbose)
    else:
        self.estimator_kernel_ = self.kernel
        sklearn_X = to_sklearn_dataset(X)

    return sklearn_X if y is None else (sklearn_X, y)
def _preprocess_sklearn(self, X, y=None, fit_time=False):
    """Validate ``X`` (and ``y``) and turn it into input for the wrapped SVM.

    Parameters
    ----------
    X
        Time series dataset to validate and convert.
    y : optional
        Targets. When given, they are validated together with ``X`` and
        returned alongside the converted data.
    fit_time : bool, default False
        When ``True``, the converted dataset is stored in ``_X_fit`` and
        ``gamma_`` / ``classes_`` are set. When ``False``, the previously
        stored ``_X_fit`` and ``gamma_`` are used.

    Returns
    -------
    sklearn_X, or (sklearn_X, y) when ``y`` is not None
        For kernels listed in ``VARIABLE_LENGTH_METRICS`` this is the
        matrix returned by ``cdist_gak`` (and ``estimator_kernel_`` is set
        to ``"precomputed"``); otherwise it is
        ``_prepare_ts_datasets_sklearn(X)`` and ``estimator_kernel_`` is
        the configured kernel.

    Notes
    -----
    An explicit numeric ``gamma`` on the estimator is honoured. Previously
    ``gamma_`` was always overwritten with the data-driven estimate, so a
    user-supplied value was silently ignored. String values such as
    ``"auto"`` (or a missing ``gamma``) keep the estimate from
    ``gamma_soft_dtw``.
    """
    force_all_finite = self.kernel not in VARIABLE_LENGTH_METRICS
    if y is None:
        X = check_array(X, allow_nd=True, force_all_finite=force_all_finite)
    else:
        X, y = check_X_y(X, y, allow_nd=True,
                         force_all_finite=force_all_finite)
    X = check_dims(X, X_fit=None)
    X = to_time_series_dataset(X)

    if fit_time:
        self._X_fit = X
        # Fall back to "auto" when the estimator exposes no gamma, which
        # reproduces the former always-estimate behaviour.
        gamma = getattr(self, "gamma", "auto")
        if gamma is None or isinstance(gamma, str):
            self.gamma_ = gamma_soft_dtw(X)
        else:
            self.gamma_ = gamma
        self.classes_ = numpy.unique(y)

    if self.kernel in VARIABLE_LENGTH_METRICS:
        assert self.kernel == "gak"
        self.estimator_kernel_ = "precomputed"
        if fit_time:
            # Gram matrix of the training set against itself.
            sklearn_X = cdist_gak(X,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
        else:
            # Kernel values between new series and the training set.
            sklearn_X = cdist_gak(X,
                                  self._X_fit,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
    else:
        self.estimator_kernel_ = self.kernel
        sklearn_X = _prepare_ts_datasets_sklearn(X)

    if y is None:
        return sklearn_X
    return sklearn_X, y
def softdtw_augment_train_set(x_train, y_train, classes, num_synthetic_ts,
                              max_neighbors=5):
    """Create synthetic time series per class as soft-DTW barycenters.

    For every label in ``classes`` the training series carrying that label
    are selected, and ``num_synthetic_ts`` new series are generated, each
    as a weighted soft-DTW barycenter of a randomly chosen series and its
    nearest neighbours within the same class.

    Parameters
    ----------
    x_train : array-like
        Training time series, indexable along the first axis by sample.
    y_train : array-like
        Label of each training series.
    classes : iterable
        Labels to generate synthetic series for.
    num_synthetic_ts : int
        Number of synthetic series to generate per class.
    max_neighbors : int, default 5
        Upper bound of the range the number of neighbours is drawn from.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The synthetic series and their labels. Both are empty when no
        class has at least two series.

    Notes
    -----
    Classes with fewer than two series are skipped, because at least one
    neighbour besides the chosen series is required. tslearn is imported
    lazily, only when a class actually needs synthesising.
    """
    x_train = np.asarray(x_train)
    # Plain lists would make ``y_train == c`` a scalar comparison.
    y_train = np.asarray(y_train)

    synthetic_x_train = []
    synthetic_y_train = []

    for c in classes:
        # Select the series of the class being processed.
        c_x_train = x_train[np.where(y_train == c)[0]]
        if len(c_x_train) < 2:
            continue

        for generated_sample in _softdtw_class_samples(
                c_x_train, num_synthetic_ts, max_neighbors):
            synthetic_x_train.append(generated_sample)
            synthetic_y_train.append(c)

    return np.array(synthetic_x_train), np.array(synthetic_y_train)


def _softdtw_class_samples(c_x_train, num_synthetic_ts, max_neighbors):
    """Generate ``num_synthetic_ts`` soft-DTW barycenters from one class."""
    from tslearn.neighbors import KNeighborsTimeSeries
    from tslearn.barycenters import softdtw_barycenter
    from tslearn.metrics import gamma_soft_dtw

    n_series = len(c_x_train)
    # Gamma used for the neighbour search, computed once per class.
    class_gamma = gamma_soft_dtw(c_x_train)

    samples = []
    while len(samples) < num_synthetic_ts:
        # Choose a random representative for the class.
        representative_index = np.random.choice(
            np.arange(n_series), size=1, replace=False)
        representative = c_x_train[representative_index]

        # Draw the number of neighbours, then clamp it so that the query
        # (representative + neighbours) never exceeds the class size.
        drawn = int(np.random.uniform(1, max_neighbors, size=1)[0])
        n_neighbors = max(1, min(drawn, n_series - 1))

        knn = KNeighborsTimeSeries(
            n_neighbors=n_neighbors + 1,
            metric='softdtw',
            metric_params={'gamma': class_gamma},
        ).fit(c_x_train)
        distances, indices = knn.kneighbors(
            X=representative, return_distance=True)
        distances = np.asarray(distances[0], dtype=float)
        indices = indices[0]

        # Second smallest distance: the closest series other than the
        # best match, used as the scale of the weighting.
        nearest_neighbor_distance = np.sort(distances)[1]
        neighbors = np.asarray(c_x_train[indices], dtype=float)

        # Weights decay with distance and equal 0.5 (before normalisation)
        # at the nearest-neighbour distance.
        uniform = np.full(len(distances), 1.0 / len(distances))
        with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
            weights = np.exp(
                np.log(0.5) * distances / nearest_neighbor_distance)
        total = np.sum(weights)
        if np.isfinite(total) and total > 0:
            weights = weights / total
        else:
            # Degenerate scale (e.g. duplicated series give a zero
            # distance): fall back to equal weights instead of NaNs.
            weights = uniform

        # Barycenter with a gamma specific to the selected neighbours.
        neighbors_gamma = gamma_soft_dtw(neighbors)
        samples.append(softdtw_barycenter(
            neighbors, weights=weights, gamma=neighbors_gamma))

    return samples