def test_classifer(dataset, classifier_cls, dist_func=distance.euclidean, dist_func_params=None):
    """Benchmark a 1-NN classifier on one UCR dataset and print a report.

    Loads the dataset's TRAIN/TEST splits, z-normalizes both, fits a
    1-nearest-neighbour classifier, and prints the wall-clock time, a
    classification report, and the accuracy.

    Parameters
    ----------
    dataset : str
        Name of the UCR dataset directory under ``UCR_DATA_ROOT``.
    classifier_cls : type
        KNN-style classifier class; must accept ``n_neighbors``,
        ``distfunc``, ``distfunc_params`` and ``n_jobs`` keyword arguments.
    dist_func : callable, optional
        Distance function handed to the classifier (default: Euclidean).
    dist_func_params : dict or None, optional
        Extra parameters forwarded to ``dist_func``.
    """
    print("=" * 80)
    print("\n")
    print("testing ", classifier_cls.__name__)
    print("=" * 80)
    # load data
    file_train = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TRAIN')
    file_test = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TEST')
    X_train, y_train = data_parser.load_ucr(file_train)
    X_test, y_test = data_parser.load_ucr(file_test)
    # z-normalization
    X_train = data_parser.z_normalize(X_train)
    X_test = data_parser.z_normalize(X_test)
    print("=" * 60)
    print("basic information:")
    print("file_train", file_train)
    print("file_test", file_test)
    print("X_train", X_train.shape)
    print("y_train", y_train.shape)
    print("X_test", X_test.shape)
    print("y_test", y_test.shape)
    print("=" * 60)
    print("=" * 60)
    print("knn classification ")
    # set 1-nn classifier
    n_jobs = 10
    n_neighbors = 1
    classifier = classifier_cls(n_neighbors=n_neighbors, distfunc=dist_func,
                                distfunc_params=dist_func_params, n_jobs=n_jobs)
    print("=" * 60)
    print("classifier parameters")
    print("distance function: ", classifier.distfunc.__name__)
    print("distance parameters: ", classifier.distfunc_params)
    print("n_jobs: ", classifier.n_jobs)
    print("n_neighbors: ", classifier.n_neighbors)
    print("=" * 60)
    time_start = time()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    # BUGFIX: stop the clock immediately after prediction so the reported
    # time covers fit + predict only, not report formatting and printing.
    time_end = time()
    report = classification_report(y_test, y_pred)
    print(report)
    acc = validation.cal_accuracy(y_test, y_pred)
    print("time: ", time_end - time_start)
    print("accuracy: ", acc)
def experiment_cnn_shapelet(dataset, k=5, k_val=1):
    """Shapelet-transform a UCR dataset, train the CNN on it, return accuracy.

    The TRAIN split is transformed with basic shapelet learning, split into
    balanced train/validation folds (k folds, k_val held out), then passed
    to ``cnn_classifier`` along with the transformed TEST split.
    """
    path_train = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TRAIN')
    path_test = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TEST')
    X_train, y_train = data_parser.load_ucr(path_train)
    X_test, y_test = data_parser.load_ucr(path_test)

    # shapelet transformation parameters
    min_len = 8
    max_len = 30
    n_shapelets = X_train.shape[1] // 2
    len_step = 10
    pos_step = 20
    transformer = ShapeletTransformBasic(n_shapelets=n_shapelets,
                                         min_shapelet_length=min_len,
                                         max_shapelet_length=max_len,
                                         length_increment=len_step,
                                         position_increment=pos_step)

    print("shapelet learning.....")
    transformer.fit(X_train, y_train)
    bestk_shapelets = transformer.train()
    print("bestk_shapelets: ", np.shape(bestk_shapelets))

    print("shapelet transformation.....")
    X_train_ = np.array(transformer.transform(X_train))
    X_test_ = np.array(transformer.transform(X_test))

    # cnn classifier on the transformed features
    X_train_, y_train, X_val_, y_val = data_parser.k_fold_validation_balance(X_train_, y_train, k, k_val)
    acc = cnn_classifier(X_train_, y_train, X_test_, y_test, X_val_, y_val)

    # report results
    print("=" * 80)
    print("\n")
    print("original data set: ")
    print("train set", np.shape(X_train), np.shape(y_train))
    print("test set", np.shape(X_test), np.shape(y_test))
    print("shapelet transformed set: ")
    print("learned shapelets: ", np.shape(bestk_shapelets))
    print("train set: ", np.shape(X_train_), np.shape(y_train))
    print("validation set: ", np.shape(X_val_), np.shape(y_val))
    print("test set: ", np.shape(X_test_), np.shape(y_test))
    print("\n")
    print("shapelets learning parameter: ")
    print("minShapeletLength", min_len)
    print("max_shapelet_length", max_len)
    print("length_increment", len_step)
    print("position_increment", pos_step)
    print("\n")
    print(acc)
    print("=" * 80)
    return acc
def test_find_length():
    """Estimate and print min/max shapelet lengths for the 'Beef' dataset.

    Loads the TRAIN split and runs ``estimate_min_max_length`` on it.
    """
    dataset = 'Beef'
    file_train = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TRAIN')
    # NOTE: the TEST split used to be loaded here as well, but its data was
    # never used — that dead file I/O has been removed.
    X_train, y_train = data_parser.load_ucr(file_train)
    print("estimating min length and max length of shapelet learning...")
    min_length, max_length = estimate_min_max_length(X_train, y_train)
    print(min_length)
    print(max_length)
def test_k_fold_split():
    """Smoke-test the balanced k-fold split and print the resulting shapes.

    NOTE(review): ``dataset`` is a free variable here, not a parameter —
    unlike the sibling helpers. It must exist at module level before this
    function is called; confirm that is intentional.
    """
    print("=" * 80)
    print("\n")
    train_path = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TRAIN')
    test_path = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TEST')
    X_train, y_train = data_parser.load_ucr(train_path)
    X_test, y_test = data_parser.load_ucr(test_path)

    # 5 folds, 1 held out for validation
    n_folds = 5
    n_val_folds = 1
    X_train, y_train, X_val, y_val = data_parser.k_fold_validation_balance(
        X_train, y_train, n_folds, n_val_folds)

    print(X_train.shape, y_train.shape)
    print(X_val.shape, y_val.shape)
    print("=" * 80)
    print("\n")
def experiment_cnn(dataset, k=5, k_val=1):
    """Train the CNN classifier on a raw UCR dataset and return its accuracy.

    Splits the TRAIN data into balanced train/validation folds (k folds,
    k_val held out) and evaluates on the TEST split.
    """
    path_train = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TRAIN')
    path_test = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TEST')
    X_train, y_train = data_parser.load_ucr(path_train)
    X_test, y_test = data_parser.load_ucr(path_test)

    # carve a balanced validation set out of the training data
    X_train, y_train, X_val, y_val = data_parser.k_fold_validation_balance(X_train, y_train, k, k_val)
    acc = cnn_classifier(X_train, y_train, X_test, y_test, X_val, y_val)

    # summary
    print("=" * 80)
    print("\n")
    print("data set: ")
    print("train set", np.shape(X_train), np.shape(y_train))
    print("validation set", np.shape(X_val), np.shape(y_val))
    print("test set", np.shape(X_test), np.shape(y_test))
    print(acc)
    print("=" * 80)
    return acc
def run(dataset, shapelet_cls, min_shapelet_length=0.1, max_shapelet_length=0.7, num_shapelet=0.5, length_increment=20, position_increment=20, n_neighbors=1, n_jobs=10, is_normalize=False, log_dir=None):
    """Run shapelet learning + transformation + k-NN classification on a UCR dataset.

    Parameters
    ----------
    dataset : str
        Name of the UCR dataset directory under ``UCR_DATA_ROOT``.
    shapelet_cls : type
        Shapelet transformer class (fit / train / transform interface).
    min_shapelet_length, max_shapelet_length : float or int
        Shapelet length bounds; values < 1 are fractions of the series length.
    num_shapelet : float or int
        Number of shapelets to learn; values < 1 are fractions of the series
        length (consistent with the length bounds above).
    length_increment, position_increment : int
        Step sizes of the shapelet search.
    n_neighbors, n_jobs : int
        k-NN classifier parameters.
    is_normalize : bool
        If True, z-normalize both splits before learning.
    log_dir : str or None
        If given, the learned shapelets are saved under this directory.

    Returns
    -------
    tuple
        ``(accuracy, best_shapelets_content_)``.
    """
    print("=" * 80)
    print("\n")
    print("testing ", shapelet_cls.__name__)
    print("\n")
    # load data
    file_train = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TRAIN')
    file_test = os.path.join(UCR_DATA_ROOT, dataset, dataset + '_TEST')
    X_train, y_train = data_parser.load_ucr(file_train)
    X_test, y_test = data_parser.load_ucr(file_test)
    # z normalization
    if is_normalize:
        X_train = data_parser.z_normalize(X_train)
        X_test = data_parser.z_normalize(X_test)
    print("=" * 80)
    print("basic information: \n")
    print("file_train: ", file_train)
    print("file_test: ", file_test)
    print("X_train", X_train.shape)
    print("y_train", y_train.shape)
    print("X_test", X_test.shape)
    print("y_test", y_test.shape)
    print("=" * 80)
    # shapelet setup: fractional parameters (< 1) are scaled by series length
    if min_shapelet_length < 1:
        min_shapelet_length = int(X_train.shape[1] * min_shapelet_length)
    if max_shapelet_length < 1:
        max_shapelet_length = int(X_train.shape[1] * max_shapelet_length)
    # BUGFIX: only scale num_shapelet when it is a fraction (< 1), matching
    # the length bounds above. Previously any truthy value was multiplied by
    # the series length, so an absolute count (e.g. 100) was scaled wrongly.
    if num_shapelet and num_shapelet < 1:
        num_shapelet = int(X_train.shape[1] * num_shapelet)
    callSTS = shapelet_cls(n_shapelets=num_shapelet,
                           min_shapelet_length=min_shapelet_length,
                           max_shapelet_length=max_shapelet_length,
                           length_increment=length_increment,
                           position_increment=position_increment)
    callSTS.fit(X_train, y_train)
    print("=" * 80)
    print("set up shapelet learning and transformation: \n")
    print("series_list", np.shape(callSTS.series_list))
    print("class_list", np.shape(callSTS.class_list))
    print("num shapelets", callSTS.n_shapelets)
    print("min_shapelet_length", callSTS.min_shapelet_length)
    print("max_shapelet_length", callSTS.max_shapelet_length)
    print("length_increment", callSTS.length_increment)
    print("position_increment", callSTS.position_increment)
    print("distance function", str(callSTS.dist_func.__name__))
    print("distance parameter", callSTS.dist_func_params)
    if hasattr(callSTS, 'class_distribution'):
        print("class distribution", callSTS.class_distribution)
    print("=" * 80)
    print("=" * 80)
    print("shapelet learning ......")
    _ = callSTS.train()
    print("best k shapelet: ", np.shape(callSTS.best_shapelets_content_))
    print("=" * 80)
    print("=" * 80)
    print("shapelet transformation ......")
    X_train_transform = callSTS.transform(X_train)
    X_test_transform = callSTS.transform(X_test)
    X_train_transform = np.array(X_train_transform)
    X_test_transform = np.array(X_test_transform)
    print("X_train_transform: ", np.shape(X_train_transform))
    print("X_test_transform: ", np.shape(X_test_transform))
    # knn classification on the shapelet-transformed features
    print("=" * 80)
    print("knn classification ......")
    KNNC = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=n_jobs)
    KNNC.fit(X_train_transform, y_train)
    y_pred = KNNC.predict(X_test_transform)
    acc = validation.cal_accuracy(y_test, y_pred)
    print("accuracy: ", acc)
    # log shapelet
    if log_dir is not None:
        file_shapelet = os.path.join(log_dir, "%s_minlen-%s_maxlen-%s" % (dataset, min_shapelet_length, max_shapelet_length))
        print("saving shapelet to %s........." % file_shapelet)
        callSTS.save_shapelet(file_shapelet)
    return acc, callSTS.best_shapelets_content_
# 1-NN benchmark over every dataset in `dataset_` with k-fold resampling.
n_neighbors = 1
n_jobs = 10
k_fold = 10
distfunc = distance.euclidean

# one shared classifier instance, refit per fold
knn_clf = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=n_jobs, distfunc=distfunc)

result = [['dataset name', 'accuracy']]
for ds_name in dataset_:
    # load data set
    train_path = os.path.join(UCR_DATA_ROOT, ds_name, ds_name + '_TRAIN')
    test_path = os.path.join(UCR_DATA_ROOT, ds_name, ds_name + '_TEST')
    X_train, y_train = data_parser.load_ucr(train_path)
    X_test, y_test = data_parser.load_ucr(test_path)
    # z-normalize both splits
    X_train = data_parser.z_normalize(X_train)
    X_test = data_parser.z_normalize(X_test)
    # k-fold validation: score on the current split, then reshuffle it
    fold_accs = []
    for _ in range(k_fold):
        knn_clf.fit(X_train, y_train)
        y_pred = knn_clf.predict(X_test)
        fold_accs.append(validation.cal_accuracy(y_test, y_pred))
        X_train, y_train, X_test, y_test = data_parser.resample_data(X_train, y_train, X_test, y_test)
    result.append((ds_name, sum(fold_accs) / k_fold))
'dist_func': dist_func, 'dist_func_params': dist_func_params } print("=" * 80) print(__doc__) print("\n") result = [] result.append(("data set name", "time", "time of parallel", "size", "length of sub sequence", "length of distance list")) for set_name in dataset: file_train = os.path.join(UCR_DATA_ROOT, set_name, set_name + '_TRAIN') file_test = os.path.join(UCR_DATA_ROOT, set_name, set_name + '_TEST') X_train, _ = data_parser.load_ucr(file_train) X_test, _ = data_parser.load_ucr(file_test) X = np.vstack([X_train, X_test]) m, n = X.shape sublen = int(0.5 * n) subseq = X[0][:sublen] t_start = time() distance_list = [] for i in range(m): dist = distance.dist_subsequence(subsequence=subseq, wholeseries=X[i], **params) distance_list.append(dist) t_end = time() t_non_parallel = t_end - t_start