def KNN_smote_PCA(self):
    """Train and evaluate a 7-nearest-neighbours classifier on SMOTE + PCA data.

    Pipeline: split the data, one-hot encode column 0 (the sex column),
    rebalance the training set with ``self.smote``, standardize, reduce
    with ``self.PCA(..., 5)``, then fit KNN and predict on the test set.

    The PCA step is applied *before* fitting so that the reported metrics
    really describe a model trained on the reduced features (previously
    the model was fitted and evaluated on the un-reduced features and PCA
    only affected the cross-validation input).

    Returns:
        tuple: ``(cross_val_acc, y_test, pred, metrics)`` where
        ``cross_val_acc`` is the value returned by ``self.cross_validation``
        and ``metrics`` is the value returned by ``self.metrics``.
    """
    train, test = self.process_and_split_data()

    # Column 8 holds the label; everything else is a feature.
    x_train = np.delete(train, obj=8, axis=1)
    y_train = train[:, 8]
    x_test = np.delete(test, obj=8, axis=1)
    y_test = test[:, 8]

    # create new columns for sex class
    # NOTE(review): train and test are encoded independently; this assumes
    # both splits contain the same set of categories - confirm.
    new_col = np.array(pd.get_dummies(x_train[:, 0]))
    new_col2 = np.array(pd.get_dummies(x_test[:, 0]))

    # add the new columns to features
    features_train = np.column_stack([x_train, new_col])
    features_test = np.column_stack([x_test, new_col2])

    # delete sex column
    features_train = np.delete(features_train, obj=0, axis=1)
    features_test = np.delete(features_test, obj=0, axis=1)

    # Handle imbalance (training split only)
    features_train, y_train = self.smote(features_train, y_train)

    # standardize data
    preprocess = Preprocessing()
    features_train = preprocess.standardize_data(features_train)
    features_test = preprocess.standardize_data(features_test)

    # PCA - must happen before fitting so the classifier sees the reduced
    # features, matching the other *_PCA experiments in this class.
    features_train = self.PCA(features_train, 5)
    features_test = self.PCA(features_test, 5)

    knn = KNeighborsClassifier(n_neighbors=7)
    knn.fit(features_train, y_train)
    pred = knn.predict(features_test)

    print()
    print("KNN - Accuracy smote with PCA")
    metrics = self.metrics(pred, y_test)
    print()

    # Cross-validate on the full (train + test) reduced data set.
    features = np.vstack((features_train, features_test))
    labels = np.vstack((y_train[:, None], y_test[:, None]))
    cross_val_acc = self.cross_validation(knn, features, labels)

    return cross_val_acc, y_test, pred, metrics
def logistic_regression_oversampled_PCA(self):
    """Fit logistic regression on oversampled, standardized, PCA-reduced data.

    The training split is passed through ``self.pre_process_oversample``
    (called with ``1219`` and ``"positive"``), column 0 (the sex column)
    is replaced by its one-hot encoding, both splits are standardized and
    reduced with ``self.PCA(..., 5)``, and a ``LogisticRegression`` model
    is then fitted and evaluated.

    Returns:
        tuple: ``(cross_val_acc, y_test, pred, metrics)``.
    """
    train, test = self.process_and_split_data()
    balanced_train = self.pre_process_oversample(1219, "positive", train)

    # Labels live in column 8; the remaining columns are features.
    y_train = balanced_train[:, 8]
    y_test = test[:, 8]
    raw_train = np.delete(balanced_train, obj=8, axis=1)
    raw_test = np.delete(test, obj=8, axis=1)

    # One-hot encode the sex column (column 0) of each split.
    sex_train = np.array(pd.get_dummies(raw_train[:, 0]))
    sex_test = np.array(pd.get_dummies(raw_test[:, 0]))

    # Append the encoded columns, then drop the original sex column.
    features_train = np.delete(
        np.column_stack([raw_train, sex_train]), obj=0, axis=1)
    features_test = np.delete(
        np.column_stack([raw_test, sex_test]), obj=0, axis=1)

    # Standardize both splits with the same Preprocessing instance.
    scaler = Preprocessing()
    features_train = scaler.standardize_data(features_train)
    features_test = scaler.standardize_data(features_test)

    # Dimensionality reduction.
    features_train = self.PCA(features_train, 5)
    features_test = self.PCA(features_test, 5)

    model = LogisticRegression()
    model.fit(features_train, y_train)
    pred = model.predict(features_test)

    print()
    print("Logisitic Regression - Accuracy over sampled data after PCA")
    metrics = self.metrics(pred, y_test)

    # Cross-validation runs over the train and test rows stacked together.
    all_features = np.vstack((features_train, features_test))
    all_labels = np.vstack((y_train[:, None], y_test[:, None]))
    cross_val_acc = self.cross_validation(model, all_features, all_labels)

    return cross_val_acc, y_test, pred, metrics
def decision_tree_undersampled_PCA(self):
    """Fit a decision tree on undersampled, standardized, PCA-reduced data.

    The training split is passed through ``self.pre_process_undersample``
    (called with ``1219`` and ``"negative"``), column 0 (the sex column)
    is replaced by its one-hot encoding, both splits are standardized and
    reduced with ``self.PCA(..., 5)``, and a ``DecisionTreeClassifier`` is
    then fitted and evaluated.

    Returns:
        tuple: ``(cross_val_acc, y_test, pred, metrics)``.
    """
    train, test = self.process_and_split_data()
    reduced_train = self.pre_process_undersample(1219, "negative", train)

    # Split each array into features and the label stored in column 8.
    y_train = reduced_train[:, 8]
    y_test = test[:, 8]
    raw_train = np.delete(reduced_train, obj=8, axis=1)
    raw_test = np.delete(test, obj=8, axis=1)

    # create new encoded columns for sex class
    encoded_train = np.array(pd.get_dummies(raw_train[:, 0]))
    encoded_test = np.array(pd.get_dummies(raw_test[:, 0]))

    # Append the encoded columns and drop the original sex column.
    features_train = np.delete(
        np.column_stack([raw_train, encoded_train]), obj=0, axis=1)
    features_test = np.delete(
        np.column_stack([raw_test, encoded_test]), obj=0, axis=1)

    # Standardize both splits with the same Preprocessing instance.
    scaler = Preprocessing()
    features_train = scaler.standardize_data(features_train)
    features_test = scaler.standardize_data(features_test)

    # Dimensionality reduction.
    features_train = self.PCA(features_train, 5)
    features_test = self.PCA(features_test, 5)

    classifier = DecisionTreeClassifier()
    classifier.fit(features_train, y_train)
    pred = classifier.predict(features_test)

    print()
    print("Decision tree - Accuracy under sampled data with PCA")
    metrics = self.metrics(pred, y_test)

    # Cross-validation runs over the train and test rows stacked together.
    all_features = np.vstack((features_train, features_test))
    all_labels = np.vstack((y_train[:, None], y_test[:, None]))
    cross_val_acc = self.cross_validation(
        classifier, all_features, all_labels)

    return cross_val_acc, y_test, pred, metrics
def decision_tree_smote(self):
    """Train and evaluate a decision tree on SMOTE-balanced data (no PCA).

    Pipeline: split the data, one-hot encode column 0 (the sex column),
    rebalance the training set with ``self.smote``, standardize both
    splits, then fit a ``DecisionTreeClassifier`` and predict on the test
    set.

    Fixes relative to the previous version:
      * the test features are now standardized like the training features
        (the assignment had been reduced to a bare ``features_test``
        expression, so the tree was evaluated on unscaled data);
      * the tree's predictions are no longer overwritten by a leftover
        KNN fit/predict, so the reported accuracy and returned ``pred``
        belong to the decision tree;
      * the fourth return value is the result of ``self.metrics`` (as in
        the sibling experiments) instead of the module-level ``metrics``
        object.

    Returns:
        tuple: ``(cross_val_acc, y_test, pred, metrics_result)`` where
        ``metrics_result`` is the value returned by ``self.metrics``.
    """
    train, test = self.process_and_split_data()

    # Column 8 holds the label; everything else is a feature.
    x_train = np.delete(train, obj=8, axis=1)
    y_train = train[:, 8]
    x_test = np.delete(test, obj=8, axis=1)
    y_test = test[:, 8]

    # create new columns for sex class
    # NOTE(review): train and test are encoded independently; this assumes
    # both splits contain the same set of categories - confirm.
    new_col = np.array(pd.get_dummies(x_train[:, 0]))
    new_col2 = np.array(pd.get_dummies(x_test[:, 0]))

    # add the new columns to features
    features_train = np.column_stack([x_train, new_col])
    features_test = np.column_stack([x_test, new_col2])

    # delete sex column
    features_train = np.delete(features_train, obj=0, axis=1)
    features_test = np.delete(features_test, obj=0, axis=1)

    # Handle imbalance (training split only)
    features_train, y_train = self.smote(features_train, y_train)

    # standardize data - both splits, so the tree's thresholds learned on
    # standardized training values are applied to comparable test values.
    preprocess = Preprocessing()
    features_train = preprocess.standardize_data(features_train)
    features_test = preprocess.standardize_data(features_test)

    tree = DecisionTreeClassifier()
    tree.fit(features_train, y_train)
    pred = tree.predict(features_test)

    # `metrics` here is the module-level name the original code already
    # relied on; it must not be rebound locally in this function.
    accuracy = metrics.accuracy_score(y_test, pred)
    print("Decision tree - Accuracy smote data without PCA: ", accuracy)
    metrics_result = self.metrics(pred, y_test)
    print()

    # Cross-validate on the full (train + test) data set.
    features = np.vstack((features_train, features_test))
    labels = np.vstack((y_train[:, None], y_test[:, None]))
    cross_val_acc = self.cross_validation(tree, features, labels)

    return cross_val_acc, y_test, pred, metrics_result
def KNN_oversampled(self):
    """Train and evaluate a 7-nearest-neighbours classifier on oversampled data.

    Pipeline: split the data, pass the training split through
    ``self.pre_process_oversample`` (called with ``1219`` and
    ``"positive"``), one-hot encode column 0 (the sex column), standardize
    both splits, then fit KNN and predict on the test set. No PCA is
    applied.

    The test features are now standardized like the training features;
    previously the assignment had been reduced to a bare ``features_test``
    expression, so a distance-based model fitted on standardized data was
    queried with unscaled points.

    Returns:
        tuple: ``(cross_val_acc, y_test, pred, metrics)`` where
        ``cross_val_acc`` is the value returned by ``self.cross_validation``
        and ``metrics`` is the value returned by ``self.metrics``.
    """
    train, test = self.process_and_split_data()
    train_oversampled = self.pre_process_oversample(
        1219, "positive", train)

    # Column 8 holds the label; everything else is a feature.
    x_train = np.delete(train_oversampled, obj=8, axis=1)
    y_train = train_oversampled[:, 8]
    x_test = np.delete(test, obj=8, axis=1)
    y_test = test[:, 8]

    # create new columns for sex class
    # NOTE(review): train and test are encoded independently; this assumes
    # both splits contain the same set of categories - confirm.
    new_col = np.array(pd.get_dummies(x_train[:, 0]))
    new_col2 = np.array(pd.get_dummies(x_test[:, 0]))

    # add the new columns to features
    features_train = np.column_stack([x_train, new_col])
    features_test = np.column_stack([x_test, new_col2])

    # delete sex column
    features_train = np.delete(features_train, obj=0, axis=1)
    features_test = np.delete(features_test, obj=0, axis=1)

    # standardize data - both splits, so neighbour distances are computed
    # on the same scale the model was fitted on.
    preprocess = Preprocessing()
    features_train = preprocess.standardize_data(features_train)
    features_test = preprocess.standardize_data(features_test)

    knn = KNeighborsClassifier(n_neighbors=7)
    knn.fit(features_train, y_train)
    pred = knn.predict(features_test)

    print()
    print("KNN - Accuracy over sampled data without PCA")
    metrics = self.metrics(pred, y_test)

    # Cross-validate on the full (train + test) data set.
    features = np.vstack((features_train, features_test))
    labels = np.vstack((y_train[:, None], y_test[:, None]))
    cross_val_acc = self.cross_validation(knn, features, labels)

    return cross_val_acc, y_test, pred, metrics
# setting device and default data type device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') torch.set_default_tensor_type('torch.cuda.FloatTensor') # data_loader = fast_loader('yahoo','A3Benchmark') data_loader = fast_loader('nab', 'realKnownCause') # data_loader = fast_loader('kpi') # data-preprocessing for xs, ys, title in data_loader: preprocessor = Preprocessing(xs, ys, q_size, batch_size, device, standardization=standardized, remove_low_freq=low_frq_remove, window_standardization=window_stand, scaling=normalized) train_x, train_y, test_x, test_y = preprocessor.get_data() print('Data are ready') train_idx_anomaly, train_idx_normal, test_idx_anomaly, test_idx_normal = preprocessor.get_index( ) anomaly_is_there(test_idx_anomaly) # plotting window = plt.figure() te_l = window.add_subplot(311)