def KNN_smote_PCA(self):
        """Evaluate a 7-nearest-neighbour classifier on SMOTE-balanced, PCA-reduced data.

        Steps: split the data, one-hot encode column 0, rebalance the
        training split with ``self.smote``, standardize both splits, reduce
        both splits to 5 components with ``self.PCA``, then fit the
        classifier and predict on the test split.

        Returns:
            tuple: ``(cross_val_acc, y_test, pred, metrics)`` where
            ``cross_val_acc`` is the result of ``self.cross_validation`` on
            the stacked train+test features, ``y_test`` are the test labels,
            ``pred`` are the test predictions and ``metrics`` is whatever
            ``self.metrics(pred, y_test)`` returns.
        """
        train, test = self.process_and_split_data()

        # Column 8 holds the labels; the remaining columns are features.
        x_train = np.delete(train, obj=8, axis=1)
        y_train = train[:, 8]
        x_test = np.delete(test, obj=8, axis=1)
        y_test = test[:, 8]

        # One-hot encode the sex column (column 0).
        # NOTE(review): train and test are encoded independently, so the
        # dummy columns only line up if both splits contain the same
        # categories -- confirm against process_and_split_data.
        new_col = np.array(pd.get_dummies(x_train[:, 0]))
        new_col2 = np.array(pd.get_dummies(x_test[:, 0]))

        # Append the encoded columns to the features
        features_train = np.column_stack([x_train, new_col])
        features_test = np.column_stack([x_test, new_col2])

        # Delete the original sex column
        features_train = np.delete(features_train, obj=0, axis=1)
        features_test = np.delete(features_test, obj=0, axis=1)

        # Handle imbalance (training split only)
        features_train, y_train = self.smote(features_train, y_train)

        # Standardize data
        preprocess = Preprocessing()
        features_train = preprocess.standardize_data(features_train)
        features_test = preprocess.standardize_data(features_test)

        # PCA must run BEFORE fitting: previously the model was fitted and
        # evaluated on the un-reduced features, so the reported "with PCA"
        # metrics did not involve PCA at all.
        features_train = self.PCA(features_train, 5)
        features_test = self.PCA(features_test, 5)

        knn = KNeighborsClassifier(n_neighbors=7)
        knn.fit(features_train, y_train)
        pred = knn.predict(features_test)

        print()
        print("KNN - Accuracy smote with PCA")
        metrics = self.metrics(pred, y_test)
        print()

        # Cross-validate on all (PCA-reduced) samples, train and test stacked.
        features = np.vstack((features_train, features_test))
        labels = np.vstack((y_train[:, None], y_test[:, None]))

        cross_val_acc = self.cross_validation(knn, features, labels)

        return cross_val_acc, y_test, pred, metrics
    def logistic_regression_oversampled_PCA(self):
        """Fit logistic regression on oversampled, PCA-reduced data and report it.

        Pipeline: split -> oversample the training split via
        ``self.pre_process_oversample`` -> one-hot encode column 0 ->
        standardize -> reduce to 5 components via ``self.PCA`` -> fit and
        predict -> metrics -> cross-validation on train and test stacked.

        Returns:
            tuple: ``(cross_val_acc, y_test, pred, metrics)``.
        """
        train, test = self.process_and_split_data()
        balanced_train = self.pre_process_oversample(1219, "positive", train)

        # Column 8 carries the labels; all other columns are features.
        y_train, y_test = balanced_train[:, 8], test[:, 8]
        raw_train = np.delete(balanced_train, obj=8, axis=1)
        raw_test = np.delete(test, obj=8, axis=1)

        # One-hot encode the sex column (column 0) of each split.
        encoded_train = np.array(pd.get_dummies(raw_train[:, 0]))
        encoded_test = np.array(pd.get_dummies(raw_test[:, 0]))

        # Append the encoded columns, then drop the original sex column.
        features_train = np.delete(
            np.column_stack([raw_train, encoded_train]), obj=0, axis=1)
        features_test = np.delete(
            np.column_stack([raw_test, encoded_test]), obj=0, axis=1)

        # Standardize both splits.
        scaler = Preprocessing()
        features_train = scaler.standardize_data(features_train)
        features_test = scaler.standardize_data(features_test)

        # Reduce both splits to 5 components.
        features_train = self.PCA(features_train, 5)
        features_test = self.PCA(features_test, 5)

        model = LogisticRegression()
        model.fit(features_train, y_train)
        pred = model.predict(features_test)

        print()
        print("Logisitic Regression - Accuracy over sampled data after PCA")
        metrics = self.metrics(pred, y_test)

        # Cross-validation runs over every sample: train rows first, then test.
        all_features = np.vstack((features_train, features_test))
        all_labels = np.concatenate(
            (y_train[:, None], y_test[:, None]), axis=0)
        cross_val_acc = self.cross_validation(model, all_features, all_labels)

        return cross_val_acc, y_test, pred, metrics
    def decision_tree_undersampled_PCA(self):
        """Fit a decision tree on undersampled, PCA-reduced data and report it.

        The training split is passed through ``self.pre_process_undersample``,
        column 0 is one-hot encoded, both splits are standardized and reduced
        to 5 components with ``self.PCA``; the tree is then fitted, evaluated
        on the test split and cross-validated on train and test stacked.

        Returns:
            tuple: ``(cross_val_acc, y_test, pred, metrics)``.
        """
        train, test = self.process_and_split_data()
        reduced_train = self.pre_process_undersample(1219, "negative", train)

        # Separate the label column (index 8) from the feature columns.
        x_train = np.delete(reduced_train, obj=8, axis=1)
        x_test = np.delete(test, obj=8, axis=1)
        y_train = reduced_train[:, 8]
        y_test = test[:, 8]

        # Encoded replacement columns for the sex class (column 0).
        dummies_train = np.array(pd.get_dummies(x_train[:, 0]))
        dummies_test = np.array(pd.get_dummies(x_test[:, 0]))

        # Train split: append the encoded columns, drop the raw column.
        features_train = np.column_stack([x_train, dummies_train])
        features_train = np.delete(features_train, obj=0, axis=1)

        # Test split: same treatment.
        features_test = np.column_stack([x_test, dummies_test])
        features_test = np.delete(features_test, obj=0, axis=1)

        # Standardize, train split first.
        standardizer = Preprocessing()
        features_train = standardizer.standardize_data(features_train)
        features_test = standardizer.standardize_data(features_test)

        # Keep 5 principal components per split.
        n_components = 5
        features_train = self.PCA(features_train, n_components)
        features_test = self.PCA(features_test, n_components)

        classifier = DecisionTreeClassifier()
        classifier.fit(features_train, y_train)
        pred = classifier.predict(features_test)

        print()
        print("Decision tree - Accuracy under sampled data with PCA")
        metrics = self.metrics(pred, y_test)

        # Stack train on top of test for cross-validation.
        stacked_features = np.vstack((features_train, features_test))
        stacked_labels = np.concatenate(
            (y_train[:, None], y_test[:, None]), axis=0)

        cross_val_acc = self.cross_validation(
            classifier, stacked_features, stacked_labels)

        return cross_val_acc, y_test, pred, metrics
    def decision_tree_smote(self):
        """Evaluate a decision tree on SMOTE-balanced data without PCA.

        Steps: split the data, one-hot encode column 0, rebalance the
        training split with ``self.smote``, standardize both splits, fit a
        ``DecisionTreeClassifier`` and predict on the test split.

        Fixes relative to the previous version:
          * the test features are now standardized like the training
            features (the statement had been reduced to a no-op);
          * the predictions come from the decision tree -- a leftover KNN
            block used to overwrite them;
          * the fourth return value is the result of ``self.metrics`` (as in
            the sibling methods) instead of the module-level ``metrics``
            object.

        Returns:
            tuple: ``(cross_val_acc, y_test, pred, metrics)`` where the last
            element is whatever ``self.metrics(pred, y_test)`` returns.
        """
        train, test = self.process_and_split_data()

        # Column 8 holds the labels; the remaining columns are features.
        x_train = np.delete(train, obj=8, axis=1)
        y_train = train[:, 8]
        x_test = np.delete(test, obj=8, axis=1)
        y_test = test[:, 8]

        # One-hot encode the sex column (column 0).
        new_col = np.array(pd.get_dummies(x_train[:, 0]))
        new_col2 = np.array(pd.get_dummies(x_test[:, 0]))

        # Append the encoded columns to the features
        features_train = np.column_stack([x_train, new_col])
        features_test = np.column_stack([x_test, new_col2])

        # Delete the original sex column
        features_train = np.delete(features_train, obj=0, axis=1)
        features_test = np.delete(features_test, obj=0, axis=1)

        # Handle imbalance (training split only)
        features_train, y_train = self.smote(features_train, y_train)

        # Standardize data -- both splits, so the model predicts on inputs
        # scaled the same way as the ones it was trained on.
        preprocess = Preprocessing()
        features_train = preprocess.standardize_data(features_train)
        features_test = preprocess.standardize_data(features_test)

        tree = DecisionTreeClassifier()
        tree.fit(features_train, y_train)
        pred = tree.predict(features_test)

        # `metrics` here is the module-level name the file already uses for
        # accuracy_score; the method result is kept under a different local
        # name so that global is not shadowed.
        accuracy = metrics.accuracy_score(y_test, pred)
        print("Decision tree - Accuracy smote data without PCA: ", accuracy)
        # NOTE(review): self.metrics may print its own report as well --
        # confirm the combined console output is acceptable.
        tree_metrics = self.metrics(pred, y_test)
        print()

        # Cross-validate on all samples, train and test stacked.
        features = np.vstack((features_train, features_test))
        labels = np.vstack((y_train[:, None], y_test[:, None]))

        cross_val_acc = self.cross_validation(tree, features, labels)

        return cross_val_acc, y_test, pred, tree_metrics
    def KNN_oversampled(self):
        """Evaluate a 7-nearest-neighbour classifier on oversampled data without PCA.

        Steps: split the data, oversample the training split with
        ``self.pre_process_oversample``, one-hot encode column 0,
        standardize both splits, fit the classifier and predict on the test
        split.

        Fix relative to the previous version: the test features are now
        standardized (the statement had been reduced to a no-op), so the
        distance-based model no longer compares standardized training points
        with raw test points.

        Returns:
            tuple: ``(cross_val_acc, y_test, pred, metrics)`` where
            ``metrics`` is whatever ``self.metrics(pred, y_test)`` returns.
        """
        train, test = self.process_and_split_data()

        train_oversampled = self.pre_process_oversample(
            1219, "positive", train)

        # Column 8 holds the labels; the remaining columns are features.
        x_train = np.delete(train_oversampled, obj=8, axis=1)
        y_train = train_oversampled[:, 8]
        x_test = np.delete(test, obj=8, axis=1)
        y_test = test[:, 8]

        # One-hot encode the sex column (column 0).
        new_col = np.array(pd.get_dummies(x_train[:, 0]))
        new_col2 = np.array(pd.get_dummies(x_test[:, 0]))

        # Append the encoded columns to the features
        features_train = np.column_stack([x_train, new_col])
        features_test = np.column_stack([x_test, new_col2])

        # Delete the original sex column
        features_train = np.delete(features_train, obj=0, axis=1)
        features_test = np.delete(features_test, obj=0, axis=1)

        # Standardize data -- both splits
        preprocess = Preprocessing()
        features_train = preprocess.standardize_data(features_train)
        features_test = preprocess.standardize_data(features_test)

        knn = KNeighborsClassifier(n_neighbors=7)
        knn.fit(features_train, y_train)
        pred = knn.predict(features_test)

        print()
        print("KNN - Accuracy over sampled data without PCA")
        metrics = self.metrics(pred, y_test)

        # Cross-validate on all samples, train and test stacked.
        features = np.vstack((features_train, features_test))
        labels = np.vstack((y_train[:, None], y_test[:, None]))

        cross_val_acc = self.cross_validation(knn, features, labels)

        return cross_val_acc, y_test, pred, metrics