def __runOPF(self, X_train, y_train, index_train, X_test, y_test, index_test, score):
    """Train a new OPF on one split and fold its test predictions into `score`.

    Args:
        X_train: Training samples.
        y_train: Training labels.
        index_train: Per-sample indices forwarded to `fit` with the training data.
        X_test: Samples to predict.
        y_test: True labels of `X_test`.
        index_test: Accepted for symmetry with `index_train`; not used here.
        score: Score container handed to `self.__computeScore`, together with
            the true labels and both values returned by `predict`.
    """
    # A fresh classifier per call, so nothing carries over between splits
    classifier = SupervisedOPF(
        distance='log_squared_euclidean',
        pre_computed_distance=None,
    )
    classifier.fit(X_train, y_train, index_train)

    # `predict` yields two values here: the predictions and a second sequence
    # that is passed on as `conqs`
    predictions, conquerors = classifier.predict(X_test)

    self.__computeScore(y_test, predictions, conquerors, score)
def supervised_opf_feature_selection(opytimizer):
    """Return the validation error of an OPF trained on a subset of features.

    Reads the module-level `X_train`, `Y_train`, `X_val` and `Y_val`.

    Args:
        opytimizer: 2-D array whose first column, cast to bool, selects which
            feature columns are kept.

    Returns:
        `1 - accuracy` on the validation set, so lower is better.
    """
    # First column of the candidate solution acts as the feature mask
    mask = opytimizer[:, 0].astype(bool)

    # Restrict both subsets to the chosen columns
    train_subset = X_train[:, mask]
    val_subset = X_val[:, mask]

    classifier = SupervisedOPF(
        distance='log_squared_euclidean',
        pre_computed_distance=None,
    )
    classifier.fit(train_subset, Y_train)
    predictions = classifier.predict(val_subset)

    # Turn accuracy into a quantity to minimise
    return 1 - g.opf_accuracy(Y_val, predictions)
X, Y = p.parse_loader(txt) # Splitting data into training and validation sets X_train, X_val, Y_train, Y_val = s.split(X, Y, percentage=0.5, random_state=1) # Creates a always true loop while True: # Creates a SupervisedOPF instance opf = SupervisedOPF(distance='log_squared_euclidean', pre_computed_distance=None) # Fits training data into the classifier opf.fit(X_train, Y_train) # Predicts new data preds = opf.predict(X_val) # Calculating accuracy acc = g.opf_accuracy(Y_val, preds) print(f'Accuracy: {acc}') # Gathers which samples were missclassified errors = np.argwhere(Y_val != preds) # If there are no missclassified samples if len(errors) == 0: # Breaks the process break # For every wrong classified sample
import opfython.stream.parser as p
import opfython.stream.splitter as s
from opfython.models.supervised import SupervisedOPF

# Load the dataset from a .txt file into a numpy array
# NOTE(review): `l` (and `g` further down) are not imported in this excerpt -
# presumably imported earlier in the file; confirm
txt = l.load_txt('data/boat.txt')

# Parse the loaded array into samples (X) and labels (Y)
X, Y = p.parse_loader(txt)

# Split the data in half into training and testing sets; the fixed
# random_state keeps the split reproducible
X_train, X_test, Y_train, Y_test = s.split(X, Y, percentage=0.5, random_state=1)

# Create a SupervisedOPF instance that computes distances on the fly
# (no pre-computed distance file)
opf = SupervisedOPF(distance='log_squared_euclidean', pre_computed_distance=None)

# Fit the classifier on the training data
opf.fit(X_train, Y_train)

# Predict labels for the held-out test samples
preds = opf.predict(X_test)

# Compute and report the accuracy on the test set
acc = g.opf_accuracy(Y_test, preds)
print(f'Accuracy: {acc}')
class US(object):
    """Undersampling strategies driven by per-sample OPF scores.

    `run` builds a score table with one row per sample: column 0 holds the
    sample index, column 1 the label, column 2 the score summed over all
    folds, and the remaining columns the per-fold scores.  The strategy
    methods (`major_negative`, `major_neutral`, `negative`,
    `negatives_major_zero`, `balance`) take that table, drop the selected
    rows from the training set, save the reduced set and the classification
    results obtained with it.

    NOTE(review): the values in column 0 are used as row positions into
    `X`/`Y`, so the `indices` given to `run` are presumably `0..len(X)-1`;
    confirm against the caller.
    """

    def __init__(self, path_output):
        """Create the shared OPF classifier and remember the output root.

        Args:
            path_output: Root directory under which result files are written.
        """
        self.opfSup = SupervisedOPF(distance='log_squared_euclidean', pre_computed_distance=None)
        self.path_output = path_output

    def __classify(self, x_train, y_train, x_valid, y_valid, minority_class):
        """Train the shared OPF and evaluate it on the validation samples.

        Args:
            x_train: Training samples.
            y_train: Training labels.
            x_valid: Validation samples.
            y_valid: Validation labels.
            minority_class: Label treated as the positive class for recall/F1.

        Returns:
            Tuple `(accuracy, recall, f1, y_pred)` with `y_pred` as an array.
        """
        # Training the OPF; positions in the training set serve as indices
        indexes = np.arange(len(x_train))
        self.opfSup.fit(x_train, y_train, indexes)

        # predict returns a pair; only the predicted labels are needed here
        y_pred, _ = self.opfSup.predict(x_valid)
        y_pred = np.array(y_pred)

        accuracy = accuracy_score(y_valid, y_pred)
        recall = recall_score(y_valid, y_pred, pos_label=minority_class)
        f1 = f1_score(y_valid, y_pred, pos_label=minority_class)

        return accuracy, recall, f1, y_pred

    def __saveResults(self, X_train, Y_train, X_test, Y_test, ds, f, approach, minority_class):
        """Classify the test set and write predictions and metrics to disk.

        Files go to `<path_output>/down_<approach>/<ds>/<f>/`: `pred.txt`
        (one predicted label per line) and `results.txt` (a single line
        `0,accuracy,recall,f1`).
        """
        path = '{}/down_{}/{}/{}'.format(self.path_output, approach, ds, f)
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(path, exist_ok=True)

        accuracy, recall, f1, pred = self.__classify(X_train, Y_train, X_test, Y_test, minority_class)
        results_print = [[0, accuracy, recall, f1]]

        np.savetxt('{}/pred.txt'.format(path), pred, fmt='%d')
        np.savetxt('{}/results.txt'.format(path), results_print, fmt='%d,%.5f,%.5f,%.5f')

    def __saveDataset(self, X_train, Y_train, pathDataset, ds_name):
        """Write the training set to `<pathDataset>/train_<ds_name>.txt`.

        Each line holds the features with five decimals followed by the
        integer label, comma separated.
        """
        n_features = np.shape(X_train)[1]
        # Append the labels as the last column
        DS = np.insert(X_train, n_features, Y_train, axis=1)
        np.savetxt('{}/train_{}.txt'.format(pathDataset, ds_name), DS, fmt='%.5f,' * n_features + '%d')

    def __computeScore(self, labels, preds, conqs, score):
        """Update `score` in place from one batch of predictions.

        For every sample, the entry `score[conq]` is incremented when the
        prediction matches the label and decremented otherwise.
        """
        for label, pred, conq in zip(labels, preds, conqs):
            if label == pred:
                score[conq] += 1
            else:
                score[conq] -= 1

    @staticmethod
    def __dropSamples(X, Y, rows):
        """Return copies of `X` and `Y` without the given row positions."""
        # The positions come from a float-typed table column; np.delete only
        # accepts integer indices, so cast explicitly
        rows = np.asarray(rows).astype(int)
        return np.delete(X, rows, 0), np.delete(Y, rows)

    def __undersample(self, rows, X, Y, X_test, Y_test, path, ds, f, approach, minority_class):
        """Drop `rows` from the training set, then save the set and its results."""
        X_train, Y_train = self.__dropSamples(X, Y, rows)
        self.__saveDataset(X_train, Y_train, path, approach)
        self.__saveResults(X_train, Y_train, X_test, Y_test, ds, f, approach, minority_class)

    def major_negative(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """1st case: remove majority-class samples with negative scores."""
        selected = (output[:, 1] == majority_class) & (output[:, 2] < 0)
        self.__undersample(output[selected, 0], X, Y, X_test, Y_test, path, ds, f,
                           'major_negative', minority_class)

    def major_neutral(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """2nd case: remove majority-class samples with negative or zero scores."""
        selected = (output[:, 1] == majority_class) & (output[:, 2] <= 0)
        self.__undersample(output[selected, 0], X, Y, X_test, Y_test, path, ds, f,
                           'major_neutral', minority_class)

    def negative(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """3rd case: remove every sample with a negative score."""
        selected = output[:, 2] < 0
        self.__undersample(output[selected, 0], X, Y, X_test, Y_test, path, ds, f,
                           'negative', minority_class)

    def negatives_major_zero(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """4th case: remove all negative-score samples plus zero-score majority samples.

        That is: majority-class samples with negative or zero scores, and
        samples of any other class with negative scores.
        """
        scores = output[:, 2]
        is_majority = output[:, 1] == majority_class
        # Filtering negatives first would discard the zero-score majority
        # samples before they can be selected, so build the union directly
        selected = (scores < 0) | (is_majority & (scores <= 0))
        self.__undersample(output[selected, 0], X, Y, X_test, Y_test, path, ds, f,
                           'negatives_major_zero', minority_class)

    def balance(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """5th case: remove the lowest-scoring majority samples until balanced.

        The number of samples removed is `len(output) - 2 * n_minority`,
        never less than zero.
        """
        # find the number of samples to remove
        n_samples = len(output)
        n_samples_minority = int(np.count_nonzero(output[:, 1] == minority_class))
        # Clamp at zero: a negative count would slice from the end instead
        n_samples_to_remove = max(n_samples - 2 * n_samples_minority, 0)

        # sort samples from majority class by score (stable, so ties keep
        # their table order and the result is deterministic)
        output_majority = output[output[:, 1] == majority_class]
        order = np.argsort(output_majority[:, 2], kind='stable')

        # remove the lowest-scoring samples, save new dataset and results
        rows = output_majority[order[:n_samples_to_remove], 0]
        self.__undersample(rows, X, Y, X_test, Y_test, path, ds, f, 'balance', minority_class)

    def __runOPF(self, X_train, y_train, index_train, X_test, y_test, index_test, score):
        """Fit a fresh OPF on one fold and accumulate its scores into `score`.

        `index_test` is accepted for symmetry but not used.
        """
        # Creates a SupervisedOPF instance
        opf = SupervisedOPF(distance='log_squared_euclidean', pre_computed_distance=None)

        # Fits training data into the classifier
        opf.fit(X_train, y_train, index_train)

        # Predicts new data; the second value is passed on as `conqs`
        preds, conqs = opf.predict(X_test)

        self.__computeScore(y_test, preds, conqs, score)

    def run(self, X, Y, indices, kfold=5):
        """Score every sample with a stratified k-fold OPF evaluation.

        Args:
            X: Samples.
            Y: Labels.
            indices: Per-sample indices, stored in column 0 of the result.
            kfold: Number of folds (default 5).

        Returns:
            Array of shape `(len(indices), 3 + kfold)`: index, label, summed
            score, then one score column per fold.
        """
        # Create stratified k-fold subsets
        skf = StratifiedKFold(kfold, shuffle=True, random_state=1)

        # One row of scores per fold, one column per sample
        score = np.zeros((kfold, len(X)))
        for fold, (train_indices, test_indices) in enumerate(skf.split(X, Y)):
            self.__runOPF(X[train_indices], Y[train_indices], indices[train_indices],
                          X[test_indices], Y[test_indices], indices[test_indices],
                          score[fold])

        score_t = np.transpose(score)
        output = np.zeros((len(indices), 3 + kfold))
        output[:, 0] = indices
        output[:, 1] = Y
        output[:, 2] = np.sum(score_t, axis=1)
        output[:, 3:] = score_t

        return output
# Grid-search the model's hyper-parameters, selecting by accuracy.
# The metric belongs in `scoring`; `error_score` only accepts 'raise' or a
# number (the score assigned when a fit fails), so 'accuracy' is not valid there.
grdSearch = GridSearchCV(
    modelXgbEmpty,
    {
        'max_depth': [2, 4, 8, 10],
        'n_estimators': [50, 100, 200, 400],
    },
    verbose=1,
    scoring='accuracy',
)
grdSearch.fit(X_train, y_train)

# Report the best cross-validated score and the parameters that produced it
# (a bare expression shows nothing when run as a script)
print(grdSearch.best_score_, grdSearch.best_params_)

# Evaluate the best estimator on the held-out test set
y_pred = grdSearch.predict(X_test)
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

# Testing the OPF algorithm (not an ensemble; included for demonstration only)
modelOPF = SupervisedOPF(distance='manhattan')
# Figures noted per distance (presumably accuracy in % - confirm):
#   manhattan = 74
#   squared_euclidean 72
#   log_euclidean 74
#   bray_curtis 71
#   canberra 71
#   log_squared_euclidean 74
#   squared_euclidean 72
#   gaussian 37
#   squared_cord 53

# Shift the labels by one for OPF
# NOTE(review): presumably because the OPF implementation expects labels
# starting at 1 - confirm
y_train_opf = y_train + 1
y_test_opf = y_test + 1

modelOPF.fit(X_train, y_train_opf)
predsOPF = modelOPF.predict(X_test)
print(accuracy_score(y_test_opf, predsOPF))