Exemplo n.º 1
0
	def __runOPF(self, X_train,y_train,index_train,X_test,y_test,index_test, score):
		"""Fit a fresh OPF on one split and update ``score`` from its predictions.

		A new ``SupervisedOPF`` is trained on ``X_train``/``y_train`` (with
		``index_train`` passed as the third ``fit`` argument), ``X_test`` is
		predicted, and the two values returned by ``predict`` are handed to
		``__computeScore`` together with ``y_test`` and ``score``.

		``index_test`` is accepted but not used by this method.
		"""
		classifier = SupervisedOPF(
			distance='log_squared_euclidean',
			pre_computed_distance=None,
		)

		# Train on this split only
		classifier.fit(X_train, y_train, index_train)

		# predict() is unpacked into two sequences here; the second one is
		# used by __computeScore as indices into ``score``
		predicted, conquerors = classifier.predict(X_test)

		self.__computeScore(y_test, predicted, conquerors, score)
Exemplo n.º 2
0
def supervised_opf_feature_selection(opytimizer):
    """Return the validation error of an OPF trained on a feature subset.

    Args:
        opytimizer: 2-D array; its first column is cast to ``bool`` and used
            as a column mask over the module-level ``X_train`` / ``X_val``.

    Returns:
        ``1 - accuracy``, where accuracy comes from ``g.opf_accuracy`` on the
        validation labels ``Y_val`` and the predictions.

    """

    # First column -> boolean mask of the features to keep
    mask = opytimizer[:, 0].astype(bool)

    # Keep only the selected columns in both sets
    train_subset = X_train[:, mask]
    val_subset = X_val[:, mask]

    # Train a new classifier on the reduced training set
    classifier = SupervisedOPF(
        distance='log_squared_euclidean',
        pre_computed_distance=None,
    )
    classifier.fit(train_subset, Y_train)

    # Evaluate on the reduced validation set
    predictions = classifier.predict(val_subset)
    accuracy = g.opf_accuracy(Y_val, predictions)

    return 1 - accuracy
Exemplo n.º 3
0
import opfython.stream.splitter as s
from opfython.models.supervised import SupervisedOPF

# Loads a .txt file into a numpy array
# NOTE(review): `l`, `p`, `g` and `np` are used below but their imports are
# not visible in this excerpt -- confirm they are imported above
txt = l.load_txt('data/boat.txt')

# Parses the pre-loaded numpy array into samples (X) and labels (Y)
X, Y = p.parse_loader(txt)

# Splits the data into training and validation sets (percentage=0.5, fixed seed)
X_train, X_val, Y_train, Y_val = s.split(X, Y, percentage=0.5, random_state=1)

# Loops unconditionally
# NOTE(review): no exit condition is visible in this excerpt -- confirm that
# the full script eventually breaks out of the loop
while True:
    # Creates a new SupervisedOPF instance on every iteration
    opf = SupervisedOPF(distance='log_squared_euclidean',
                        pre_computed_distance=None)

    # Fits training data into the classifier
    opf.fit(X_train, Y_train)

    # Predicts the validation data
    preds = opf.predict(X_val)

    # Calculates the accuracy on the validation set
    acc = g.opf_accuracy(Y_val, preds)

    print(f'Accuracy: {acc}')

    # Gathers the positions of the validation samples that were misclassified
    errors = np.argwhere(Y_val != preds)
    n = np.sqrt(np.random.rand(n_points, 1)) * 780 * (2 * np.pi) / 360
    d1x = -np.cos(n) * n + np.random.rand(n_points, 1) * noise
    d1y = np.sin(n) * n + np.random.rand(n_points, 1) * noise
    return (np.vstack((np.hstack((d1x, d1y)), np.hstack(
        (-d1x, -d1y)))), np.hstack((np.zeros(n_points), np.ones(n_points))))


# Builds the dataset by calling twospirals with an argument of 1000
X, y = twospirals(1000)

# Display names of the classifiers being compared
# NOTE(review): presumably listed in the same order as `classifiers` -- confirm
names = [
    "Nearest Neighbors", "RBF SVM", "Decision Tree", "Naive Bayes", "OPF",
    "VotingClassifier", "Random Forest", "AdaBoost", "XGBoost"
]

# OPF with its default arguments; the commented-out lines are alternative
# configurations kept for reference
#opf = SupervisedOPF(distance = "log_squared_euclidean")
opf = SupervisedOPF()
#opf = KNNSupervisedOPF(max_k = 1)

classifiers = [
    KNeighborsClassifier(3),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=10),
    GaussianNB(), opf,
    VotingClassifier(estimators=[('knn', KNeighborsClassifier(3)),
                                 ('svm', SVC(gamma=2, C=1, probability=True)),
                                 ('gnb', GaussianNB()),
                                 ('dt', DecisionTreeClassifier(max_depth=10))],
                     weights=[1.2, 1.3, 1.1, 0.9],
                     voting="soft",
                     n_jobs=4),
    RandomForestClassifier(max_depth=10, n_estimators=100),
Exemplo n.º 5
0
import opfython.stream.loader as l
import opfython.stream.parser as p
import opfython.stream.splitter as s
from opfython.models.supervised import SupervisedOPF

# Read the raw data file into a numpy array
txt = l.load_txt('data/boat.txt')

# Turn the loaded array into samples (X) and labels (Y)
X, Y = p.parse_loader(txt)

# Split into training and validation sets with a fixed random state
X_train, X_val, Y_train, Y_val = s.split(X, Y, percentage=0.5, random_state=1)

# Build the classifier
opf = SupervisedOPF(
    distance='log_squared_euclidean',
    pre_computed_distance=None,
)

# Run the learning procedure for 10 iterations, passing both the training
# and the validation sets
opf.learn(X_train, Y_train, X_val, Y_val, n_iterations=10)
Exemplo n.º 6
0
	def __init__(self, path_output):
		"""Build the OPF classifier and remember the output directory.

		Args:
			path_output: Stored unchanged on the instance as ``path_output``.
		"""
		self.opfSup = SupervisedOPF(
			distance='log_squared_euclidean',
			pre_computed_distance=None,
		)
		self.path_output = path_output
Exemplo n.º 7
0
class US(object):
    """OPF-based undersampling of a training set.

    ``run`` scores every sample through stratified 5-fold cross-validation
    with a ``SupervisedOPF`` classifier and returns a score table. Each of the
    other public methods applies one removal strategy to that table, saves the
    reduced training set and evaluates a classifier trained on it.

    The score table (``output``) has one row per sample and the columns
    ``[sample index, class label, total score, score in fold 0, ...]``.
    """

    def __init__(self, path_output):
        """Build the OPF classifier used for evaluation.

        Args:
            path_output: Root directory under which the
                ``down_<approach>/<ds>/<f>`` result folders are created.
        """
        self.opfSup = SupervisedOPF(distance='log_squared_euclidean', pre_computed_distance=None)
        self.path_output = path_output

    def __classify(self, x_train, y_train, x_valid, y_valid, minority_class):
        """Train the shared OPF on ``x_train`` and evaluate it on ``x_valid``.

        Returns:
            tuple: ``(accuracy, recall, f1, y_pred)``; recall and F1 are
            computed with ``minority_class`` as the positive label.
        """
        # Training the OPF; samples are identified by their position
        indexes = np.arange(len(x_train))
        self.opfSup.fit(x_train, y_train, indexes)

        # predict() is unpacked into a pair; only the predictions are needed
        y_pred, _ = self.opfSup.predict(x_valid)
        y_pred = np.array(y_pred)

        # Validation measures
        accuracy = accuracy_score(y_valid, y_pred)
        recall = recall_score(y_valid, y_pred, pos_label=minority_class)
        f1 = f1_score(y_valid, y_pred, pos_label=minority_class)
        return accuracy, recall, f1, y_pred

    def __saveResults(self, X_train, Y_train, X_test, Y_test, ds, f, approach, minority_class):
        """Evaluate the reduced training set and write ``pred.txt`` / ``results.txt``.

        Files are written to ``<path_output>/down_<approach>/<ds>/<f>``, which
        is created if it does not exist.
        """
        path = '{}/down_{}/{}/{}'.format(self.path_output, approach, ds, f)
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(path, exist_ok=True)

        accuracy, recall, f1, pred = self.__classify(X_train, Y_train, X_test, Y_test, minority_class)
        results_print = [[0, accuracy, recall, f1]]

        np.savetxt('{}/pred.txt'.format(path), pred, fmt='%d')
        np.savetxt('{}/results.txt'.format(path), results_print, fmt='%d,%.5f,%.5f,%.5f')

    def __saveDataset(self, X_train, Y_train, pathDataset, ds_name):
        """Write the training set to ``<pathDataset>/train_<ds_name>.txt``.

        Each line holds the features formatted as ``%.5f`` followed by the
        label formatted as ``%d``, separated by commas.
        """
        # Read the feature count from the shape so an empty set does not
        # fail on X_train[0]
        n_features = np.shape(X_train)[1]
        DS = np.insert(X_train, n_features, Y_train, axis=1)
        np.savetxt('{}/train_{}.txt'.format(pathDataset, ds_name), DS, fmt='%.5f,' * n_features + '%d')

    def __computeScore(self, labels, preds, conqs, score):
        """Update ``score`` in place from one fold's predictions.

        For every test sample, ``score[conq]`` is incremented when the
        prediction matches the label and decremented otherwise, where ``conq``
        is the matching entry of ``conqs``.
        NOTE(review): ``conqs`` presumably holds the index of the training
        sample that conquered each test sample -- confirm against the
        opfython version in use.
        """
        for label, pred, conq in zip(labels, preds, conqs):
            if label == pred:
                score[conq] += 1
            else:
                score[conq] -= 1

    @staticmethod
    def _remove_samples(X, Y, sample_ids):
        """Return ``X`` and ``Y`` without the rows listed in ``sample_ids``.

        ``sample_ids`` comes from column 0 of the float score table, so it is
        cast to integers first; ``np.delete`` rejects non-integer index arrays
        on current NumPy releases.
        """
        rows = np.asarray(sample_ids).astype(int)
        return np.delete(X, rows, 0), np.delete(Y, rows)

    @staticmethod
    def _negatives_major_zero_rows(output, majority_class):
        """Select rows with a negative score plus majority rows with a zero score."""
        scores = output[:, 2]
        is_majority = output[:, 1] == majority_class
        return output[(scores < 0) | (is_majority & (scores <= 0))]

    @staticmethod
    def _balance_rows(output, majority_class, minority_class):
        """Select the lowest-scoring majority rows to remove for balancing.

        The number of rows is ``len(output) - 2 * n_minority``, clamped at
        zero so that a negative count never slices from the wrong end.
        """
        n_minority = np.count_nonzero(output[:, 1] == minority_class)
        n_to_remove = max(len(output) - 2 * n_minority, 0)

        majority = output[output[:, 1] == majority_class]
        ordered = majority[np.argsort(majority[:, 2])]
        return ordered[:n_to_remove]

    def __undersample(self, removed_rows, X, Y, X_test, Y_test, path, ds, f, approach, minority_class):
        """Drop ``removed_rows`` from the training set, then save and evaluate it."""
        X_train, Y_train = self._remove_samples(X, Y, removed_rows[:, 0])
        self.__saveDataset(X_train, Y_train, path, approach)
        self.__saveResults(X_train, Y_train, X_test, Y_test, ds, f, approach, minority_class)

    def major_negative(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """Case 1: remove majority-class samples with a negative total score."""
        majority = output[output[:, 1] == majority_class]
        removed = majority[majority[:, 2] < 0]
        self.__undersample(removed, X, Y, X_test, Y_test, path, ds, f, 'major_negative', minority_class)

    def major_neutral(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """Case 2: remove majority-class samples with a negative or zero total score."""
        majority = output[output[:, 1] == majority_class]
        removed = majority[majority[:, 2] <= 0]
        self.__undersample(removed, X, Y, X_test, Y_test, path, ds, f, 'major_neutral', minority_class)

    def negative(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """Case 3: remove every sample with a negative total score.

        ``majority_class`` is accepted for signature parity with the other
        strategies but is not used.
        """
        removed = output[output[:, 2] < 0]
        self.__undersample(removed, X, Y, X_test, Y_test, path, ds, f, 'negative', minority_class)

    def negatives_major_zero(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """Case 4: remove majority samples scoring <= 0 and other samples scoring < 0.

        The selection is built from a single combined mask. Filtering to
        negative scores first would discard the zero-score majority samples
        and all non-majority samples before they could be selected.
        """
        removed = self._negatives_major_zero_rows(output, majority_class)
        self.__undersample(removed, X, Y, X_test, Y_test, path, ds, f, 'negatives_major_zero', minority_class)

    def balance(self, output, X, Y, X_test, Y_test, path, majority_class, ds, f, minority_class):
        """Case 5: remove the lowest-scoring majority samples until balanced.

        The minority count is taken from ``minority_class`` rather than from
        a fixed label value.
        """
        removed = self._balance_rows(output, majority_class, minority_class)
        self.__undersample(removed, X, Y, X_test, Y_test, path, ds, f, 'balance', minority_class)

    def __runOPF(self, X_train, y_train, index_train, X_test, y_test, index_test, score):
        """Fit a fresh OPF on one fold and update ``score`` from its predictions.

        ``index_test`` is accepted but not used.
        """
        # Creates a SupervisedOPF instance
        opf = SupervisedOPF(distance='log_squared_euclidean',
                            pre_computed_distance=None)

        # Fits training data into the classifier
        opf.fit(X_train, y_train, index_train)

        # Predicts new data; predict() is unpacked into predictions and the
        # values later used as indices into ``score``
        preds, conqs = opf.predict(X_test)

        self.__computeScore(y_test, preds, conqs, score)

    def run(self, X, Y, indices):
        """Score every sample with stratified 5-fold cross-validation.

        Args:
            X: Samples, indexable by an integer index array.
            Y: Labels, indexable by an integer index array.
            indices: Per-sample identifiers, indexable the same way; must have
                the same length as ``X``.

        Returns:
            numpy.ndarray: Table of shape ``(len(indices), 3 + kfold)`` with
            the columns ``[index, label, total score, per-fold scores...]``.
        """
        kfold = 5  # no. of folds
        skf = StratifiedKFold(kfold, shuffle=True, random_state=1)

        # One row of scores per fold, one column per sample
        score = np.zeros((kfold, len(X)))

        for fold, (train_indices, test_indices) in enumerate(skf.split(X, Y)):
            self.__runOPF(
                X[train_indices], Y[train_indices], indices[train_indices],
                X[test_indices], Y[test_indices], indices[test_indices],
                score[fold],
            )

        # Three leading columns plus one column per fold
        output = np.zeros((len(indices), 3 + kfold))
        output[:, 0] = indices
        output[:, 1] = Y
        output[:, 2] = score.sum(axis=0)
        output[:, 3:] = score.T

        return output
Exemplo n.º 8
0
# XGBoost classifier with default settings; its hyper-parameters are tuned
# by the grid search below
modelXgbEmpty = XGBClassifier()

# 'accuracy' is the name of a scoring metric, so it is passed as `scoring`;
# `error_score` only accepts 'raise' or a number
grdSearch = GridSearchCV(
    modelXgbEmpty,
    {
        'max_depth': [2, 4, 8, 10],
        'n_estimators': [50, 100, 200, 400],
    },
    verbose=1,
    scoring='accuracy',
)
grdSearch.fit(X_train, y_train)
# Bare expression: has no effect when run as a script
# NOTE(review): presumably left over from a notebook cell that displayed it
grdSearch.best_score_, grdSearch.best_params_
y_pred = grdSearch.predict(X_test)
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

# Testing the OPF algorithm (not an ensemble; included for demonstration only)
modelOPF = SupervisedOPF(distance='manhattan')
# Values recorded per distance (NOTE(review): presumably accuracy in % -- confirm):
# manhattan = 74
# squared_euclidean 72
# log_euclidean 74
# bray_curtis 71
# canberra 71
# log_squared_euclidean 74
# gaussian 37
# squared_cord 53

# Shift the labels up by one for the OPF
# NOTE(review): presumably because opfython expects labels starting at 1 -- confirm
y_train_opf = y_train + 1
y_test_opf = y_test + 1

modelOPF.fit(X_train, y_train_opf)
predsOPF = modelOPF.predict(X_test)
Exemplo n.º 9
0
from opfython.models.supervised import SupervisedOPF

# Build a SupervisedOPF that uses the log squared euclidean distance and no
# pre-computed distance file
opf = SupervisedOPF(
    distance='log_squared_euclidean',
    pre_computed_distance=None,
)
Exemplo n.º 10
0
        Args:
            obj (BaseClassifier | OPF): A BaseClassifier or OPF child instance.

        """

        # Creates a property to hold the class itself
        self.obj = obj


# Registry of the available classifiers: maps each identifier to an
# estimator instance wrapped in Classifier
CLF = {
    'dt': Classifier(DecisionTreeClassifier()),
    'linear_svc': Classifier(LinearSVC()),
    'lr': Classifier(LogisticRegression()),
    'nb': Classifier(GaussianNB()),
    'opf': Classifier(SupervisedOPF()),
    'opf_meta': Classifier(SupervisedOPF(distance='canberra')),
    'rf': Classifier(RandomForestClassifier()),
    'svc': Classifier(SVC()),
}


def get_clf(name):
    """Gets a classifier by its identifier.

    Args:
        name (str): Classfier's identifier.

    Returns:
        An instance of the Classifier class.