def saveResults(self):
    """Copy each prediction into the test DataFrame's class column, then
    persist the annotated frame via DataSet.saveResults."""
    # DataFrame.set_value was deprecated in pandas 0.21 and removed in 1.0;
    # .at is the supported label-based scalar setter with identical semantics.
    for i in range(len(self.predictions)):
        self.test_data_set.at[i, self.class_name] = self.predictions[i]
    DataSet.saveResults(self.result_path, self.iteration, self.test_data_set)
def __init__(self, images, labels, validation_size=1000, test_size=2000):
    """Partition images/labels into train / validation / test DataSets.

    Layout: [0, validation_size) -> validation,
            [validation_size, NUM_EXAMPLES - test_size) -> train,
            [NUM_EXAMPLES - test_size, end) -> test.
    """
    self._images = images
    self._labels = labels
    # End of the training slice; the remainder is the test split.
    train_end = NUM_EXAMPLES - test_size
    self.train = DataSet(np.array(images[validation_size:train_end]),
                         np.array(labels[validation_size:train_end]))
    self.test = DataSet(np.array(images[train_end:]),
                        np.array(labels[train_end:]))
    self.validation = DataSet(np.array(images[:validation_size]),
                              np.array(labels[:validation_size]))
    self.dataset = dataset(self.train, self.test, self.validation)
def loadTrainingData(self):
    """Build the training set by concatenating every sub-dataset CSV except
    the fold held out for the current cross-validation iteration."""
    for fold in range(1, self.k + 1):
        # Skip the fold reserved for testing this iteration.
        if ((self.k + 1) - fold) == self.iteration:
            continue
        loaded = DataSet.loadSubDataSet(
            self.file_path + "sub_data_set_" + str(fold) + ".csv")
        if fold == 1:
            self.training_sub_data_set = loaded
        else:
            self.training_sub_data_set = DataSet.concatSubDataSet(
                self.training_sub_data_set, loaded)
        # Drop the local reference; the data lives on in training_sub_data_set.
        del loaded
    print(self.training_sub_data_set)
class TestFilter(unittest.TestCase):
    """Unit tests for the user-based and item-based recommendation filters."""
    # path = os.environ.get("THESIS") + "/data/testData"
    path = "../data/testData"
    data = DataSet(path)
    filter = Filter(data, EuclidianDistance())
    itemFilt = ItemBasedFilter(data, EuclidianDistance())

    def testRecommendations(self):
        """Dave's recommendation list should include HungerGames."""
        recommended = dict(self.filter.getRecommendations("Dave"))
        self.assertIn('HungerGames', recommended)

    def test_kNearestNeighbors(self):
        """Bob should appear among Dave's two nearest neighbors."""
        neighbors = self.filter.kNearestNeighbors("Dave", 2)
        print(neighbors)
        names = [pair[1] for pair in neighbors]
        self.assertIn("Bob", names)

    def test_getItemSimData(self):
        """itemSimDict should be a dict keyed by item names."""
        print(self.itemFilt.itemSimDict)
        self.assertIn("HungerGames", self.itemFilt.itemSimDict,
                      "HungerGames is not in itemSimDict")
        self.assertIsInstance(self.itemFilt.itemSimDict, dict,
                              "itemSimDict is not a dict.")
def loadTestData(self):
    """Load the sub-dataset held out as the test fold for this iteration."""
    fold = (self.k + 1) - self.iteration
    # NOTE: attribute spelled 'teste_sub_data_set' in the original; kept
    # as-is since other code may read it.
    self.teste_sub_data_set = DataSet.loadSubDataSet(
        self.file_path + "sub_data_set_" + str(fold) + ".csv")
    print(self.teste_sub_data_set)
def trainNetwork(shape, learningRate, dataSetPath, epochs, networkType,
                 tensorboard, saveinterval, savepath):
    """Train either the NumPy ('np') or TensorFlow ('tf') network variant.

    `tensorboard` is an option list: None disables logging, an empty list
    falls back to './log/', otherwise the first entry (normalised to end
    with '/') is the log directory.
    """
    dataSet = DataSet(dataSetPath, [shape[0], shape[-1]])
    if networkType == 'np':
        net = Network(shape, ActivationFunction.tanh, dataSet)
        net.train(epochs=epochs, learningRate=learningRate, verbosity=10,
                  saveStep=saveinterval)
    elif networkType == 'tf':
        # Resolve the tensorboard log path (explicit form of the original
        # nested conditional expression).
        if tensorboard is None:
            tensorboardPath = None
        elif len(tensorboard) > 0:
            first = tensorboard[0]
            tensorboardPath = first if first[-1] == '/' else first + '/'
        else:
            tensorboardPath = './log/'
        net = NetworkTF(shape, learningRate=learningRate, dataSet=dataSet,
                        tensorboard=tensorboardPath)
        net.train(epochs=epochs, verbosity=10, saveStep=saveinterval,
                  savePath=savepath)
def train(self):
    """Build, train, evaluate and persist a Model on the './data/' dataset."""
    training_data = DataSet('./data/')
    model = Model()
    model.read_trainData(training_data)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
def trainNetwork(hiddenLayerShape, learningRate, dataSetPath, epochs):
    """Train a TensorFlow network with 3 inputs, the given hidden layers
    and a single output on the dataset at dataSetPath."""
    dataSet = DataSet(dataSetPath, [3, 1])
    layout = [3] + hiddenLayerShape + [1]
    # Vanilla NumPy variant, kept for reference:
    # network = Network(layout, ActivationFunction.tanh, dataSet)
    # network.train(epochs=epochs, learningRate=learningRate, verbosity=10, saveNet=10)
    network = NetworkTF(layout, learningRate=learningRate, dataSet=dataSet)
    network.train(epochs=epochs, saveStep=150, verbosity=0)
# Tail of save(): the enclosing def is outside this view.
self.model.save(file_path)
print('Model Saved.')

def load(self, file_path=FILE_PATH):
    """Load the model from disk when read_save is set and the file exists;
    otherwise build a fresh (untrained) model."""
    if self.read_save and os.path.exists(file_path):
        self.model = load_model(file_path)
    else:
        self.build_model()
    print('Model Loaded.')

# The input img must already be grayscale (channel = 1) and an
# IMAGE_SIZE x IMAGE_SIZE face image.
def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    # Keras expects a 4-D batch: (batch, channel, height, width).
    img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  # per-label probabilities for img
    max_index = np.argmax(result)  # most probable label
    return max_index, result[0][
        max_index]  # (index of most probable label, its probability)

if __name__ == '__main__':
    dataset = DataSet(datasets_path)
    dataset.check()
    model = Model(read_save=read_save)
    model.read_trainData(dataset)
    model.load()
    model.train_model()
    model.evaluate_model()
    model.save()
from sklearn.metrics import accuracy_score

def predict(model, dataset, vari, deli, day):
    """Fit `model` on a 10**4-sample bootstrap for (vari, deli, day) and
    predict that day's feature vector.

    NOTE(review): this helper appears unused by the __main__ driver below,
    which fits and predicts inline instead — confirm before removing.
    """
    X, Y = dataset.bootstrap(vari, deli, day, n=10**4)
    model.fit(X, Y)
    x = dataset.get_x(vari, deli, day)
    return model.predict(x)

if __name__ == '__main__':
    vari, deli, m = 'cu', '1712', 7
    db = DB()
    # All trading days for the contract, skipping the first 30 (warm-up window).
    days = db.execute("select date from contract_daily where vari=%s and deli=%s order by date asc", (vari, deli))[30:]
    data = DataSet(m, 'direction')
    model = NaiveBayes(m)
    y_real = []  # NOTE(review): collected but never filled in the visible code
    y_pred = []
    for day in days:
        model.fit(*data.bootstrap(vari, deli, day, 300))
        r = model.predict(data.get_x(vari, deli, day))
        # Reduce the three class scores to the argmax class (0, 1 or 2).
        if r[0] >= r[1] and r[0] >= r[2]:
            r = 0
        elif r[1] >= r[0] and r[1] >= r[2]:
            r = 1
        else:
            r = 2
        y_pred.append(r)
def load(self, file_path=FILE_PATH):
    """Load a previously saved Keras model from file_path."""
    print('Model Loaded.')
    self.model = load_model(file_path)

def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    img = img.reshape(
        (1, self.IMAGE_SIZE, self.IMAGE_SIZE, 1)
    )  #make sure input img is on "channel = 1" and img size is "IMAGE_SIZE"
    img = img.astype('float32')
    img = img / 255.0  #img is gray scale
    result = self.model.predict_proba(
        img)  #calculate img probability in label
    max_index = np.argmax(result)  #find highest probability
    return max_index, result[0][
        max_index]  #first parameter = highest probability label,second parameter = probability ratio

if __name__ == '__main__':
    # Train a fresh model on the local dataset and persist it.
    dataset = DataSet(
        "C:\\Users\\jimmychen\\Desktop\\chernger\\chernger_faceRecognition\\dataset"
    )
    model = Model()
    model.read_trainData(dataset)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
# Tail of the evaluation method: the enclosing def is outside this view.
# NOTE(review): 'test loss;' looks like a typo for 'test loss:' — it is a
# runtime string, so it is left untouched here.
print('test loss;', loss)
print('test accuracy:', accuracy)

def save(self, file_path=FILE_PATH):
    """Persist the trained Keras model to file_path."""
    print('Model Saved.')
    self.model.save(file_path)

def load(self, file_path=FILE_PATH):
    """Load a previously saved Keras model from file_path."""
    print('Model Loaded.')
    self.model = load_model(file_path)

# The input img must already be grayscale (channel = 1) and an
# IMAGE_SIZE x IMAGE_SIZE face image.
def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  # per-label probabilities for img
    max_index = np.argmax(result)  # most probable label
    return max_index, result[0][
        max_index]  # (index of most probable label, its probability)

if __name__ == '__main__':
    dataset = DataSet('dataset')
    model = Model()
    model.read_trainData(dataset)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
def save(self, file_path=FILE_PATH):
    """Persist the trained Keras model to file_path."""
    print('Model Saved.')
    self.model.save(file_path)

def load(self, file_path=FILE_PATH):
    """Load a previously saved Keras model from file_path."""
    print('Model Loaded.')
    self.model = load_model(file_path)

# The input img must already be grayscale (channel = 1) and an
# IMAGE_SIZE x IMAGE_SIZE face image.
def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  # per-label probabilities for img
    max_index = np.argmax(result)  # most probable label
    return max_index, result[0][
        max_index]  # (index of most probable label, its probability)

if __name__ == '__main__':
    dataset = DataSet(r'D:/my_laboratory/face_detection20180516/dataset')
    model = Model()
    model.read_trainData(dataset)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
def saveResults(self):
    """Write each prediction into the 'classe' column of the test DataFrame
    and persist it under the "clusteredDensityKnn" result path."""
    # DataFrame.set_value was deprecated in pandas 0.21 and removed in 1.0;
    # .at is the supported label-based scalar setter with identical semantics.
    for i in range(len(self.predictions)):
        self.test_data_set.at[i, 'classe'] = self.predictions[i]
    DataSet.saveResults("clusteredDensityKnn", self.iteration, self.test_data_set)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotFunctions as pF
from dataSet import DataSet
import sklearn.metrics as metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Module-level dataset: Parkinson speech features keyed by 'id'.
filedir = '/Users/marianamota/Desktop/DataScience/Data/'
data = DataSet(filedir + 'pd_speech_features.csv', 'id', 1)

def Boost(drop: bool = True, norm: bool = False, threshold: float = 1):
    """Prepare a stratified 70/30 train/test split for an XGBoost run.

    drop=True removes correlated features above `threshold`; otherwise they
    are averaged. norm=True L2-normalises the feature matrix.
    NOTE(review): the function body appears to continue beyond this view.
    """
    if drop:
        full_set = data.compute_data_drop(threshold)
    else:
        full_set = data.compute_data_average(threshold)
    # Target vector: the 'class' column, removed from the feature frame.
    y: np.ndarray = full_set.pop('class').values
    if norm:
        X: np.ndarray = preprocessing.normalize(full_set.values)
    else:
        X: np.ndarray = full_set.values
    labels = pd.unique(y)
    trnX, tstX, trnY, tstY = train_test_split(X, y, train_size=0.7, stratify=y)
def run(self):
    """Score one cross-validation iteration: compare the saved predictions
    against the test set, accumulate a binary confusion matrix, and append
    the per-iteration summary/time/info files."""
    self.number_false_positives = 0
    self.number_false_negatives = 0
    self.number_true_positives = 0
    self.number_true_negatives = 0
    self.total_samples = 0
    self.acc_samples = 0  # correctly classified examples
    self.err_samples = 0  # misclassified examples
    print(self.result_path)
    result_dataframe = DataSet.loadResult(self.result_path, self.iteration)
    # number of distinct classes present in the class attribute
    self.classes = Preprocessor.getClassesPerColumns(
        self.test_data_set, self.class_name)
    acc_classes = []
    err_classes = []
    # position of the "class" attribute in the row vector
    posicao_classe = len(result_dataframe.values[0]) - 2
    for i in range(0, len(result_dataframe.values)):
        self.total_samples += 1
        #print("Real: " + str(self.test_data_set.values[i,posicao_classe]) + " -- predito: " + str(result_dataframe.values[i,posicao_classe]))
        # Values are compared against 0/'0'/'0.0' because the class column may
        # hold either numeric or string representations.
        if (self.test_data_set.values[i, posicao_classe] == '0'
                or self.test_data_set.values[i, posicao_classe] == '0.0'
                or self.test_data_set.values[i, posicao_classe] == 0):
            if (result_dataframe.values[i, posicao_classe] == 0
                    or result_dataframe.values[i, posicao_classe] == '0'
                    or result_dataframe.values[i, posicao_classe] == '0.0'):
                # negative example classified as negative
                self.number_true_negatives += 1
                self.acc_samples += 1
            else:
                # negative example classified as positive
                self.number_false_positives += 1
                self.err_samples += 1
        # NOTE(review): the middle test below reads result_dataframe rather
        # than self.test_data_set — this looks like a copy-paste slip (the
        # ground-truth value was probably intended); confirm before fixing.
        elif (self.test_data_set.values[i, posicao_classe] == '1'
                or result_dataframe.values[i, posicao_classe] == '1.0'
                or self.test_data_set.values[i, posicao_classe] == 1):
            if (result_dataframe.values[i, posicao_classe] == 1
                    or result_dataframe.values[i, posicao_classe] == '1'
                    or result_dataframe.values[i, posicao_classe] == '1.0'):
                # positive example classified as positive
                self.number_true_positives += 1
                self.acc_samples += 1
            else:
                # positive example classified as negative
                self.number_false_negatives += 1
                self.err_samples += 1
    # Summary files for the k cross-validation iterations
    # (each written line represents one iteration).
    arquivoMatriz = open(self.result_path + 'Matriz.txt', 'a+')
    # saved as: TP, FP, FN, TN
    textoMatriz = str(self.number_true_positives) + """,""" + str(
        self.number_false_positives) + """,""" + str(
            self.number_false_negatives) + """,""" + str(
                self.number_true_negatives) + """ """
    arquivoMatriz.write(textoMatriz)
    arquivoMatriz.close()
    arquivoTempo = open(self.result_path + 'tempo.txt', 'a+')
    # saved as: total execution time, training time, test time
    textoTempo = str(self.tempo_execucao) + """,""" + str(
        self.training_time) + """,""" + str(self.test_time) + """ """
    arquivoTempo.write(textoTempo)
    arquivoTempo.close()
    arquivoInfos = open(self.result_path + 'infos.txt', 'a+')
    # saved as: total examples, correct, wrong, % correct (accuracy),
    # % wrong (error rate)
    textoInfos = str(self.total_samples) + """,""" + str(
        self.acc_samples) + """,""" + str(
            self.err_samples) + """,""" + str(
                (100 / float(self.total_samples)) *
                self.acc_samples) + """,""" + str(
                    (100 / float(self.total_samples)) *
                    self.err_samples) + """ """
    arquivoInfos.write(textoInfos)
    arquivoInfos.close()
    # Write the confusion matrix into this iteration's final_info file.
    # NOTE(review): the triple-quoted layout strings were flattened by
    # extraction; their exact whitespace should be checked against the
    # original file.
    arquivo = open(
        self.result_path + 'final_info_' + str(self.iteration) + '.txt', 'w')
    texto = """ MATRIZ DE CONFUSAO Predicao ATAQUE NORMAL |--------||--------| ATAQUE | """ + str(
        self.number_true_positives) + """ || """ + str(
            self.number_false_negatives) + """ | |--------||--------| NORMAL | """ + str(
                self.number_false_positives) + """ || """ + str(
                    self.number_true_negatives) + """ | |--------||--------| """
    texto += """TOTAL DE EXEMPLOS: """ + str(
        self.total_samples) + """ | |--------||--------| """
    texto += """TOTAL DE EXEMPLOS CORRETOS: """ + str(
        self.acc_samples) + """ | |--------||--------| """
    texto += """TOTAL DE EXEMPLOS ERRADOS: """ + str(
        self.err_samples) + """ | |--------||--------| """
    texto += """PORCENTAGEM ACERTOS: """ + str(
        (100 / float(self.total_samples)) *
        self.acc_samples) + """ | |--------||--------| """
    texto += """PORCENTAGEM ERROS: """ + str(
        (100 / float(self.total_samples)) *
        self.err_samples) + """ | |--------||--------| """
    texto += """TEMPO DE EXECUCAO: """ + str(
        self.tempo_execucao) + """ ||| """
    texto += """TEMPO DE TREINO: """ + str(self.training_time) + """ ||| """
    texto += """TEMPO DE TESTE: """ + str(self.test_time) + """ ||| """
    # Recover how many examples were submitted to the second (KNN) classifier.
    if (DataSet.checkPathBoolean(self.result_path +
                                 "../knn_classification/")):
        data_set_knn = DataSet.loadSubDataSet(
            self.result_path + "../knn_classification/cross_" +
            str(self.iteration) + "_final_result.csv")
        texto += """Exemplos submetidos a segunda classificacao: """ + str(
            len(data_set_knn))
        arquivoKNN = open(self.result_path + 'KNN.txt', 'a+')
        textoKNN = str(len(data_set_knn)) + """ """
        arquivoKNN.write(textoKNN)
        arquivoKNN.close()
    arquivo.write(texto)
    arquivo.close()
def save(self, file_path=FILE_PATH):
    """Persist the trained Keras model to file_path."""
    print('Model Saved.')
    self.model.save(file_path)

def load(self, file_path=FILE_PATH):
    """Load a previously saved Keras model from file_path."""
    print('Model Loaded.')
    self.model = load_model(file_path)

# The input img must already be grayscale (channel = 1) and an
# IMAGE_SIZE x IMAGE_SIZE face image.
def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  # per-label probabilities for img
    max_index = np.argmax(result)  # most probable label
    return max_index, result[0][
        max_index]  # (index of most probable label, its probability)

if __name__ == '__main__':
    dataset = DataSet('/Users/suhe/Desktop/img')
    model = Model()
    model.read_trainData(dataset)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
# Tail of fit(): the enclosing def is outside this view. Counts co-occurrences
# then converts counts to Laplace-smoothed probabilities.
for j in range(self.m):
    self.likelihood_tensor[y, j, x[j]] += 1
# Conditional probabilities P(x_dim = value | y) with Laplace smoothing.
for y in range(self.y_classes):
    for x_dim in range(self.m):
        self.likelihood_tensor[y, x_dim] = [
            (self.likelihood_tensor[y, x_dim, x] + self.laplace) /
            (self.prior[y] + self.x_classes * self.laplace)
            for x in range(self.x_classes)
        ]
# Class priors P(y) with Laplace smoothing; n is presumably the sample count
# from the surrounding fit() — confirm against the full method.
self.prior = np.array([(x + self.laplace) /
                       (n + self.y_classes * self.laplace)
                       for x in self.prior])

def predict(self, x):
    """Return a dict {class_index: normalised posterior} for feature vector x."""
    P = [1.] * self.y_classes
    for i in range(self.y_classes):
        P[i] = self.prior[i]
        for j in range(self.m):
            P[i] *= self.likelihood(i, j, x[j])
    s = sum(P)
    P = [1. * x / s for x in P]  # normalise so the posteriors sum to 1
    res = {}
    for i in range(self.y_classes):
        res[i] = P[i]
    return res

if __name__ == '__main__':
    # Smoke test on one contract/day.
    from dataSet import DataSet
    import datetime
    data = DataSet(5, 'direction')
    model = NaiveBayes(5)
    X, Y = data.bootstrap('cu', '1707', datetime.date(2017, 6, 6), n=300)
    model.fit(X, Y)
    print('predict:', model.predict(data.get_x('cu', '1707', datetime.date(2017, 6, 6))))
    print('real:', data.get_y('cu', '1707', datetime.date(2017, 6, 6)))
# Tail of load(): the enclosing def is outside this view.
print('Model Loaded.')
self.model = load_model(file_path)

def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  # per-label probabilities for img
    max_index = np.argmax(result)  # most probable label
    return max_index, result[0][max_index]

if __name__ == '__main__':
    # NOTE(review): 'D:\proj\dataset' relies on \p and \d not being escape
    # sequences; a raw string r'D:\proj\dataset' (same value) would be safer.
    dataset = DataSet('D:\proj\dataset')
    model = Model()
    model.read_trainData(dataset)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
from dataSet import DataSet

# Make the classifier sub-packages importable.
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/rna")
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/hybrid")
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/knn")
from cross_validation import CrossValidation
from preprocessor import Preprocessor
from dataSet import DataSet
from knn_classifier import KnnClassifier
from rna_classifier import RnaClassifier
from hybrid_classifier import HybridClassifier
from rna_module import RnaModule
from knn_module import KnnModule
from evaluate_module import EvaluateModule

# Dataset configuration: NSL-KDD training base with binary classes.
# The commented setFileName lines are alternative bases kept for experiments.
dts = DataSet()
dts.setFilePath("bases/sub_bases_nslkdd_20attribute/")
#dts.setFileName("base_iris.csv")
#dts.setFileName("SmallTrainingSet.csv")
##dts.setFileName("winequality-red.csv")
#dts.setFileName("NSL_KDD-master/20PercentTrainingSet.csv")
dts.setFileName("NSL_KDD-master/KDDTrain+binary_class.csv")
#dts.setFileName("NSL_KDD-master/SmallTrainingSet.csv")
#dts.setFileName("NSL_KDD-master/SmallTrainingSetFiveClass.csv")
#dts.setFileName("../../KDDCUP99/kddcup10%.csv")
#print("load data")
#ts.loadData(10)
# KNN configuration follows below (outside this view).
self.model = load_model(file_path) #需要确保输入的img得是灰化之后(channel =1 )且 大小为IMAGE_SIZE的人脸图片 def predict(self,img): img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE)) img = img.astype('float32') img = img/255.0 result = self.model.predict_proba(img) #测算一下该img属于某个label的概率 max_index = np.argmax(result) #找出概率最高的 return max_index,result[0][max_index] #第一个参数为概率最高的label的index,第二个参数为对应概率 if __name__ == '__main__': datast = DataSet('webface') model = Model() model.read_trainData(datast) model.build_model() model.train_model() model.evaluate_model() model.save()
def build_camera(self):
    """Open the default camera, collect the first 200 frames to (re)train
    the face model, then run live recognition on subsequent frames.

    Frames 1-99 are saved as raw samples; at frame 200 the samples are
    preprocessed, the model is retrained and saved; afterwards each frame
    is classified and annotated with the recognised driver's info.
    """
    hight = 0
    color = (0, 255, 0)
    # OpenCV Haar cascade file, used to locate faces in an image/video frame.
    face_cascade = cv2.CascadeClassifier(
        'C:\\ProgramData\\Anaconda3\\Lib\\site-packages\\cv2\\data\\haarcascade_frontalface_alt2.xml'
    )
    # Open the camera and start reading frames.
    cameraCapture = cv2.VideoCapture(0)
    success, frame = cameraCapture.read()
    name_list = read_name_list('dataset')
    count = 0
    while success and cv2.waitKey(1) == -1:
        success, frame = cameraCapture.read()
        count += 1
        if count < 100:
            cv2.imwrite("images/yjd_sc/" + str(count) + ".jpg", frame)
        elif count == 200:
            # Use the first 200 captured frames as raw images for on-the-fly
            # modelling and training.
            cv2.imwrite("images/yjd_sc/" + str(count) + ".jpg", frame)
            dealImg('images\\yjd_sc', 'images\\yjd_deal')
            readPicSaveFace('images\\yjd_deal', 'dataset\\yjd_21_NEU_boy_001231',
                            '.jpg', '.JPG', 'png', 'PNG')
            dataset = DataSet("dataset")
            self.model.read_trainData(dataset)
            self.model.build_model()
            self.model.train_model()
            self.model.evaluate_model()
            self.model.save()
        else:
            # Apply the freshly trained model to every following frame.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # grayscale for detection
            faces = face_cascade.detectMultiScale(gray, 1.3, 5)  # detect faces
            cv2.putText(frame, "Driver Info", (80, 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, 255, 2, 2)  # info header
            if len(faces) == 0:
                cv2.rectangle(frame, (0, 0), (300, 100), color, 3)
                cv2.putText(frame, "No people !!!", (20, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
            else:
                for (x, y, w, h) in faces:
                    # BUGFIX: numpy images index [row, col] == [y, x]; the
                    # original sliced gray[x:x + w, y:y + h], cropping the
                    # wrong region for any off-centre or non-square face.
                    ROI = gray[y:y + h, x:x + w]
                    ROI = cv2.resize(ROI, (self.img_size, self.img_size),
                                     interpolation=cv2.INTER_LINEAR)
                    # Compare the detected face against the trained model.
                    label, prob = self.model.predict(ROI)
                    if prob > 0.7:
                        # Confidence above 70%: accept the model's label.
                        show_name = name_list[label]
                    else:
                        show_name = 'Stranger'
                    if show_name != "Stranger":
                        # Known driver: show the driver info encoded in the
                        # label as "name_age_university_sex_licence".
                        info = show_name.split('_')
                        infoName = 'Name:' + info[0]
                        infoAge = 'Age:' + info[1]
                        infoUniversity = 'University:' + info[2]
                        infoSex = 'Sex:' + info[3]
                        infoDrivingLN = 'DriverNumber:' + info[4]
                        cv2.rectangle(frame, (0, 0), (280, len(faces) * 220),
                                      color, 3)
                        cv2.putText(frame, info[0], (x, y - 20),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)  # name above the face box
                        cv2.putText(frame, infoDrivingLN, (0, hight + 60),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, 3)  # licence number
                        cv2.putText(frame, infoName, (0, hight + 95),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, 3)  # name
                        cv2.putText(frame, infoAge, (0, hight + 130),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, 3)  # age
                        cv2.putText(frame, infoUniversity, (0, hight + 165),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, 3)  # education
                        cv2.putText(frame, infoSex, (0, hight + 200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, 3)  # sex
                    else:
                        cv2.rectangle(frame, (0, 0), (300, 100), color, 3)
                        cv2.putText(frame, "Stranger", (x, y - 20),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
                        cv2.putText(frame, "It is a Stranger", (20, 60),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
                    # Outline the detected face region.
                    frame = cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.imshow("Camera", frame)
    cameraCapture.release()
    cv2.destroyAllWindows()
# Tail of load(): the enclosing def is outside this view.
print('Model Loaded.')
self.model = load_model(file_path)

#Need to ensure that the input img is grayed out (channel =1) and the size is IMAGE_SIZE face image
def predict(self, img):
    """Return (label_index, probability) for the most likely class of img.

    NOTE(review): the reshape hard-codes a 480x640 single-channel frame
    rather than using self.IMAGE_SIZE — confirm this matches the model input.
    """
    img = img.reshape((1, 480, 640, 1))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  #Calculate the probability that the img belongs to a label
    max_index = np.argmax(result)  #Find the highest probability
    return max_index, result[0][max_index]  #The first parameter is the index of the label with the highest probability, and the second parameter is the corresponding probability.

if __name__ == '__main__':
    datast = DataSet('im')
    model = Model()
    model.read_trainData(datast)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
    #score=model.evaluate()
self.model = load_model(file_path) #需要确保输入的img得是灰化之后(channel =1 )且 大小为IMAGE_SIZE的人脸图片 def predict(self,img): img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE)) img = img.astype('float32') img = img/255.0 result = self.model.predict_proba(img) #测算一下该img属于某个label的概率 max_index = np.argmax(result) #找出概率最高的 return max_index,result[0][max_index] #第一个参数为概率最高的label的index,第二个参数为对应概率 if __name__ == '__main__': dataset = DataSet('D:\\Python\\python_study\\face_detection\\image\\picTest') model = Model() model.read_trainData(dataset) model.build_model() model.train_model() model.evaluate_model() model.save()
def run(self):
    """Hybrid classification pass for one cross-validation iteration.

    An RNA (neural network) classifies confident examples directly; examples
    whose output falls in an intermediate percentile band are forwarded to a
    KNN for a second classification. Results are saved per stage.
    """
    self.rna_classified_samples = []
    self.intermediate_range_samples = []
    self.rna.setDataSet(self.data_set)
    self.rna.setTestDataSet(self.test_data_set)
    self.knn.setDataSet(self.data_set)
    training_time_start = time.time()
    # Generate the neural model for the hybrid approach.
    outputs_training, predictions, history = self.rna.generateHybridModelNovo()
    #print (np.percentile(outputs_training,75))
    positivos = 0
    negativos = 0
    valor_negativo = 0
    valor_positivo = 0
    positivos_serie = []
    negativos_serie = []
    # Split the output-layer values of the last training iteration into a
    # positive set and a negative set.
    for i in range(0, len(outputs_training)):
        if (predictions[i] == 0):
            negativos = negativos + 1
            valor_negativo = valor_negativo + outputs_training[i]
            negativos_serie.append(outputs_training[i])
        elif (predictions[i] == 1):
            positivos = positivos + 1
            valor_positivo = valor_positivo + outputs_training[i]
            positivos_serie.append(outputs_training[i])
    # Build the KNN example base.
    self.knn.buildExamplesBase()
    self.training_time = time.time() - training_time_start
    list_position_rna_classified_samples = []
    list_position_intermediate_range_samples = []
    test_time_start = time.time()
    # Start the test phase: classify through the RNA.
    self.predictions_rna = self.rna.predict()
    self.test_time = time.time() - test_time_start
    tamanho_predicao = len(self.predictions_rna)
    tamanho_data_set = len(self.test_data_set.values)
    # Position of the "class" attribute in the row vector.
    posicao_classe = len(self.test_data_set.values[0]) - 2
    if (self.verifyClassesPredictions(predictions) == True):
        # Define upper/lower bounds from percentiles to delimit the
        # intermediate band (percentile values are set in main.py).
        self.upper_threshold = np.percentile(positivos_serie,
                                             self.percentil_faixa_sup)
        self.lower_threshold = np.percentile(negativos_serie,
                                             (self.percentil_faixa_inf))
        # Check whether each RNA output is inside or outside the band.
        for i in range(0, len(self.predictions_rna)):
            print(self.predictions_rna[i])
            if (self.predictions_rna[i] > (self.upper_threshold)):
                # Confident positive: write the RNA classification into the
                # original test dataframe.
                # NOTE(review): DataFrame.set_value was removed in pandas 1.0;
                # .at is the modern equivalent.
                self.test_data_set.set_value(i, self.class_name, 1)
            elif (self.predictions_rna[i] < (self.lower_threshold)):
                # Confident negative.
                self.test_data_set.set_value(i, self.class_name, 0)
            else:
                # Intermediate band: queue the example for the KNN.
                self.intermediate_range_samples.append(
                    self.test_data_set.values[i, :])
                list_position_intermediate_range_samples.append(i)
        del (self.predictions_rna)
        # Dataframe of examples classified by the RNA.
        dataframe_rna_classified_samples = pandas.DataFrame(
            data=self.rna_classified_samples,
            index=list_position_rna_classified_samples,
            columns=self.test_data_set.columns)
        print(dataframe_rna_classified_samples)
        # Save the RNA-stage results.
        DataSet.saveResults(self.result_path + "rna_classification/",
                            self.iteration, dataframe_rna_classified_samples)
        del (dataframe_rna_classified_samples)
        del (list_position_rna_classified_samples)
    else:
        # Degenerate RNA output: send every test example to the KNN.
        for i in range(0, len(self.predictions_rna)):
            self.intermediate_range_samples.append(
                self.test_data_set.values[i, :])
            list_position_intermediate_range_samples.append(i)
    # Dataframe of the examples classified as intermediate.
    dataframe_intermediate_range_samples = pandas.DataFrame(
        data=self.intermediate_range_samples,
        index=list_position_intermediate_range_samples,
        columns=self.test_data_set.columns)
    # Use the intermediate dataframe as the KNN test set.
    self.knn.setTestDataSet(dataframe_intermediate_range_samples)
    # Save the examples forwarded to the KNN for later identification.
    DataSet.saveResults(self.result_path + "knn_classification/",
                        self.iteration, dataframe_intermediate_range_samples)
    test_time_start = time.time()
    # Run the KNN over the intermediate test examples.
    self.predictions_knn = self.knn.run()
    self.test_time = self.test_time + (time.time() - test_time_start)
    del (self.data_set)
    del (dataframe_intermediate_range_samples)
    # Write the KNN classifications back into the original test dataframe.
    for i in range(0, len(self.predictions_knn)):
        self.test_data_set.set_value(
            list_position_intermediate_range_samples[i], self.class_name,
            self.predictions_knn[i])
    # Save the modified dataframe as the final result.
    DataSet.saveResults(self.result_path + "final_method_classification/",
                        self.iteration, self.test_data_set)
    del (self.test_data_set)
def loadData(self):
    """Clear the terminal, prompt the user for a data location, and load it."""
    os.system('clear')
    location = input(prompt)
    self.dataLocation = location
    self.dataSet = DataSet(location)
# Tail of save(): the enclosing def is outside this view.
print('Model Saved.')
self.model.save(file_path)

def load(self, file_path=FILE_PATH):
    """Load a previously saved Keras model from file_path."""
    print('Model Loaded.')
    self.model = load_model(file_path)

# The input img must already be grayscale (channel = 1) and an
# IMAGE_SIZE x IMAGE_SIZE face image.
def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  # per-label probabilities for img
    max_index = np.argmax(result)  # most probable label
    return max_index, result[0][
        max_index]  # (index of most probable label, its probability)

if __name__ == '__main__':
    # NOTE(review): 'D:\opencv...' relies on \o, \p, \d not being escape
    # sequences; a raw string would be safer (same value).
    dataset = DataSet('D:\opencv\pictures\dataset')
    print(dataset)  # prints the DataSet instance, e.g. <dataSet.DataSet object at 0x...>
    model = Model()
    model.read_trainData(dataset)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
from dataSet import DataSet

# Make the classifier sub-packages importable.
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/rna")
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/hybrid")
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/knn")
from cross_validation import CrossValidation
from preprocessor import Preprocessor
from dataSet import DataSet
from knn_classifier import KnnClassifier
from rna_classifier import RnaClassifier
from hybrid_classifier import HybridClassifier
from rna_module import RnaModule
from knn_module import KnnModule
from evaluate_module import EvaluateModule

# Dataset configuration: iris test base, 6-way split.
# The commented setFileName lines are alternative bases kept for experiments.
dts = DataSet()
dts.setFilePath("bases/gstav_first_attempt/")
dts.setFileName("testes/base_iris.csv")
#dts.setFileName("SmallTrainingSet.csv")
#dts.setFileName("winequality-red.csv")
#dts.setFileName("NSL_KDD-master/20PercentTrainingSet.csv")
#dts.setFileName("NSL_KDD-master/KDDTrain+binary_class.csv")
#dts.setFileName("NSL_KDD-master/SmallTrainingSet.csv")
#dts.setFileName("NSL_KDD-master/SmallTrainingSetFiveClass.csv")
#dts.setFileName("../../KDDCUP99/kddcup10%.csv")
#print("load data")
dts.loadData(6)
def save(self, file_path=FILE_PATH):
    """Persist the trained Keras model to file_path."""
    print('Model Saved.')
    self.model.save(file_path)

def load(self, file_path=FILE_PATH):
    """Load a previously saved Keras model from file_path."""
    print('Model Loaded.')
    self.model = load_model(file_path)

# The input img must already be grayscale (channel = 1) and an
# IMAGE_SIZE x IMAGE_SIZE face image.
def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  # per-label probabilities for img
    max_index = np.argmax(result)  # most probable label
    return max_index, result[0][
        max_index]  # (index of most probable label, its probability)

if __name__ == '__main__':
    # NOTE(review): 'D:\myProject...' relies on \m, \p, \d not being escape
    # sequences; a raw string would be safer (same value).
    dataset = DataSet('D:\myProject\pictures\dataset')
    model = Model()
    model.read_trainData(dataset)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()
def loadTestData(self):
    """Load the CSV fold reserved for the current cross-validation iteration."""
    fold_file = self.file_path + "fold_" + str(self.iteration) + ".csv"
    self.testData = DataSet.loadSubDataSet(fold_file)
# Make the classifier packages (three levels up) importable.
sys.path.append(
    os.path.dirname(os.path.realpath(__file__)) + "/../../../hybrid")
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../../..")
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/../../../knn")
from cross_validation import CrossValidation
from preprocessor import Preprocessor
from dataSet import DataSet
from knn_classifier import KnnClassifier
from rna_classifier import RnaClassifier
from hybrid_classifier import HybridClassifier
from rna_module import RnaModule
from knn_module import KnnModule
from evaluate_module import EvaluateModule

dts = DataSet()
dts.setFilePath("bases/sub_bases/")

# Neural-network configuration: 30-31-1 topology, tanh activations throughout.
rna = RnaModule()
rna.setNumberNeuronsImputLayer(30)
rna.setActivationFunctionImputLayer("tanh")
rna.setImputDimNeurons(30)
rna.setNumberNeuronsHiddenLayer(31)
rna.setActivationFunctionHiddenLayer("tanh")
rna.setNumberNeuronsOutputLayer(1)
rna.setActivationFunctionOutputLayer("tanh")
rna_classifier = RnaClassifier()
rna_classifier.setRna(rna)
# Preprocessor for categorical attributes follows below (outside this view).
def load(self, file_path=FILE_PATH):
    """Load a previously saved Keras model from file_path."""
    print('Model Loaded.')
    self.model = load_model(file_path)

# The input img must already be grayscale (channel = 1) and an
# IMAGE_SIZE x IMAGE_SIZE face image.
def predict(self, img):
    """Return (label_index, probability) for the most likely class of img."""
    img = img.reshape((1, 1, self.IMAGE_SIZE, self.IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0  # scale pixels to [0, 1]
    result = self.model.predict_proba(img)  # per-label probabilities for img
    max_index = np.argmax(result)  # most probable label
    return max_index, result[0][
        max_index]  # (index of most probable label, its probability)

if __name__ == '__main__':
    dataset = DataSet('/home/hezhiqiang/PycharmProjects/pictures/dataset')
    # dataset = extract_data('/home/hezhiqiang/PycharmProjects/pictures/dataset')
    # print dataset.X_test
    # print dataset.Y_test
    model = Model()
    model.read_trainData(dataset)
    model.build_model()
    model.train_model()
    model.evaluate_model()
    model.save()