# -*- coding: UTF-8 -*-
import numpy as np
from ocr import OCRNeuralNetwork
from sklearn.cross_validation import train_test_split


def test(data_matrix, data_labels, test_indices, nn):
    correct_guess_count = 0
    for i in test_indices:
        test = data_matrix[i]
        prediction = nn.predict(test)
        if data_labels[i] == prediction:
            correct_guess_count += 1
    return correct_guess_count / float(len(test_indices))


data_matrix = np.loadtxt(open('../data/simple_ocr/data.csv', 'rb'), delimiter=',').tolist()
data_labels = np.loadtxt(open('../data/simple_ocr/dataLabels.csv', 'rb')).tolist()

# Create training and testing sets.
train_indices, test_indices = train_test_split(list(range(5000)))

print "PERFORMANCE"
print "-----------"

for i in xrange(5, 50, 5):
    nn = OCRNeuralNetwork(i, data_matrix, data_labels, train_indices, False)
    performance = str(test(data_matrix, data_labels, test_indices, nn))
    print "{i} Hidden Nodes: {val}".format(i=i, val=performance)
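One caveat about the script above: the sklearn.cross_validation module it imports was deprecated in scikit-learn 0.18 and removed in 0.20. On newer releases the equivalent split comes from model_selection; a minimal sketch of the substitution:

from sklearn.model_selection import train_test_split

# Same behaviour as above: shuffle the 5000 indices and hold out 25% for testing.
train_indices, test_indices = train_test_split(list(range(5000)))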
import json
import random
import BaseHTTPServer

import numpy as np
from ocr import OCRNeuralNetwork

HOST_NAME = 'localhost'
PORT_NUMBER = 8000
HIDDEN_NODE_COUNT = 15

# Load data samples and labels into matrix
data_matrix = np.loadtxt(open('data.csv', 'rb'), delimiter=',')
data_labels = np.loadtxt(open('dataLabels.csv', 'rb'))

# Convert from numpy ndarrays to python lists
data_matrix = data_matrix.tolist()
data_labels = data_labels.tolist()

# If a neural network file does not exist, train it using all 5000 existing data samples.
# Based on data collected from neural_network_design.py, 15 is the optimal number
# for hidden nodes.
# The original code did not shuffle the data, which made predict output 9 every time!
nn = OCRNeuralNetwork(HIDDEN_NODE_COUNT, data_matrix, data_labels,
                      random.sample(range(5000), 5000))


class JSONHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_POST(s):
        response_code = 200
        response = ""
        var_len = int(s.headers.get('Content-Length'))
        content = s.rfile.read(var_len)
        payload = json.loads(content)

        # If a training request, train and then save the trained network.
        if payload.get('train'):
            nn.train(payload['trainArray'])
            nn.save()
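The excerpt stops inside do_POST, and the code that actually binds JSONHandler to HOST_NAME and PORT_NUMBER is not shown. A minimal serve loop, sketched only from the constants and handler defined above, could look like this:

if __name__ == '__main__':
    # Bind the JSON handler to the configured host and port and serve until interrupted.
    httpd = BaseHTTPServer.HTTPServer((HOST_NAME, PORT_NUMBER), JSONHandler)
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        httpd.server_close()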
import json
import random

import numpy as np
from SimpleHTTPServer import SimpleHTTPRequestHandler
from ocr import OCRNeuralNetwork

HIDDEN_NODE_COUNT = 20

# Load the data set
data_matrix = np.loadtxt(open('data.csv', 'rb'), delimiter=',')
data_labels = np.loadtxt(open('dataLabels.csv', 'rb'))

# Convert to lists
data_matrix = data_matrix.tolist()
data_labels = data_labels.tolist()

# The data set holds 5000 samples; train_indice stores the indices of the samples used for training
train_indice = list(range(5000))
# Shuffle the training order
random.shuffle(train_indice)

nn = OCRNeuralNetwork(HIDDEN_NODE_COUNT, data_matrix, data_labels, train_indice)


class JSONHandler(SimpleHTTPRequestHandler):
    """Handle incoming POST requests."""

    def do_POST(self):
        response_code = 200
        response = ""
        var_len = int(self.headers.get('Content-Length'))
        content = self.rfile.read(var_len)
        payload = json.loads(content)

        # If it is a training request, train and then save the trained neural network
        if payload.get('train'):
            nn.train(payload['trainArray'])
            nn.save()
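Both fixes amount to the same thing: the network must not see the training samples in their original order. Drawing the full index list with random.sample (as in the previous excerpt) and shuffling a copy in place (as here) each produce a random permutation of the indices, as this small check illustrates:

import random

indices = list(range(5000))

# random.sample draws without replacement, so asking for the full length
# returns a shuffled permutation of every index...
permutation = random.sample(indices, len(indices))

# ...which is equivalent to copying the list and shuffling it in place.
shuffled = indices[:]
random.shuffle(shuffled)

assert sorted(permutation) == sorted(shuffled) == indices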
import json
import BaseHTTPServer

import numpy as np
from ocr import OCRNeuralNetwork

HOST_NAME = 'localhost'
PORT_NUMBER = 8000
HIDDEN_NODE_COUNT = 15

# Load data samples and labels into matrix
data_matrix = np.loadtxt(open('data.csv', 'rb'), delimiter=',')
data_labels = np.loadtxt(open('dataLabels.csv', 'rb'))

# Convert from numpy ndarrays to python lists
data_matrix = data_matrix.tolist()
data_labels = data_labels.tolist()

# If a neural network file does not exist, train it using all 5000 existing data samples.
# Based on data collected from neural_network_design.py, 15 is the optimal number
# for hidden nodes.
nn = OCRNeuralNetwork(HIDDEN_NODE_COUNT, data_matrix, data_labels, list(range(5000)))


class JSONHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_POST(s):
        response_code = 200
        response = ""
        var_len = int(s.headers.get('Content-Length'))
        content = s.rfile.read(var_len)
        payload = json.loads(content)

        if payload.get('train'):
            nn.train(payload['trainArray'])
            nn.save()
        elif payload.get('predict'):
            try:
import numpy as np
from ocr import OCRNeuralNetwork
from sklearn.cross_validation import train_test_split


def test(data_matrix, data_labels, test_indices, nn):
    # Count how many test samples the network classifies correctly.
    cnt = 0
    for i in test_indices:
        if data_labels[i] == nn.predict(data_matrix[i]):
            cnt += 1
    return cnt / float(len(test_indices))


# Load data samples and labels into matrix
data_matrix = np.loadtxt(open('mydata.csv', 'rb'), delimiter=',').tolist()
data_labels = np.loadtxt(open('mydataLabels.csv', 'rb')).tolist()

# Create training and testing sets.
train_indices, test_indices = train_test_split(list(range(len(data_matrix))))

print "PERFORMANCE"
print "-----------"

# Track the best configuration seen so far, starting from 10 hidden nodes and 1 training pass.
maxi, maxj = 10, 1
maxnn = OCRNeuralNetwork(400, 10, 10, data_matrix, data_labels, train_indices, 1)
maxp = test(data_matrix, data_labels, test_indices, maxnn)

# Sweep over hidden-node counts and numbers of training passes.
for i in xrange(10, 50):
    for j in xrange(1, 10):
        nn = OCRNeuralNetwork(400, i, 10, data_matrix, data_labels, train_indices, j)
        p = test(data_matrix, data_labels, test_indices, nn)
        if p > maxp:
            maxi, maxj, maxp = i, j, p
        performance = str(p)
        print "{i} Hidden Nodes, {j} trainings: {val}".format(i=i, j=j, val=performance)

print 'max:', maxi, 'Hidden Nodes,', maxj, 'trainings:', str(maxp)
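Once the sweep finishes, the winning pair can be retrained and persisted so the server code below can load it. This is only a sketch: it assumes the same seven-argument constructor used above and the save() method the server excerpts call.

# Retrain with the best (hidden nodes, training passes) pair found by the sweep
# and persist it; save() is the same method the server code calls.
best_nn = OCRNeuralNetwork(400, maxi, 10, data_matrix, data_labels, train_indices, maxj)
best_nn.save()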
import json
import BaseHTTPServer
from collections import namedtuple

import numpy as np
from ocr import OCRNeuralNetwork

PORT_NUMBER = 8000
INPUT_NODE_COUNT = 400
HIDDEN_NODE_COUNT = 49
NUM_OF_TRAINING = 8
OUTPUT_NODE_COUNT = 10

# Load data samples and labels into matrix
data_matrix = np.loadtxt(open('mydata.csv', 'rb'), delimiter=',')
data_labels = np.loadtxt(open('mydataLabels.csv', 'rb'))

# Convert from numpy ndarrays to python lists
data_matrix = data_matrix.tolist()
data_labels = data_labels.tolist()

print 'begin train'
nn = OCRNeuralNetwork(INPUT_NODE_COUNT, HIDDEN_NODE_COUNT, OUTPUT_NODE_COUNT,
                      data_matrix, data_labels, list(range(len(data_matrix))),
                      NUM_OF_TRAINING)
print 'end train'
nn.save()


class JSONHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_POST(s):
        response_code = 200
        response = ""
        var_len = int(s.headers.get('Content-Length'))
        content = s.rfile.read(var_len)
        payload = json.loads(content)

        if payload.get('train'):
            TrainData = namedtuple('TrainData', ['y0', 'label'])
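For reference, a request that exercises the training path above might look like the following. This is only a sketch: it assumes the server is reachable at http://localhost:8000 and that each trainArray entry is a dict carrying the 'y0' and 'label' fields the TrainData namedtuple expects, with 'y0' standing in for a flattened 400-value pixel vector (INPUT_NODE_COUNT above).

import json
import urllib2

# Hypothetical client: POST one labelled sample to the running server.
payload = {'train': True,
           'trainArray': [{'y0': [0] * 400, 'label': 3}]}
request = urllib2.Request('http://localhost:8000',
                          json.dumps(payload),
                          {'Content-Type': 'application/json'})
print urllib2.urlopen(request).read()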