# -*- coding: UTF-8 -*-

import numpy as np
from ocr import OCRNeuralNetwork
from sklearn.cross_validation import train_test_split

def test(data_matrix, data_labels, test_indices, nn):
    correct_guess_count = 0
    for i in test_indices:
        test = data_matrix[i]
        prediction = nn.predict(test)
        if data_labels[i] == prediction:
            correct_guess_count += 1
    return correct_guess_count / float(len(test_indices))

data_matrix = np.loadtxt(open('../data/simple_ocr/data.csv', 'rb'), delimiter = ',').tolist()
data_labels = np.loadtxt(open('../data/simple_ocr/dataLabels.csv', 'rb')).tolist()

# Create training and testing sets.
train_indices, test_indices = train_test_split(list(range(5000)))

print "PERFORMANCE"
print "-----------"

for i in xrange(5, 50, 5):
    nn = OCRNeuralNetwork(i, data_matrix, data_labels, train_indices, False)
    performance = str(test(data_matrix, data_labels, test_indices, nn))
    print "{i} Hidden Nodes: {val}".format(i=i, val=performance)
# Example #2
0
HOST_NAME = 'localhost'
PORT_NUMBER = 8000
HIDDEN_NODE_COUNT = 15

# Load data samples and labels. np.loadtxt receives the path rather than an
# open file object, so numpy closes the file itself and no handle is leaked.
data_matrix = np.loadtxt('data.csv', delimiter=',')
data_labels = np.loadtxt('dataLabels.csv')

# Convert from numpy ndarrays to python lists
data_matrix = data_matrix.tolist()
data_labels = data_labels.tolist()

# Hand the network every loaded sample index in a random order; the count is
# taken from the loaded data instead of a hard-coded 5000.
# NOTE(review): per the original comment, training is meant to happen only
# when no saved network file exists, and 15 hidden nodes was picked from the
# results of neural_network_design.py -- neither can be confirmed from here.
sample_count = len(data_matrix)
nn = OCRNeuralNetwork(HIDDEN_NODE_COUNT, data_matrix, data_labels,
                      random.sample(range(sample_count), sample_count))

# The original code did not shuffle the data, which made predict() output 9 every time!


class JSONHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_POST(s):
        response_code = 200
        response = ""
        var_len = int(s.headers.get('Content-Length'))
        content = s.rfile.read(var_len)
        payload = json.loads(content)

        if payload.get('train'):
            nn.train(payload['trainArray'])
            nn.save()
# Example #3
0
HIDDEN_NODE_COUNT = 20

# Load the data set. np.loadtxt receives the path rather than an open file
# object, so numpy closes the file itself and no handle is leaked.
data_matrix = np.loadtxt('data.csv', delimiter=',')
data_labels = np.loadtxt('dataLabels.csv')

# Convert to plain Python lists.
data_matrix = data_matrix.tolist()
data_labels = data_labels.tolist()

# train_indice holds the indices of the samples used for training: every
# loaded sample (the count comes from the data, not a hard-coded 5000).
train_indice = list(range(len(data_matrix)))
# Shuffle the training order.
random.shuffle(train_indice)

nn = OCRNeuralNetwork(HIDDEN_NODE_COUNT, data_matrix, data_labels,
                      train_indice)


class JSONHandler(SimpleHTTPRequestHandler):
    """处理接收到的POST请求"""
    def do_POST(self):
        response_code = 200
        response = ""
        var_len = int(self.headers.get('Content-Length'))
        content = self.rfile.read(var_len)
        payload = json.loads(content)

        # 如果是训练请求,训练然后保存训练完的神经网络
        if payload.get('train'):
            nn.train(payload['trainArray'])
            nn.save()
# Example #4
0
HOST_NAME = 'localhost'
PORT_NUMBER = 8000
HIDDEN_NODE_COUNT = 15

# Load data samples and labels. np.loadtxt receives the path rather than an
# open file object, so numpy closes the file itself and no handle is leaked.
data_matrix = np.loadtxt('data.csv', delimiter=',')
data_labels = np.loadtxt('dataLabels.csv')

# Convert from numpy ndarrays to python lists
data_matrix = data_matrix.tolist()
data_labels = data_labels.tolist()

# Hand the network every loaded sample index in a shuffled order. Feeding the
# samples in file order has been reported to leave the network predicting a
# single label, and the other server variants in this file shuffle as well.
# NOTE(review): per the original comment, training is meant to happen only
# when no saved network file exists, and 15 hidden nodes was picked from the
# results of neural_network_design.py -- neither can be confirmed from here.
nn = OCRNeuralNetwork(HIDDEN_NODE_COUNT, data_matrix, data_labels,
                      np.random.permutation(len(data_matrix)).tolist())


class JSONHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_POST(s):
        response_code = 200
        response = ""
        var_len = int(s.headers.get('Content-Length'))
        content = s.rfile.read(var_len)
        payload = json.loads(content)

        if payload.get('train'):
            nn.train(payload['trainArray'])
            nn.save()
        elif payload.get('predict'):
            try:
# Example #5
0
            cnt += 1
    return cnt / float(len(test_indices))


# Load data samples and labels into matrix
data_matrix = np.loadtxt(open('mydata.csv', 'rb'), delimiter=',').tolist()
data_labels = np.loadtxt(open('mydataLabels.csv', 'rb')).tolist()

# Create training and testing sets.
train_indices, test_indices = train_test_split(list(range(len(data_matrix))))

print "PERFORMANCE"
print "-----------"

maxi, maxj = 10, 1
maxnn = OCRNeuralNetwork(400, 10, 10, data_matrix, data_labels, train_indices,
                         1)
maxp = test(data_matrix, data_labels, test_indices, maxnn)

for i in xrange(10, 50):
    for j in xrange(1, 10):
        nn = OCRNeuralNetwork(400, i, 10, data_matrix, data_labels,
                              train_indices, j)
        p = test(data_matrix, data_labels, test_indices, nn)
        if p > maxp:
            maxi, maxj, maxp = i, j, p
        performance = str(p)
        print "{i} Hidden Nodes, {j} trainings: {val}".format(i=i,
                                                              j=j,
                                                              val=performance)

print 'max:', maxi, 'Hidden Nodes,', maxj, 'trainings:', str(maxp)
# Example #6
0
PORT_NUMBER = 8000

INPUT_NODE_COUNT = 400
HIDDEN_NODE_COUNT = 49
NUM_OF_TRAINGING = 8
OUTPUT_NODE_COUNT = 10

data_matrix = np.loadtxt(open('mydata.csv', 'rb'), delimiter=',')
data_labels = np.loadtxt(open('mydataLabels.csv', 'rb'))

data_matrix = data_matrix.tolist()
data_labels = data_labels.tolist()

print 'begin train'
nn = OCRNeuralNetwork(INPUT_NODE_COUNT, HIDDEN_NODE_COUNT,
                      OUTPUT_NODE_COUNT, data_matrix, data_labels,
                      list(range(len(data_matrix))), NUM_OF_TRAINGING)
print 'end train'
nn.save()


class JSONHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    def do_POST(s):
        response_code = 200
        response = ""
        var_len = int(s.headers.get('Content-Length'))
        content = s.rfile.read(var_len)
        payload = json.loads(content)

        if payload.get('train'):
            TrainData = namedtuple('TrainData', ['y0', 'label'])