# Graph plotting
import matplotlib.pyplot as plt
# Chainer modules
import chainer.functions as F
from chainer import FunctionSet, Variable, optimizers
# NumPy is used by Chainer
import numpy as np
# Helper module to read data from CSV
from dataparser import DataParser


def target_function(x):
    """
    The function that we want to approximate with a simple Multi-Layer Perceptron (MLP):
    y = 2 * x + 8
    """
    x_double = np.multiply(2, x)
    return np.add(x_double, 8)
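
# For example, target_function(1.0) == 10.0 and
# target_function(np.array([2.0, 3.0])) gives array([12., 14.])
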
# Read the training and test data from CSV files
csv_parser = DataParser()
x_train, y_train = csv_parser.parse("data/linear_training.csv", delimiter=",")
x_test, y_test = csv_parser.parse("data/linear_test.csv", delimiter=",")
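
# Note: F.Linear(1, n_units) expects float32 input of shape (N, 1), so DataParser
# is assumed here to return each CSV column as a float32 column vector.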

# Network parameters
n_units = 10

# Training parameters
n_epochs = 70
# Use the whole training set as a single batch (full-batch gradient descent)
batchsize = np.size(x_train)
# The size of the training data
datasize = np.size(x_train)

# Define the network model: 1 input unit, a hidden layer of n_units units and 1 output unit
model = FunctionSet(
    l1=F.Linear(1, n_units),
    l2=F.Linear(n_units, 1)
)


def forward(x_data, y_data, train=True):
    """
    Run the forward pass: a hidden layer with sigmoid activation followed by a linear
    output layer, and return the mean squared error (loss) against the targets.
    Note: the train flag only has an effect if the dropout line below is enabled.
    """
    # Convert the NumPy data into Chainer Variables
    x = Variable(x_data)
    t = Variable(y_data)
    # Compute the output of the hidden layer with cuDNN (NVIDIA GPU library for DNNs) disabled
    h1 = F.sigmoid(model.l1(x), use_cudnn=False)
    # Replace the line above with the following line to use dropout
    #h1 = F.dropout(F.sigmoid(model.l1(x)), train=train)
    # Compute the output of the network
    y = model.l2(h1)
    # Return the loss so that it can be plotted later on
    return F.mean_squared_error(y, t)

# Setup the model with SGD
optimizer = optimizers.SGD()
# Alternative optimizer, Adam
#optimizer = optimizers.Adam()
optimizer.setup(model)
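# Note: Chainer's SGD defaults to a learning rate of 0.01; pass it explicitly
# (e.g. optimizers.SGD(lr=0.1)) to experiment with other step sizes.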

# Data used for later plotting
train_loss = []
test_loss = []

# Finally, start the training
for epoch in range(n_epochs):
    print("Epoch", epoch)
    sum_loss = 0
    # Randomize the batch selection
    indices = np.random.permutation(datasize)
    for i in range(0, datasize, batchsize):
        x_batch = x_train[indices[i : i + batchsize]]
        y_batch = y_train[indices[i : i + batchsize]]
        # Reset the gradients
        optimizer.zero_grads()
        # Compute the forward pass and the loss
        loss = forward(x_batch, y_batch, train=True)
        # Compute the backward propagation
        loss.backward()
        # Update the parameters using the current gradients
        optimizer.update()
        # Register the loss so that it can be plotted later on
        sum_loss += float(loss.data) * batchsize
    # Compute the mean training loss for this epoch
    epoch_mean_loss = sum_loss / datasize
    # Save the epoch mean loss so that it can be plotted later on
    train_loss.append(epoch_mean_loss)

    # Run the test data so that the generalization error can be plotted as well
    sum_loss = 0
    testsize = np.size(x_test)
    for i in range(0, testsize, batchsize):
        x_batch = x_test[i : i + batchsize]
        y_batch = y_test[i : i + batchsize]
        # Forward pass only, no parameter update
        loss = forward(x_batch, y_batch, train=False)
        # Register the loss so that it can be plotted later on
        sum_loss += float(loss.data) * np.size(x_batch)
    # Compute the mean test loss for this epoch
    epoch_mean_loss = sum_loss / testsize
    # Save the epoch mean loss so that it can be plotted later on
    test_loss.append(epoch_mean_loss)

# Plot the training and test loss per epoch
plt.plot(train_loss, label="Training loss")
plt.plot(test_loss, label="Test loss")
plt.ylabel("Mean squared error")
plt.xlabel("Epochs")
plt.legend()
plt.show()
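
# As a quick sanity check (a minimal sketch, assuming DataParser returned float32
# column vectors as noted above), evaluate the trained model on a few inputs and
# compare the predictions against target_function:
x_new = np.array([[0.0], [5.0], [10.0]], dtype=np.float32)
h = F.sigmoid(model.l1(Variable(x_new)))
y_pred = model.l2(h)
print("Inputs:      ", x_new.flatten())
print("Predictions: ", y_pred.data.flatten())
print("Targets:     ", target_function(x_new).flatten())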