class NeuralNet(object):
    """Feed-forward classifier with one hidden layer, built with chainer.

    The network computes ``W2(relu(W1(x)))`` with two output units and is
    trained by minimizing softmax cross-entropy with an ``SGD`` optimizer.
    """

    def __init__(self, n_features, n_hidden):
        """Create the layers, randomize their parameters and set up SGD.

        :param n_features: Input size of the first linear layer.
        :param n_hidden: Output size of the first linear layer (and input
            size of the second one).
        """
        layers = dict(
            W1=F.Linear(n_features, n_hidden),
            W2=F.Linear(n_hidden, 2),
            activation=F.relu,
        )
        self.model = chainer.FunctionSet(**layers)

        # Overwrite every parameter array in place with standard-normal noise.
        for weights in self.model.parameters:
            weights[:] = np.random.randn(*weights.shape)

        self.optimizer = SGD()
        self.optimizer.setup(self.model)

    def forward_loss(self, x, y, train=True):
        """Run the forward pass and return the loss together with ``y_hat``.

        :param x: Input data; wrapped into a ``chainer.Variable``.
        :param y: Labels; wrapped into a ``chainer.Variable``.
        :param train: When False, both variables are created with
            ``volatile=True``.
        :return: Tuple ``(loss, loss.creator.y)``; ``loss`` is the
            ``F.softmax_cross_entropy`` result.
        """
        is_volatile = not train
        inputs = chainer.Variable(x, volatile=is_volatile)
        targets = chainer.Variable(y, volatile=is_volatile)

        net = self.model
        hidden = net.activation(net.W1(inputs))
        logits = net.W2(hidden)

        loss = F.softmax_cross_entropy(logits, targets)
        return loss, loss.creator.y

    def learn(self, x, y):
        """Do one optimizer step on the given minibatch.

        :return: ``loss.data``, i.e. the loss without the Variable wrapper.
        """
        self.optimizer.zero_grads()
        loss, _ = self.forward_loss(x, y, train=True)
        loss.backward()
        self.optimizer.update()
        return loss.data

    def eval(self, mb_x, mb_y):
        """Score the model's predictions for ``mb_x`` against ``mb_y``.

        :return: Tuple ``(accuracy, precision, recall)`` computed with
            ``sklearn.metrics``.
        """
        predicted = self.predict(mb_x)
        metrics = sklearn.metrics
        return (
            metrics.accuracy_score(mb_y, predicted),
            metrics.precision_score(mb_y, predicted),
            metrics.recall_score(mb_y, predicted),
        )

    def predict(self, x):
        """Return, for each row of ``x``, the index of the largest output.

        ``forward_loss`` needs labels, so an all-zero int32 vector of the
        right length is passed; only the returned ``y_hat`` is used.
        """
        dummy_labels = np.zeros((len(x), ), dtype='int32')
        _, y_hat = self.forward_loss(x, dummy_labels)
        return np.argmax(y_hat, axis=1)

    def plot_eval(self, mb_x, mb_y):
        """Do nothing; plotting is not implemented for this model."""
        return None
# Per-step history of the bias-gradient norms of the two linear layers.
# NOTE(review): w_1_grad_norms / w_2_grad_norms are appended to below but are
# not created in the visible code -- presumably initialized just before this
# fragment; confirm.
b_1_grad_norms = []
b_2_grad_norms = []

# Loop that updates the weights with mini-batch SGD.
# NOTE(review): the line structure here was reconstructed from a
# whitespace-collapsed source; the gradient-norm logging is assumed to run
# once per mini-batch and the timing to wrap the whole loop -- confirm.
time_start = time.time()
# Visit the training examples in a fresh random order, split into
# num_batches index groups.
perm = np.random.permutation(num_train)
for batch_indexes in np.array_split(perm, num_batches):
    x_batch = x_train[batch_indexes]
    t_batch = t_train[batch_indexes]
    batch_loss, batch_accuracy = loss_and_accuracy(model, x_batch, t_batch)

    # Backpropagation.
    optimizer.zero_grads()
    batch_loss.backward()
    optimizer.update()

    # Record the norm of each parameter's gradient for this step.
    w_1_grad_norm = np.linalg.norm(model.linear_1.W.grad)
    w_1_grad_norms.append(w_1_grad_norm)
    w_2_grad_norm = np.linalg.norm(model.linear_2.W.grad)
    w_2_grad_norms.append(w_2_grad_norm)
    b_1_grad_norm = np.linalg.norm(model.linear_1.b.grad)
    b_1_grad_norms.append(b_1_grad_norm)
    b_2_grad_norm = np.linalg.norm(model.linear_2.b.grad)
    b_2_grad_norms.append(b_2_grad_norm)

# Wall-clock seconds spent between the two time.time() calls.
time_finish = time.time()
time_elapsed = time_finish - time_start
class LogisticRegression(object):
    """Logistic regression example in chainer.

    $$ L(x, y) = -log(softmax(Wx + b)_y) $$
    """

    def __init__(self, n_features):
        """Create the linear layer, randomize its parameters and set up SGD.

        :param n_features: Number of input features; the layer maps them to
            two outputs (one per class).
        """
        # Define what parametrized functions the model consists of.
        self.model = chainer.FunctionSet(
            W=F.Linear(n_features, 2)
        )

        # Initialize parameters randomly from gaussian.
        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape)

        # Define what update rule we will use. SGD is the simplest one,
        # w' = w - lr * gradient_f(w)
        self.optimizer = SGD()
        self.optimizer.setup(self.model)

    def forward_loss(self, x, y, train=True):
        """Compute the loss function of the model, given the inputs x, and labels y.

        :param x: Numpy array of dimensionality (batch x input)
        :param y: Numpy array of dimensionality (batch)
        :param train: When False, the inputs are wrapped with volatile=True.
        :return: Tuple (loss, softmax outputs); loss is a chainer.Variable.
        """
        # Wrap the input variables into a class that takes care of remembering
        # the call chain. volatile=True means the computation graph will not
        # be built; but for training we need that so we set it to False.
        x = chainer.Variable(x, volatile=not train)
        y = chainer.Variable(y, volatile=not train)

        # Apply the functions that define the model.
        wx = self.model.W(x)  # Apply f: f(x) = Wx + b
        # Apply softmax and crossentropy:
        # f(x, y) = -log(e^{x} / sum(e^{x}))_y
        loss = F.softmax_cross_entropy(wx, y)

        # loss is an instance of chainer.Variable;
        # loss.creator is the computation node that produced the result;
        # if you look into the code, it saves softmax outputs as 'y'
        return loss, loss.creator.y

    def learn(self, mb_x, mb_y):
        """Update parameters given the training data.

        :param mb_x: Numpy array of dimensionality (batch x input)
        :param mb_y: Numpy array of dimensionality (batch)
        :return: The "raw" loss (i.e. not chainer.Variable).
        """
        self.optimizer.zero_grads()

        # Do the forward pass.
        loss, y_hat = self.forward_loss(mb_x, mb_y, train=True)

        # Do the backward pass from loss (the Jacobian computation).
        loss.backward()

        # Update the parameters W' = W - lr * J^{W}_{loss}(W), b' = b - ...
        self.optimizer.update()

        # Return the "raw" loss (i.e. not chainer.Variable).
        return loss.data

    def eval(self, mb_x, mb_y):
        """Compute some metrics on the given minibatch.

        :param mb_x: Numpy array of float32 of dimensionality (batch x input)
        :param mb_y: Numpy array of int32 of dimensionality (batch) with the
            labels for each input in mb_x
        :return: Accuracy, Precision, Recall metrics
        """
        # Get model's predictions about the input data.
        mb_y_hat = self.predict(mb_x)

        # Compare predictions to the true labels and compute accuracy,
        # precision and recall.
        acc = sklearn.metrics.accuracy_score(mb_y, mb_y_hat)
        prec = sklearn.metrics.precision_score(mb_y, mb_y_hat)
        recall = sklearn.metrics.recall_score(mb_y, mb_y_hat)

        return acc, prec, recall

    def predict(self, mb_x):
        """Predict labels for the given input minibatch.

        :param mb_x: Numpy array of float32 of dimensionality (batch x input)
        :return: Numpy array of int32 of dimensionality (batch)
        """
        # forward_loss needs labels, so feed all-zero dummies; only the
        # softmax outputs are used here.
        # NOTE(review): this runs with the default train=True; forward_loss
        # reads loss.creator, which presumably requires the computation graph
        # to be built -- confirm before switching to train=False.
        dummy_labels = np.zeros((len(mb_x), ), dtype='int32')
        _, y_hat = self.forward_loss(mb_x, dummy_labels)

        return np.argmax(y_hat, axis=1)

    def plot_eval(self, mb_x, mb_y):
        """Plot the minibatches in 2D and also the separating hyperplane.

        :param mb_x: Numpy array of dimensionality (batch x input); only the
            first two input columns are plotted.
        :param mb_y: Numpy array of labels (0 or 1) for each row of mb_x.
        """
        import matplotlib.pyplot as plt
        import seaborn
        seaborn.set()

        x1 = mb_x[:, 0]
        x2 = mb_x[:, 1]
        y = mb_y

        # The boundary is where both class scores are equal:
        #   (w_0 - w_1) . x + (b_0 - b_1) = 0
        # solved for x2 as a function of x1.
        # Assumes the second component of (w_0 - w_1) is non-zero.
        dec_x1 = np.linspace(-1, 1)
        w1_m_w2 = self.model.W.W[0] - self.model.W.W[1]
        b1_m_b2 = self.model.W.b[0] - self.model.W.b[1]
        dec_x2 = - (w1_m_w2[0] / w1_m_w2[1] * dec_x1) - b1_m_b2 / w1_m_w2[1]

        plt.plot(x1[y == 0], x2[y == 0], 'o', label='Class 0', markersize=3, color='red')
        plt.plot(x1[y == 1], x2[y == 1], 'o', label='Class 1', markersize=3, color='green')
        plt.plot(dec_x1, dec_x2, '-', label='Classifier', color='blue')

        plt.legend()
        plt.show()

    def train(self, n_epochs=10, data='lin', lr=0.001):
        """Train the model on the given dataset and evaluate it on the test set.

        Progress on the validation set is printed roughly once per second;
        test metrics are printed and plotted once training has finished.

        :param n_epochs: Number of passes over the training data.
        :param data: Dataset identifier forwarded to ``train._prepare_data``.
        :param lr: Learning rate assigned to the optimizer. Good learning
            rate is around 0.1; the small default is used to show the model
            gradually improves with more iterations.
        """
        # NOTE(review): inside this method the bare name `train` resolves to
        # a module-level object (not to this method); it is defined outside
        # the code shown here.
        (data_train, x_test, x_valid, x_train,
         y_test, y_valid, y_train) = train._prepare_data(data)

        # NOTE(review): assumes len(data_train) == len(x_train) -- confirm
        # against _prepare_data.
        n_data = len(data_train)

        # Set the learning rate.
        self.optimizer.lr = lr

        n_instances = 0
        begin_t = last_print_t = time.time()

        # Run for the given number of epochs.
        for epoch in range(n_epochs):
            # For SGD it's important to randomize order in which we look at
            # the data points. So for each epoch we randomly choose the order
            # in which we see them. (permutation() works on Python 2 and 3,
            # unlike shuffling a range object in place.)
            order = np.random.permutation(n_data)

            loss = 0.0
            for i in order:
                # We slice (instead of x_train[i]) so that the result is of
                # (1 x input) dimensionality that self.learn expects, instead
                # of just (input).
                x = x_train[i:i + 1]
                y = y_train[i:i + 1]

                # Ask the model to update its parameters given the current
                # example (it uses the optimizer rule to update them).
                curr_loss = self.learn(x, y)

                # Running contribution to this epoch's mean training loss.
                loss += 1.0 / n_data * curr_loss
                n_instances += 1

                # Print something every second so that we keep the
                # frustration low ;)
                if time.time() - last_print_t > 1.0:
                    last_print_t = time.time()
                    a, p, r = self.eval(x_valid, y_valid)
                    print('> t(%.1f) train_loss(%.3f) examples(%d) valid{acc(%.3f) prec(%.3f) recall(%.3f)}' % (last_print_t - begin_t, loss, n_instances, a, p, r))

        # Compute the metrics and show evaluation on the test set.
        a, p, r = self.eval(x_test, y_test)
        print('# acc(%.3f) prec(%.3f) recall(%.3f)' % (a, p, r,))

        self.plot_eval(x_test, y_test)