def train(X_train, X_test, y_train, y_test):
    """Fit a fresh model with SGD and return predictions for ``X_test``.

    NOTE(review): ``self`` and ``n_epoch`` are not parameters; they are
    resolved from the enclosing scope, so this function is only usable where
    both names exist.

    Args:
        X_train: Training inputs, forwarded to ``self._train_once``.
        X_test: Inputs to predict on, forwarded to ``self._predict``.
        y_train: Training targets, forwarded to ``self._train_once``.
        y_test: Test targets; only used when ``self.cv`` is truthy, in which
            case they are reshaped to a column and used for the test loss.

    Returns:
        The prediction produced after the last epoch, moved to the CPU when
        ``self.gpu`` is not ``None``.
    """
    xp = self.xp
    model = self._create_model()
    optimizer = SGD(lr=0.001)
    optimizer.setup(model)

    if self.cv:
        # sigmoid_cross_entropy is fed a column of targets.
        y_test = xp.array(y_test).reshape((-1, 1))

    for epoch in range(n_epoch):
        print('epoch:', epoch)

        # using_config is a context manager: calling it without `with`
        # changes nothing, so the mode has to be scoped explicitly.
        with chainer.using_config('train', True):
            self._train_once(model, optimizer, X_train, y_train)

        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            pred = self._predict(model, X_test)
            if self.cv:
                loss = F.sigmoid_cross_entropy(pred, y_test)
                print("test loss:", loss.data)

    if self.gpu is not None:
        pred = chainer.cuda.to_cpu(pred)
    return pred
def setup_optimizer(cfg):
    """Build the optimizer selected by ``cfg.solver.optimizer``.

    Args:
        cfg: Configuration object exposing ``cfg.solver.optimizer`` (the
            optimizer name), ``cfg.solver.base_lr`` and, for
            ``'MomentumSGD'``, ``cfg.solver.momentum``.

    Returns:
        An ``SGD`` or ``MomentumSGD`` instance.

    Raises:
        ValueError: If ``cfg.solver.optimizer`` names an unsupported
            optimizer.
    """
    solver = cfg.solver
    name = solver.optimizer

    if name == 'SGD':
        # Read the learning rate from the same `solver` section as every
        # other setting here (was `cfg.optimizer.base_lr`).
        return SGD(solver.base_lr)
    if name == 'MomentumSGD':
        return MomentumSGD(solver.base_lr, solver.momentum)

    raise ValueError('Not support `optimizer`: {}.'.format(name))
def __init__(self, n_features, n_hidden):
    """Create the two linear layers, randomise them and attach SGD.

    Args:
        n_features: Input size of the first linear layer.
        n_hidden: Output size of the first layer / input size of the second.
    """
    hidden_layer = F.Linear(n_features, n_hidden)
    output_layer = F.Linear(n_hidden, 2)
    self.model = chainer.FunctionSet(
        W1=hidden_layer,
        W2=output_layer,
        activation=F.relu,
    )

    # Overwrite every parameter in place with standard-normal samples.
    for weights in self.model.parameters:
        weights[:] = np.random.randn(*weights.shape)

    sgd = SGD()
    self.optimizer = sgd
    sgd.setup(self.model)
def __init__(self, n_features):
    """Set up a single linear layer with two outputs and an SGD optimizer.

    Args:
        n_features: Input size of the linear layer.
    """
    # The model is one parametrized function: an affine map to 2 outputs.
    affine = F.Linear(n_features, 2)
    self.model = chainer.FunctionSet(W=affine)

    # Replace the initial parameter values with gaussian samples, in place.
    for weights in self.model.parameters:
        weights[:] = np.random.randn(*weights.shape)

    # Plain SGD as the update rule: w' = w - lr * gradient_f(w)
    sgd = SGD()
    self.optimizer = sgd
    sgd.setup(self.model)
class NeuralNet(object):
    """One-hidden-layer network with two output units, trained with SGD."""

    def __init__(self, n_features, n_hidden):
        """Create the layers, randomise the parameters and attach SGD.

        Args:
            n_features: Input size of the first linear layer.
            n_hidden: Size of the hidden layer.
        """
        hidden_layer = F.Linear(n_features, n_hidden)
        output_layer = F.Linear(n_hidden, 2)
        self.model = chainer.FunctionSet(
            W1=hidden_layer,
            W2=output_layer,
            activation=F.relu,
        )

        # Overwrite every parameter in place with standard-normal samples.
        for weights in self.model.parameters:
            weights[:] = np.random.randn(*weights.shape)

        sgd = SGD()
        self.optimizer = sgd
        sgd.setup(self.model)

    def forward_loss(self, x, y, train=True):
        """Run the forward pass and compute the softmax cross-entropy.

        Args:
            x: Input array wrapped into a ``chainer.Variable``.
            y: Label array wrapped into a ``chainer.Variable``.
            train: When false, both variables are created volatile.

        Returns:
            ``(loss, y_hat)`` where ``y_hat`` is the ``y`` attribute stored on
            the function node that created the loss.
        """
        is_volatile = not train
        inputs = chainer.Variable(x, volatile=is_volatile)
        targets = chainer.Variable(y, volatile=is_volatile)

        hidden = self.model.activation(self.model.W1(inputs))
        logits = self.model.W2(hidden)
        loss = F.softmax_cross_entropy(logits, targets)
        return loss, loss.creator.y

    def learn(self, x, y):
        """Do one SGD step on the given batch and return the raw loss."""
        self.optimizer.zero_grads()
        loss = self.forward_loss(x, y, train=True)[0]
        loss.backward()
        self.optimizer.update()
        return loss.data

    def eval(self, mb_x, mb_y):
        """Return ``(accuracy, precision, recall)`` on the given minibatch."""
        predicted = self.predict(mb_x)
        accuracy = sklearn.metrics.accuracy_score(mb_y, predicted)
        precision = sklearn.metrics.precision_score(mb_y, predicted)
        recall = sklearn.metrics.recall_score(mb_y, predicted)
        return accuracy, precision, recall

    def predict(self, x):
        """Return the arg-max class index for every row of ``x``."""
        # forward_loss needs labels; zeros are passed as placeholders.
        placeholder_labels = np.zeros((len(x), ), dtype='int32')
        y_hat = self.forward_loss(x, placeholder_labels)[1]
        return np.argmax(y_hat, axis=1)

    def plot_eval(self, mb_x, mb_y):
        """Do nothing; plotting is not implemented for this model."""
classes = np.unique(t_train) # 定義されたクラスラベル num_classes = len(classes) # クラス数 dim_features = x_train.shape[-1] # xの次元 # 超パラメータの定義 learning_rate = 0.5 # learning_rate(学習率)を定義する max_iteration = 100 # 学習させる回数 batch_size = 200 # ミニバッチ1つあたりのサンプル数 dim_hidden = 200 # 隠れ層の次元数を定義する linear_1 = F.Linear(dim_features, dim_hidden) linear_2 = F.Linear(dim_hidden, num_classes) model = FunctionSet(linear_1=linear_1, linear_2=linear_2) optimizer = SGD(learning_rate) optimizer.setup(model) loss_history = [] train_accuracy_history = [] loss_valid_history = [] valid_accuracy_history = [] valid_accuracy_best = 0 valid_loss_best = 10 num_batches = num_train / batch_size # ミニバッチの個数 num_valid_batches = num_valid / batch_size # 学習させるループ for epoch in range(max_iteration): print "epoch:", epoch
y = self.h2y(h) return y max_epoch = 100 hidden_size = 512 #100 bptt_length = 30 batch_size = 100 lr = 0.05 #1e-4 indices, char_to_id, id_to_char = load_shakespear() iterator = RnnIterator(indices, batch_size) vocab_size = len(char_to_id) rnn = SimpleRNN(vocab_size, hidden_size, vocab_size) model = L.Classifier(rnn) optimizer = SGD(lr=lr) optimizer.setup(model) def generate_sample(n=30, init_char=' '): rnn.reset_state() s = '' x = np.array([char_to_id[init_char]]) for i in range(n): y = rnn(x) m = y.data.argmax() c = id_to_char[m] s += c x = np.array([m])
def setUp(self):
    """Create an SGD optimizer (constructed with 0.1) and a model using it."""
    sgd = SGD(0.1)
    self.optimizer = sgd
    self.model = LinearModel(sgd)
class LogisticRegression(object):
    """Logistic regression example in chainer.

    $$ L(x, y) = -log(softmax(Wx + b)_y) $$
    """

    def __init__(self, n_features):
        """Create the linear layer, randomise it and attach an SGD optimizer.

        :param n_features: Input size of the linear layer.
        """
        # Define what parametrized functions the model consists of.
        self.model = chainer.FunctionSet(
            W=F.Linear(n_features, 2)
        )

        # Initialize parameters randomly from gaussian.
        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape)

        # Define what update rule we will use. SGD is the simplest one,
        # w' = w - lr * gradient_f(w)
        self.optimizer = SGD()
        self.optimizer.setup(self.model)

    def forward_loss(self, x, y, train=True):
        """Compute the loss function of the model, given the inputs x, and labels y.

        :param x: Numpy array of dimensionality (batch x input)
        :param y: Numpy array of dimensionality (batch)
        :param train: When false, the variables are created volatile.
        :return: Tuple of the loss (a chainer.Variable) and the softmax
            outputs saved on the node that produced the loss.
        """
        # Wrap the input variables into a class that takes care of remembering
        # the call chain. volatile=True means the computation graph will not
        # be built; for training we need the graph, so it is set to False.
        # NOTE(review): the return statement reads loss.creator, which
        # presumably requires the graph - confirm before calling this with
        # train=False.
        x = chainer.Variable(x, volatile=not train)
        y = chainer.Variable(y, volatile=not train)

        # Apply the functions that define the model.
        wx = self.model.W(x)  # f(x) = Wx + b
        # Softmax and cross-entropy: f(x, y) = -log(e^{x} / sum(e^{x}))_y
        loss = F.softmax_cross_entropy(wx, y)

        # loss.creator is the computation node that produced the result; it
        # keeps the softmax outputs as 'y'.
        return loss, loss.creator.y

    def learn(self, mb_x, mb_y):
        """Update parameters given the training data.

        :param mb_x: Numpy array of float32 of dimensionality (batch x input)
        :param mb_y: Numpy array of int32 of dimensionality (batch)
        :return: The "raw" loss (i.e. not a chainer.Variable).
        """
        self.optimizer.zero_grads()

        # Do the forward pass.
        loss, y_hat = self.forward_loss(mb_x, mb_y, train=True)

        # Do the backward pass from loss (the Jacobian computation).
        loss.backward()

        # Update the parameters W' = W - lr * J^{W}_{loss}(W), b' = b - ...
        self.optimizer.update()

        return loss.data

    def eval(self, mb_x, mb_y):
        """Compute some metrics on the given minibatch.

        :param mb_x: Numpy array of float32 of dimensionality (batch x input)
        :param mb_y: Numpy array of int32 of dimensionality (batch) with the labels for each input in mb_x
        :return: Accuracy, Precision, Recall metrics
        """
        mb_y_hat = self.predict(mb_x)  # Get model's predictions about the input data.

        # Compare predictions to the true labels and compute accuracy, precision and recall.
        acc = sklearn.metrics.accuracy_score(mb_y, mb_y_hat)
        prec = sklearn.metrics.precision_score(mb_y, mb_y_hat)
        recall = sklearn.metrics.recall_score(mb_y, mb_y_hat)

        return acc, prec, recall

    def predict(self, mb_x):
        """Predict labels for the given input minibatch.

        :param mb_x: Numpy array of float32 of dimensionality (batch x input)
        :return: Numpy array of int32 of dimensionality (batch)
        """
        # forward_loss needs labels; zeros are passed as placeholders and
        # only the softmax outputs are used.
        placeholder_labels = np.zeros((len(mb_x), ), dtype='int32')
        _, y_hat = self.forward_loss(mb_x, placeholder_labels)
        return np.argmax(y_hat, axis=1)

    def plot_eval(self, mb_x, mb_y):
        """Plot the minibatches in 2D and also the separating hyperplane.

        :param mb_x: Inputs; only the first two columns are plotted.
        :param mb_y: Labels (0 or 1) used to colour the points.
        """
        import matplotlib.pyplot as plt
        import seaborn
        seaborn.set()

        x1 = mb_x[:, 0]
        x2 = mb_x[:, 1]
        y = mb_y

        # The boundary is where both class scores are equal:
        # (w_0 - w_1) . x + (b_0 - b_1) = 0, solved here for x2.
        dec_x1 = np.linspace(-1, 1)
        w1_m_w2 = self.model.W.W[0] - self.model.W.W[1]
        b1_m_b2 = self.model.W.b[0] - self.model.W.b[1]
        dec_x2 = - (w1_m_w2[0] / w1_m_w2[1] * dec_x1) - b1_m_b2 / w1_m_w2[1]

        plt.plot(x1[y == 0], x2[y == 0], 'o', label='Class 0', markersize=3, color='red')
        plt.plot(x1[y == 1], x2[y == 1], 'o', label='Class 1', markersize=3, color='green')
        plt.plot(dec_x1, dec_x2, '-', label='Classifier', color='blue')

        plt.legend()
        plt.show()

    def train(self, n_epochs=10, data='lin'):
        """Train the given model on the given dataset.

        :param n_epochs: Number of passes over the training data.
        :param data: Dataset selector forwarded to ``train._prepare_data``.
        """
        # NOTE(review): inside this method `train` resolves to a module-level
        # name, not to this method - confirm what it refers to.
        data_train, x_test, x_valid, x_train, y_test, y_valid, y_train = train._prepare_data(data)

        n_data = len(data_train)

        # Set the learning rate. A good learning rate is around 0.1; this one
        # is used to show the model gradually improves with more iterations.
        self.optimizer.lr = 0.001

        n_instances = 0
        begin_t = last_print_t = time.time()

        # Run for the given number of epochs.
        for epoch in range(n_epochs):
            # For SGD it's important to randomize order in which we look at
            # the data points, so each epoch draws a fresh random order.
            # list(...) keeps the in-place shuffle working on Python 3, where
            # range() is not a mutable list.
            order = list(range(n_data))
            np.random.shuffle(order)

            loss = 0.0
            for i in order:
                # Slice (instead of x_train[i]) so that the result has the
                # (1 x input) dimensionality that learn() expects.
                x = x_train[i:i + 1]
                y = y_train[i:i + 1]

                # Update the parameters given the current example.
                curr_loss = self.learn(x, y)

                loss += 1.0 / n_data * curr_loss
                n_instances += 1

                # Print something every second so that we keep the frustration low ;)
                if time.time() - last_print_t > 1.0:
                    last_print_t = time.time()
                    a, p, r = self.eval(x_valid, y_valid)
                    # A single parenthesised argument prints identically on
                    # Python 2 and Python 3.
                    print('> t(%.1f) train_loss(%.3f) examples(%d) valid{acc(%.3f) prec(%.3f) recall(%.3f)}' % (last_print_t - begin_t, loss, n_instances, a, p, r))

        # Compute the metrics and show evaluation on the test set.
        a, p, r = self.eval(x_test, y_test)
        print('# acc(%.3f) prec(%.3f) recall(%.3f)' % (a, p, r,))

        self.plot_eval(x_test, y_test)