def sample_2():
    # create random input and output data
    x = Variable(X.copy())
    y = Variable(Y.copy())

    # randomly initialize weights
    w1 = Variable(W1.copy())
    w2 = Variable(W2.copy())

    for t in range(EPOCHS):
        # forward pass: compute predicted y
        h = F.matmul(x, w1)
        h_relu = F.relu(h)
        y_pred = F.matmul(h_relu, w2)

        # compute and print loss
        loss = F.mean_squared_error(y_pred, y)
        print(loss.data)

        # manually zero the gradients
        w1.zerograd()
        w2.zerograd()

        # backward pass
        # loss.grad = np.ones(loss.shape, dtype=np.float32)
        loss.backward()

        # update weights
        w1.data -= LEARNING_RATE * w1.grad
        w2.data -= LEARNING_RATE * w2.grad
def sample_2():
    # create random input and output data
    x = Variable(np.random.randn(DATA_SIZE, N_I).astype(np.float32))
    y = Variable(np.random.randn(DATA_SIZE, N_O).astype(np.float32))

    # randomly initialize weights
    w1 = Variable(np.random.randn(N_I, HIDDEN_SIZE).astype(np.float32))
    w2 = Variable(np.random.randn(HIDDEN_SIZE, N_O).astype(np.float32))

    for t in range(EPOCHS):
        # forward pass: compute predicted y
        h = F.matmul(x, w1)
        h_r = F.relu(h)
        y_p = F.matmul(h_r, w2)

        # compute and print loss
        loss = F.mean_squared_error(y_p, y)
        print(loss.data)

        # manually zero the gradients
        w1.zerograd()
        w2.zerograd()

        # backward pass
        # loss.grad = np.ones(loss.shape, dtype=np.float32)
        loss.backward()

        # update weights
        w1.data -= LEARNING_RATE * w1.grad
        w2.data -= LEARNING_RATE * w2.grad
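# A minimal sketch of the setup the two sample_2 variants above assume. The
# names (DATA_SIZE, N_I, HIDDEN_SIZE, N_O, EPOCHS, LEARNING_RATE, X, Y, W1,
# W2) and the array shapes come from the snippets themselves; the concrete
# sizes, epoch count, and learning rate below are illustrative assumptions,
# not values from the original projects.
import numpy as np
import chainer.functions as F
from chainer import Variable

DATA_SIZE = 64        # number of training examples (assumed)
N_I = 1000            # input dimensionality (assumed)
HIDDEN_SIZE = 100     # hidden-layer width (assumed)
N_O = 10              # output dimensionality (assumed)
EPOCHS = 500          # training iterations (assumed)
LEARNING_RATE = 1e-6  # SGD step size (assumed)

# fixed data and initial weights consumed by the first variant via .copy()
X = np.random.randn(DATA_SIZE, N_I).astype(np.float32)
Y = np.random.randn(DATA_SIZE, N_O).astype(np.float32)
W1 = np.random.randn(N_I, HIDDEN_SIZE).astype(np.float32)
W2 = np.random.randn(HIDDEN_SIZE, N_O).astype(np.float32)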
def _apply_backward(self, x, grid, grads, use_cudnn):
    # wrap the raw arrays so gradients can be accumulated on them
    x = Variable(x)
    grid = Variable(grid)
    y = functions.spatial_transformer_sampler(x, grid, use_cudnn=use_cudnn)
    # clear any stale gradients, seed y.grad, and backpropagate
    x.zerograd()
    grid.zerograd()
    y.grad = grads
    y.backward()
    return x, grid, y
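# A hedged sketch of how the test helper above might be driven.
# chainer.functions.spatial_transformer_sampler samples an input of shape
# (n, c, h_in, w_in) at normalized coordinates given by a grid of shape
# (n, 2, h_out, w_out); the concrete sizes here are assumptions, and the
# use_cudnn keyword belongs to older Chainer releases.
import numpy as np

n, c, h, w = 2, 3, 8, 8
x = np.random.randn(n, c, h, w).astype(np.float32)
grid = np.random.uniform(-1, 1, (n, 2, h, w)).astype(np.float32)
grads = np.ones((n, c, h, w), dtype=np.float32)  # seed for y.grad

# inside the test case (self is the TestCase instance):
# x_var, grid_var, y = self._apply_backward(x, grid, grads, use_cudnn=False)
# x_var.grad and grid_var.grad then hold the backpropagated gradients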
def update_step(net, images, step_size=1.5, end='inception_4c/output',
                jitter=32, clip=True):
    # shift the image by a random offset to regularize the gradients
    offset_x, offset_y = np.random.randint(-jitter, jitter + 1, 2)
    data = np.roll(np.roll(images, offset_x, -1), offset_y, -2)

    x = Variable(xp.asarray(data))
    x.zerograd()

    # forward up to the target layer and backpropagate the objective
    dest, = net(x, outputs=[end])
    objective(dest).backward()
    g = cuda.to_cpu(x.grad)

    # normalized gradient-ascent step on the image itself
    data[:] += step_size / np.abs(g).mean() * g

    # undo the jitter shift
    data = np.roll(np.roll(data, -offset_x, -1), -offset_y, -2)

    if clip:
        # keep pixels within the valid range of the mean-subtracted input
        bias = net.mean.reshape((1, 3, 1, 1))
        data[:] = np.clip(data, -bias, 255 - bias)
    return data
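# The snippet above leaves net, xp, and objective to the enclosing module.
# A hedged sketch of those surroundings: xp is numpy or cupy depending on
# the device, objective is assumed to be the DeepDream-style L2 objective
# (whose gradient with respect to the activations is the activations
# themselves), and net is assumed to be callable with outputs=[...] and to
# carry the dataset mean used for clipping.
import numpy as np
import chainer.functions as F
from chainer import Variable, cuda

xp = np  # or cuda.cupy when running on a GPU

def objective(dst):
    # maximizing 0.5 * ||dst||^2 gives d(objective)/d(dst) = dst
    return F.sum(dst * dst) / 2

# typical outer loop (net and images assumed):
# for i in range(10):
#     images = update_step(net, images)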
# create random input and output data
x = Variable(X)
y = Variable(Y)

# randomly initialize weights
w1 = Variable(W1)
w2 = Variable(W2)

for t in range(EPOCHS):
    # forward pass: compute predicted y
    h = F.matmul(x, w1)
    h_r = F.relu(h)
    y_p = F.matmul(h_r, w2)

    # compute and print loss
    loss = F.mean_squared_error(y_p, y)
    print(loss.data)

    # manually zero the gradients
    w1.zerograd()
    w2.zerograd()

    # backward pass
    # loss.grad = np.ones(loss.shape, dtype=np.float32)
    loss.backward()

    # update weights
    w1.data -= LEARNING_RATE * w1.grad
    w2.data -= LEARNING_RATE * w2.grad
for batch_indexes in np.array_split(perm, num_batches):
    x_batch = x_train[batch_indexes]
    t_batch = t_train[batch_indexes]

    x = Variable(x_batch)
    t = Variable(t_batch)

    # forward pass
    a_z = F.linear(x, w_1, b_1)
    z = F.tanh(a_z)
    a_y = F.linear(z, w_2, b_2)
    loss = F.softmax_cross_entropy(a_y, t)

    # backward pass
    w_1.zerograd()
    w_2.zerograd()
    b_1.zerograd()
    b_2.zerograd()
    loss.backward(retain_grad=True)

    grad_w_1 = w_1.grad
    grad_w_2 = w_2.grad
    grad_b_1 = b_1.grad
    grad_b_2 = b_2.grad

    # gradient-descent update
    w_1.data = w_1.data - learning_rate * grad_w_1
    w_2.data = w_2.data - learning_rate * grad_w_2
    b_1.data = b_1.data - learning_rate * grad_b_1
    b_2.data = b_2.data - learning_rate * grad_b_2
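# A hedged sketch of the setup the minibatch loop above assumes. The names
# come from the snippet; the layer sizes, dataset size, batch count, and
# learning rate are illustrative assumptions. Note that F.linear expects
# weight matrices of shape (out_size, in_size).
import numpy as np
import chainer.functions as F
from chainer import Variable

n_in, n_hidden, n_out = 784, 100, 10   # assumed layer sizes
learning_rate = 0.01                   # assumed step size

w_1 = Variable(np.random.randn(n_hidden, n_in).astype(np.float32) * 0.01)
b_1 = Variable(np.zeros(n_hidden, dtype=np.float32))
w_2 = Variable(np.random.randn(n_out, n_hidden).astype(np.float32) * 0.01)
b_2 = Variable(np.zeros(n_out, dtype=np.float32))

# x_train: float32 features, t_train: int32 class labels (assumed shapes)
x_train = np.random.randn(1000, n_in).astype(np.float32)
t_train = np.random.randint(0, n_out, 1000).astype(np.int32)

num_batches = 10
perm = np.random.permutation(len(x_train))  # shuffled indices per epoch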