def test_mnist():
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    val_x = train_x[50000:]
    val_y = train_y[50000:]
    train_x = train_x[:50000]
    train_y = train_y[:50000]
    batch_size = 200

    model = models.Sequential()
    model.add(layers.Linear(28, input_shape=(None, train_x.shape[1])))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.Softmax())

    acc = losses.categorical_accuracy.__name__
    model.compile(losses.CrossEntropy(), optimizers.SGD(lr=0.001),
                  metrics=[losses.categorical_accuracy])
    model.summary()
    history = model.train(train_x, train_y, batch_size, epochs=32,
                          validation_data=(val_x, val_y))

    epochs = range(1, len(history["loss"]) + 1)
    plt.plot(epochs, history["loss"], 'ro', label="Training loss")
    plt.plot(epochs, history["val_loss"], 'go', label="Validation loss")
    plt.plot(epochs, history[acc], 'r', label="Training accuracy")
    plt.plot(epochs, history["val_" + acc], 'g', label="Validation accuracy")
    plt.title('Training/Validation loss/accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss/Accuracy')
    plt.legend()
    plt.show(block=True)
def __init__(self, normalize, stochastic, device):
    super(ComplexConv, self).__init__()
    self.device = device
    self.stochastic = stochastic
    if self.stochastic:
        args = [device, normalize]
        self.conv1 = layers.Conv2d(3, 64, 3, *args)
        self.conv2 = layers.Conv2d(64, 128, 3, *args)
        self.conv3 = layers.Conv2d(128, 256, 3, *args)
        self.pool = nn.AvgPool2d(2, 2)
        self.fc1 = layers.Linear(64 * 4 * 4, 128, *args)
        self.fc2 = layers.Linear(128, 256, *args)
    else:
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.conv2 = nn.Conv2d(64, 128, 3)
        self.conv3 = nn.Conv2d(128, 256, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.fc2 = nn.Linear(128, 256)
    self.classifier = nn.Linear(256, 10, bias=False)
    self.classifier.weight.requires_grad = False
    torch.nn.init.orthogonal_(self.classifier.weight)
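# No forward pass is shown for ComplexConv. Below is a minimal sketch of what
# it could look like; the ReLU activations and the method name are assumptions,
# not confirmed by the constructor above. With 32x32 CIFAR inputs the shapes
# line up: 32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2, so the flattened feature count
# is 256*2*2 = 1024 = 64*4*4, matching fc1's in_features.
import torch.nn.functional as F

def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.pool(F.relu(self.conv3(x)))
    x = x.view(x.size(0), -1)   # flatten to (batch, 256*2*2) == (batch, 64*4*4)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.classifier(x)   # frozen, orthogonally initialized head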
def run_test_model():
    basic_NN = JTDNN()
    input = basic_NN.input(input_dims=(2, None))
    Z1 = layers.Linear(output_dims=(10, None), initialiser="glorot", name="linear")(input)
    A1 = activations.Relu(Z1, name='relu')
    Z2 = layers.Linear(output_dims=(5, None), initialiser="glorot", name="Henry")(A1)
    A2 = activations.Relu(Z2, name='relu')
    Z3 = layers.Linear(output_dims=(1, None), initialiser="glorot")(A2)  # name should be automatically set to "Henry2"
    output = activations.Sigmoid(Z3, name='sigmoid')
    optimiser = optimisers.GradientDesc(learning_rate=0.001)
    basic_NN.compile(input=input, output=output, lambd=0.01,
                     loss="BinaryCrossEntropy",
                     optimiser=optimiser)  # BGD stands for Batch Gradient Descent
    # basic_NN.fit(X, Y, num_iterations=10000, verbose=1)
    num_iterations = 10000
    for _ in range(num_iterations):
        basic_NN.forward_prop(X)
        basic_NN.compute_cost(Y)
        basic_NN.back_prop()
        basic_NN.update_weights()
def test_layers_addition(self):
    v = core.FeedForward()
    v += layers.Linear(2, 3)
    v += layers.Tanh(3, 2)
    v += layers.Linear(2, 1)
    self.assertEqual(len(v.layers), 3)
    self.assertEqual(len(v.layers[1].v), 2)
def test_dropout_after_training(self):
    n = core.FeedForward(momentum=0.1, learn_rate=0.1)
    drop = layers.Dropout(layers.Tanh(2, 2), percentage=0.5)
    n += layers.Linear(2, 2)
    n += drop
    n += layers.Linear(2, 1)
    s = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]
    n.fit(*s[1])
    n.fit(*s[0])
    n.fit(*s[2])
    n.fit(*s[0])
    n.fit(*s[1])
    zeros = 0
    for row in drop.y:
        if row[0] == 0:
            zeros += 1
    self.assertEqual(zeros, len(drop.w) // 2)
def __init__(self, n_in, nh, n_out):
    super().__init__()
    self.layers = nn.Sequential(
        layers.Linear(n_in, nh),
        layers.ReLU(),
        layers.Linear(nh, n_out))
    self.loss = functions.MSE
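# In an nn.Module subclass, this constructor is typically paired with a
# one-line forward; a minimal sketch (assumption: the enclosing class is an
# nn.Module, so the Sequential container is directly callable):
def forward(self, x):
    return self.layers(x)  # Linear -> ReLU -> Linear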
def __init__(self, in_size, n_classes):
    self.h1 = layers.Linear(in_size, 128)
    self.h2 = layers.Linear(128, n_classes)
    self.relu = ReLU()
    self.softmax = SoftMax()
    self.sigmoid = Sigmoid()
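# A matching forward pass is not included above. A minimal sketch, assuming
# each layer exposes a forward() method and the stored Sigmoid is an
# alternative output activation left unused here:
def forward(self, x):
    x = self.relu.forward(self.h1.forward(x))        # in_size -> 128 hidden
    return self.softmax.forward(self.h2.forward(x))  # 128 -> n_classes probs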
def test_2layer_net():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)
    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(),
                       optim.SGD(lr=1e-5))
    scores = net_2.forward(X)
    correct_scores = np.asarray([
        [-1.07260209,  0.05083871, -0.87253915],
        [-2.02778743, -0.10832494, -1.52641362],
        [-0.74225908,  0.15259725, -0.39578548],
        [-0.38172726,  0.10835902, -0.17328274],
        [-0.64417314, -0.18886813, -0.41106892]])
    diff = np.sum(np.abs(scores - correct_scores))
    assert np.isclose(diff, 0.0, atol=1e-6)
    loss = net_2.loss(X, Y_enc)
    correct_loss = 1.071696123862817
    assert np.isclose(loss, correct_loss, atol=1e-8)
def __init__(self, num_inputs, action_space, model_train=True):
    self.model_train = model_train
    self.conv1 = layers.Conv2d(num_inputs, 32, 3, stride=2, padding=1, train=model_train)
    self.conv2 = layers.Conv2d(32, 32, 3, stride=2, padding=1, train=model_train)
    self.conv3 = layers.Conv2d(32, 32, 3, stride=2, padding=1, train=model_train)
    self.conv4 = layers.Conv2d(32, 32, 3, stride=2, padding=1, train=model_train)
    self.lstm = layers.LSTMCell(32 * 3 * 3, 256, train=model_train)
    num_outputs = action_space.n
    self.critic_linear = layers.Linear(256, 1, train=model_train)
    self.actor_linear = layers.Linear(256, num_outputs, train=model_train)

    # initialize parameters
    self.conv1.init_weight(random=True)
    self.conv1.init_bias(random=False)
    self.conv2.init_weight(random=True)
    self.conv2.init_bias(random=False)
    self.conv3.init_weight(random=True)
    self.conv3.init_bias(random=False)
    self.conv4.init_weight(random=True)
    self.conv4.init_bias(random=False)
    self.critic_linear.init_weight(random=True)
    self.critic_linear.init_bias(random=False)
    self.actor_linear.init_weight(random=True)
    self.actor_linear.init_bias(random=False)
    self.lstm.init_weight(random=True)
    self.lstm.init_bias(random=False)

    # grad
    self.y1 = []
    self.y2 = []
    self.y3 = []
    self.y4 = []
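# A forward pass for this actor-critic model is not shown. A speculative
# sketch in the usual A3C layout; the method names and the recurrent-state
# threading are assumptions about this custom framework. The y1..y4 lists
# suggest intermediate activations are cached for a manual backward pass.
def forward(self, x, hx, cx):
    self.y1 = self.conv1.forward(x)
    self.y2 = self.conv2.forward(self.y1)
    self.y3 = self.conv3.forward(self.y2)
    self.y4 = self.conv4.forward(self.y3)
    flat = self.y4.reshape(1, 32 * 3 * 3)      # match the LSTM input size
    hx, cx = self.lstm.forward(flat, (hx, cx))
    return self.critic_linear.forward(hx), self.actor_linear.forward(hx), (hx, cx)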
def test_cv(self):
    import core.estimators
    n = core.FeedForward(momentum=0.1, learn_rate=0.1)
    n += layers.Linear(2, 2)
    n += layers.Tanh(2, 1)
    n += layers.Linear(1, 0)
    s = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]
    error = core.estimators.cv(n, s)
    self.assertTrue(type(error) is float)
def __init__(self, in_channels, n_classes):
    self.in_channels = in_channels
    self.n_classes = n_classes
    self.conv1 = layers.Conv2D(in_channels, 3, 3)
    self.relu1 = ReLU()
    # self.sig1 = Sigmoid()
    self.pool1 = MaxPooling(kernel_size=2)
    self.fc1 = layers.Linear(3 * 13 * 13, 256)
    self.relu2 = ReLU()
    # self.sig2 = Sigmoid()
    self.fc2 = layers.Linear(256, n_classes)
    self.softmax = SoftMax()
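# Sketch of the forward pass this constructor implies. Assumptions: 28x28
# inputs (3x3 conv -> 26x26, 2x2 pool -> 13x13, matching fc1's 3*13*13),
# each layer exposes forward(), and activations are flattened before fc1.
def forward(self, x):
    x = self.relu1.forward(self.conv1.forward(x))  # 28x28 -> 26x26, 3 channels
    x = self.pool1.forward(x)                      # 26x26 -> 13x13
    x = x.reshape(x.shape[0], -1)                  # flatten to 3*13*13 features
    x = self.relu2.forward(self.fc1.forward(x))
    return self.softmax.forward(self.fc2.forward(x))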
def test_get(self):
    nn1 = core.FeedForward(momentum=0.1, learn_rate=0.1)
    nn1 += layers.Tanh(2, 2)
    nn1 += layers.Linear(2, 2)
    nn2 = core.FeedForward(momentum=0.1, learn_rate=0.1)
    nn2 += layers.Tanh(2, 2)
    nn2 += layers.Linear(2, 2)
    ensemble = core.Ensemble(nn1, nn2)
    ensemble.fit([0, 1], [2, 1])
    stack = numpy.vstack((nn1.get([0, 0]), nn2.get([0, 0])))
    self.assertEqual(round(ensemble.get([0, 0])[0] * 1000),
                     round((stack.sum(axis=0) / len(stack))[0] * 1000))
def __init__(self, args, device='cuda'):
    super().__init__()
    self.args = args
    self.device = device
    self.img_channels = 3
    self.depths = [args.zdim, 256, 256, 256, 128, 128]
    self.didx = 0
    self.alpha = 1.

    # init G
    self.G = nn.ModuleList()
    blk = nn.ModuleList()
    blk.append(ll.Conv2d(self.depths[0], self.depths[0], 4, padding=3))  # to 4x4
    blk.append(ll.Conv2d(self.depths[0], self.depths[0], 3, padding=1))
    self.G.append(blk)
    self.toRGB = nn.ModuleList()
    self.toRGB.append(ll.Conv2d(self.depths[0], self.img_channels, 1,
                                lrelu=False, pnorm=False))  # toRGB

    # init D
    self.fromRGB = nn.ModuleList()
    self.fromRGB.append(ll.Conv2d(self.img_channels, self.depths[0], 1))  # fromRGB
    self.D = nn.ModuleList()
    blk = nn.ModuleList()
    blk.append(ll.MinibatchStddev())
    blk.append(ll.Conv2d(self.depths[0] + 1, self.depths[0], 3, padding=1))
    blk.append(ll.Conv2d(self.depths[0], self.depths[0], 4, stride=4))  # to 1x1
    blk.append(ll.Flatten())
    blk.append(ll.Linear(self.depths[0], 1))
    self.D.append(blk)

    self.doubling = nn.Upsample(scale_factor=2)
    self.halving = nn.AvgPool2d(2, 2)
    self.set_optimizer()
    # self.criterion = losses.GANLoss(loss_type=args.loss_type, device=device)
    self.loss_type = args.loss_type
def test_compare_linear_syntax_and_linear_layer(self):
    x = np.random.rand(3)
    syntax_model = syntax.WxBiasLinear(3, 4, initialize_W='ones',
                                       initialize_b='ones', input=Var('x'))
    layer_model = layers.Linear(3, 4, initialize='ones')
    optimizer = SGD(0.1)
    # W = np.ones((4, 3))
    # b = np.ones(4)
    for i in range(5):
        syntax_y = syntax_model.forward_variables({'x': x})
        layer_y = layer_model.forward(x)
        assert_array_almost_equal(syntax_y, layer_y, decimal=12)

        dJdy = np.random.rand(4)
        syntax_grad = syntax_model.backward_variables(dJdy)
        layer_grad = layer_model.backward(dJdy)
        self.assertEqual(syntax_grad['x'].shape, layer_grad.shape,
                         'gradients should have the same vector shape')
        assert_array_almost_equal(syntax_grad['x'], layer_grad)
        # real_y = W.dot(x) + b
        # real_grad = W.T.dot(dJdy)
        # assert_array_equal(real_y, syntax_y)
        # assert_array_equal(syntax_grad['x'], real_grad)

        syntax_model.update_weights(optimizer)
        layer_model.update_weights(optimizer)
def test_update_weights_layer_vs_syntax(self):
    x = np.array([1., 2., 3.])
    optimizer = SGD(0.1)
    W = np.random.rand(3, 3 + 1)

    linear_layer = layers.Linear(3, 3, initialize=W.copy())
    linear_layer_model = Seq(linear_layer, layers.Tanh)
    y = linear_layer_model.forward(x)
    back = linear_layer_model.backward(np.ones(3))

    var_x = Var('x')
    syntax_linear = Linear(3, 3, initialize=W.copy(), input=var_x)
    syntax_model = Tanh(syntax_linear)
    syntax_y = syntax_model.forward_variables({'x': x})
    syntax_back = syntax_model.backward_variables(np.ones(3))

    assert_array_equal(linear_layer.delta_W, syntax_linear.layer.delta_W)

    # update weights in both models
    linear_layer_model.update_weights(optimizer)
    syntax_model.update_weights(optimizer)

    assert_array_equal(y, syntax_y)
    assert_array_equal(back, syntax_back['x'])
    assert_array_equal(linear_layer.W, syntax_linear.layer.W)
def __init__(self, in_size, out_size, initialize='random', dtype=None, input=None):
    SyntaxOp.__init__(self, input)
    self.layer = layers.Linear(in_size, out_size, initialize, dtype)
def test_dropout_drop(self):
    l = layers.Dropout(layers.Linear(10, 6), percentage=0.5)
    zeros = 0
    for row in l.D:
        if row[0] == 0:
            zeros += 1
    self.assertEqual(zeros, len(l.D) // 2)
def test_2layer_grad():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)
    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(),
                       optim.SGD(lr=1e-5))
    loss = net_2.loss(X, Y_enc)
    net_2.backward()

    def f_change_param(param_name, U):
        if param_name == 3:
            net_2.layers[0].params['b'] = U
        if param_name == 2:
            net_2.layers[0].params['W'] = U
        if param_name == 1:
            net_2.layers[2].params['b'] = U
        if param_name == 0:
            net_2.layers[2].params['W'] = U
        return net_2.loss(X, Y_enc)

    rel_errs = np.empty(4)
    for param_name in range(4):
        f = lambda U: f_change_param(param_name, U)
        if param_name == 3:
            pass_pars = net_2.layers[0].params['b']
        if param_name == 2:
            pass_pars = net_2.layers[0].params['W']
        if param_name == 1:
            pass_pars = net_2.layers[2].params['b']
        if param_name == 0:
            pass_pars = net_2.layers[2].params['W']
        param_grad_num = dutil.grad_check(f, pass_pars, epsilon=1e-5)
        rel_errs[param_name] = ut.rel_error(param_grad_num, net_2.grads[param_name])
    assert np.allclose(rel_errs, np.zeros(4), atol=1e-7)
def test_CrossEntropyLoss():
    np.random.seed(1)
    W = np.random.randn(c, n) * 0.0001
    b = np.random.randn(c, 1) * 0.0001
    layer_lin = layers.Linear(n, c, init_vals=(W.T, b.ravel()))
    loss_func = ls.CrossEntropy()
    net = nn.Network([layer_lin], loss_func, optimizer=None)
    my_loss = net.loss(X_dev, Y_dev_enc)
    assert np.isclose(my_loss, -np.log(.1), atol=1e-2)
def loss_func_b(bb):
    layer_lin = layers.Linear(n, c, reg='l2', reg_param=0.05,
                              init_vals=(W.T, bb.ravel()))
    loss_func = ls.CrossEntropy()
    net = nn.Network([layer_lin], loss_func, optimizer=None)
    return net.loss(X_dev, Y_dev_enc)
def test_overfitting(cifar, momentum):
    training = cifar.get_named_batches('data_batch_1').subset(100)

    net = Network()
    net.add_layer(layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = MomentumSGD(net, initial_learning_rate=0.005, momentum=momentum)
    opt.train(training, training, 400)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val,
                          'images/overfit_mom{}.png'.format(momentum))
    show_plot('images/overfit_mom{}.png'.format(momentum))
def test_fully_connected_NN():
    basic_NN = JTDNN()
    input = basic_NN.input(input_dims=(2, None))
    Z1 = layers.Linear(output_dims=(10, None), initialiser="glorot", name="linear")(input)
    A1 = activations.Sigmoid(Z1, name='sigmoid')
    Z2 = layers.Linear(output_dims=(5, None), initialiser="glorot", name="linear")(A1)
    A2 = activations.Sigmoid(Z2, name='sigmoid')
    Z3 = layers.Linear(output_dims=(1, None), initialiser="glorot", name="linear")(A2)
    output = activations.Sigmoid(Z3, name='sigmoid')
    print(f'basic_NN.graph_lis {basic_NN.graph_lis}')    # ['linear1']
    print(f'basic_NN.graph_dict {basic_NN.graph_dict}')  # "linear1": <layers.Linear object>
    print(f'output.jtdnn_obj {output.jtdnn_obj}')
    print(f'output.output_size {output.output_size}')
def test_CrossEntropy_Linear_Grad():
    np.random.seed(1)
    W = np.random.randn(c, n) * 0.0001
    b = np.random.randn(c, 1) * 0.0001
    layer_lin = layers.Linear(n, c, reg='l2', reg_param=0.05,
                              init_vals=(W.T, b.ravel()))
    loss_func = ls.CrossEntropy()
    net = nn.Network([layer_lin], loss_func, optimizer=None)
    net_loss = net.loss(X_dev, Y_dev_enc)
    ngrad = net.backward()

    # Define functions to pass to helper
    def loss_func_W(ww):
        layer_lin = layers.Linear(n, c, reg='l2', reg_param=0.05,
                                  init_vals=(ww.T, b.ravel()))
        loss_func = ls.CrossEntropy()
        net = nn.Network([layer_lin], loss_func, optimizer=None)
        return net.loss(X_dev, Y_dev_enc)

    def loss_func_b(bb):
        layer_lin = layers.Linear(n, c, reg='l2', reg_param=0.05,
                                  init_vals=(W.T, bb.ravel()))
        loss_func = ls.CrossEntropy()
        net = nn.Network([layer_lin], loss_func, optimizer=None)
        return net.loss(X_dev, Y_dev_enc)

    # Actually run the test
    rel_err_weight = dutil.grad_check_sparse(loss_func_W, W, net.grads[0].T, 10, seed=42)
    rel_err_bias = dutil.grad_check_sparse(loss_func_b, b.ravel(), net.grads[1], 10, seed=42)
    assert np.allclose(rel_err_weight, np.zeros(rel_err_weight.shape), atol=1e-4)
    assert np.allclose(rel_err_bias, np.zeros(rel_err_bias.shape), atol=1e-4)
def __init__(self, normalize, stochastic, device):
    super(SimpleConv, self).__init__()
    self.device = device
    self.stochastic = stochastic
    if self.stochastic:
        args = [device, normalize]
        self.conv1 = layers.Conv2d(3, 6, 5, *args)
        self.conv2 = layers.Conv2d(6, 16, 5, *args)
        self.fc1 = layers.Linear(16 * 5 * 5, 120, *args)
        self.fc2 = layers.Linear(120, 84, *args)
    else:
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
    self.classifier = nn.Linear(84, 10, bias=False)
    if self.stochastic:
        self.classifier.weight.requires_grad = False
    torch.nn.init.orthogonal_(self.classifier.weight)
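# No pooling module is stored in SimpleConv, yet fc1 expects 16*5*5 features,
# which matches the LeNet-style layout for 32x32 inputs with 2x2 pooling after
# each conv (32 -> 28 -> 14 -> 10 -> 5). A hypothetical forward applying the
# pooling functionally; the method name and the ReLU/max-pool choices are
# assumptions, not confirmed by the snippet above.
import torch.nn.functional as F

def forward(self, x):
    x = F.max_pool2d(F.relu(self.conv1(x)), 2)
    x = F.max_pool2d(F.relu(self.conv2(x)), 2)
    x = x.view(x.size(0), -1)   # flatten to (batch, 16*5*5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.classifier(x)   # orthogonally initialized head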
def test_linear_class():
    basic_NN = JTDNN()
    input = basic_NN.input(input_dims=(2, None))
    Z1 = layers.Linear(output_dims=(10, None), initialiser="glorot", name="linear")(input)
    print(f'basic_NN.graph_lis {basic_NN.graph_lis}')    # ['linear1']
    print(f'basic_NN.graph_dict {basic_NN.graph_dict}')  # "linear1": <layers.Linear object>
    print(f'input.jtdnn_obj {input.jtdnn_obj}')          # JTDNN object
    print(f'input.output_dims {input.output_dims}')      # (2, None)
    print(f'Z1 {Z1}')
    print(f'Z1.output_dims {Z1.output_dims}')
    print(f'Z1.output_size {Z1.output_size}')
    print(f'Z1.W {Z1.W}')
    print(f'Z1.b {Z1.b}')
    print(f'Z1.W.shape {Z1.W.shape}')
    print(f'Z1.b.shape {Z1.b.shape}')
def test_by_xor(self):
    error = 0.1
    n = core.FeedForward(momentum=0.1, learn_rate=0.1)
    # .create([2, 2, 1], default=layers.Tanh)
    n += layers.Tanh(2, 2)
    n += layers.Linear(2, 1)
    s = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]
    for i in range(10_000):
        r = random.randint(0, len(s) - 1)
        n.fit(*s[r])
def test_vanilla(cifar):
    training = cifar.get_named_batches('data_batch_1')
    validation = cifar.get_named_batches('data_batch_2')

    net = Network()
    net.add_layer(layers.Linear(cifar.input_size, cifar.output_size, 0,
                                initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = VanillaSGD(net, initial_learning_rate=0.01, decay_factor=0.99,
                     shuffle=True)
    opt.train(training, validation, 100, 500)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val, 'images/vanilla.png')
    show_plot('images/vanilla.png')
def test_xor_by_train(self):
    error = 0.1
    n = core.FeedForward(momentum=0.1, learn_rate=0.1)
    n += layers.Tanh(2, 2)
    n += layers.Linear(2, 1)
    s = [
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    ]
    n.train(s, 10_000)
    for v in s:
        res = n.get(v[0])
        self.assertTrue(abs(v[1][0] - res[0]) < error)
    for v in s:
        print(n.get(v[0]), end='\n\n')
def __init__(self, normalize, stochastic, device):
    super(LeNet5, self).__init__()
    self.stochastic = stochastic
    if stochastic:
        args = [device, normalize]
        # from linked paper top of page 4 and section 2.2
        module_list = [
            layers.Conv2d(1, 6, 5, *args),
            nn.AvgPool2d(2),
            layers.Conv2d(6, 16, 5, *args),
            nn.AvgPool2d(2),
            layers.Conv2d(16, 120, 5, *args),
            layers.Linear(120, 84, *args)
        ]
        self.linear_layer = nn.Linear(84, 10, bias=False)
        torch.nn.init.orthogonal_(self.linear_layer.weight)
        if stochastic:
            self.linear_layer.weight.requires_grad = False
    else:
        module_list = [
            nn.Conv2d(1, 6, 5),
            nn.Tanh(),
            nn.AvgPool2d(2),
            nn.Tanh(),
            nn.Conv2d(6, 16, 5),
            nn.Tanh(),
            nn.AvgPool2d(2),
            nn.Tanh(),
            nn.Conv2d(16, 120, 5),
            nn.Tanh(),
            nn.Linear(120, 84),
        ]
        self.linear_layer = nn.Linear(84, 10, bias=False)
    self.layers = nn.ModuleList(module_list)
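# Hypothetical forward pass for the LeNet5 constructor above. The module list
# has no explicit flatten between Conv2d(16, 120, 5) (which yields a 1x1 map)
# and Linear(120, 84), so this sketch flattens once the spatial size reaches
# 1x1; that heuristic and the method name are assumptions.
def forward(self, x):
    for m in self.layers:
        if x.dim() == 4 and x.shape[-2:] == (1, 1):
            x = x.flatten(1)   # collapse the 1x1 map before the Linear layers
        x = m(x)
    return self.linear_layer(x)  # 84 -> 10 head, frozen when stochastic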
def convert_to_one_hot_labels(input, target, zero_value=0):
    '''Convert output to a one-hot labeled tensor. The value at the label
    position will be 1 and zero_value everywhere else.'''
    tmp = input.new(target.size(0), target.max() + 1).fill_(zero_value)
    tmp.scatter_(1, target.view(-1, 1), 1.0)
    return tmp


y_train = convert_to_one_hot_labels(x_train, y_train, -1)
y_test = convert_to_one_hot_labels(x_train, y_test, -1)

### Testing the speed of our own framework ###
# Defining the model architecture
model = containers.Sequential(layers.Linear(2, 25, with_bias=True),
                              activations.ReLU(),
                              layers.Linear(25, 25, with_bias=True),
                              activations.ReLU(),
                              layers.Linear(25, 25, with_bias=True),
                              activations.ReLU(),
                              layers.Linear(25, 2, with_bias=True),
                              activations.Tanh())
criterion = losses.LossMSE()
optimizer = optimizers.SGD(model.param(), learning_rate=0.001)


def compute_nb_errors(model, data_input, data_target):
    mini_batch_size = 100
    n_misclassified = 0
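    # (The original snippet is cut off here. A plausible completion, assuming
    # the framework's model.forward() returns per-class scores and that
    # data_target holds the -1/1 one-hot labels built above; every call below
    # rests on those assumptions.)
    for b in range(0, data_input.size(0), mini_batch_size):
        output = model.forward(data_input.narrow(0, b, mini_batch_size))
        pred = output.argmax(dim=1)   # predicted class per sample
        truth = data_target.narrow(0, b, mini_batch_size).argmax(dim=1)
        n_misclassified += (pred != truth).sum().item()
    return n_misclassified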