def conv_bn_conv_bn_pool2x2(inp_layer, conv_filters, conv_shapes, res_shape, training_name):
    # First conv block: conv -> spatial batchnorm -> ReLU -> dropout.
    assert conv_shapes[0][1] == conv_shapes[0][2]
    pad1 = conv_shapes[0][1] // 2
    conv1 = layers.Conv((conv_filters[0], ) + conv_shapes[0], {'stride': 1, 'pad': pad1}, inp_layer)
    conv1 = layers.SpatialBatchnorm((conv_filters[0], ) + res_shape, training_name, conv1)
    conv1 = layers.Relu(conv1)
    conv1 = layers.Dropout(0.6, training_name, conv1)

    # Second conv block: same pattern, with input channels taken from the first block.
    assert conv_shapes[1][0] == conv_shapes[1][1]
    pad2 = conv_shapes[1][1] // 2
    conv2 = layers.Conv((conv_filters[1], conv_filters[0]) + conv_shapes[1], {'stride': 1, 'pad': pad2}, conv1)
    conv2 = layers.SpatialBatchnorm((conv_filters[1], ) + res_shape, training_name, conv2)
    conv2 = layers.Relu(conv2)
    conv2 = layers.Dropout(0.6, training_name, conv2)

    # 2x2 max pooling with stride 2 halves the spatial dimensions.
    pool = layers.MaxPool((2, 2), 2, conv2)
    return pool
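# Minimal usage sketch (not from the original source; the shapes below are
# hypothetical, chosen only to satisfy the asserts above): conv_shapes[0] is a
# (C_in, H, W) kernel shape, conv_shapes[1] a square (H, W) kernel shape, and
# res_shape the spatial size the batchnorm layers expect.
# block = conv_bn_conv_bn_pool2x2(inp, (32, 32), ((3, 3, 3), (3, 3)), (32, 32), training_name)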
def __init__(self, input_size, hidden_size, output_size, init_weight_std=0.01):
    # Randomly initialize the two affine layers' weights; biases start at zero.
    self.params = dict()
    self.params["W1"] = init_weight_std * np.random.rand(input_size, hidden_size)
    self.params["b1"] = np.zeros(hidden_size)
    self.params["W2"] = init_weight_std * np.random.rand(hidden_size, output_size)
    self.params["b2"] = np.zeros(output_size)

    # Layers live in an OrderedDict so the forward pass can iterate them in order.
    self.layers = OrderedDict()
    self.layers["Affine1"] = L.Affine(self.params["W1"], self.params["b1"])
    self.layers["Activation1"] = L.Relu()
    self.layers["Affine2"] = L.Affine(self.params["W2"], self.params["b2"])
    self.layers["Activation2"] = L.Relu()
    self.output_layer = L.Softmax_with_loss()
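# A minimal companion sketch (not part of the original class): assuming each layer
# in self.layers exposes forward(x) and the softmax-with-loss layer exposes
# forward(x, t), the usual predict/loss pair for this kind of network looks like:
def predict(self, x):
    # Pass the input through every layer in insertion order.
    for layer in self.layers.values():
        x = layer.forward(x)
    return x

def loss(self, x, t):
    # Scores followed by softmax cross-entropy against the targets t.
    y = self.predict(x)
    return self.output_layer.forward(y, t)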
def fc_bn_dropout(inp_layer, size, training_name):
    fc = layers.Affine(size, inp_layer)
    fc = layers.Batchnorm(size[1], training_name, fc)
    fc = layers.Relu(fc)
    fc = layers.Dropout(0.8, training_name, fc)
    return fc
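# Minimal usage sketch (hypothetical dimensions, not from the original source):
# size is an (in_dim, out_dim) pair, matching the Affine/Batchnorm calls above.
# fc1 = fc_bn_dropout(pool, (pool_out_dim, 512), training_name)
# fc2 = fc_bn_dropout(fc1, (512, 256), training_name)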
def test_2layer_net():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)

    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(), optim.SGD(lr=1e-5))

    scores = net_2.forward(X)
    correct_scores = np.asarray([[-1.07260209, 0.05083871, -0.87253915],
                                 [-2.02778743, -0.10832494, -1.52641362],
                                 [-0.74225908, 0.15259725, -0.39578548],
                                 [-0.38172726, 0.10835902, -0.17328274],
                                 [-0.64417314, -0.18886813, -0.41106892]])
    diff = np.sum(np.abs(scores - correct_scores))
    assert np.isclose(diff, 0.0, atol=1e-6)

    loss = net_2.loss(X, Y_enc)
    correct_loss = 1.071696123862817
    assert np.isclose(loss, correct_loss, atol=1e-8)
def relu_x_forward():
    N, D, C = 4, 5, 6
    relu = layers.Relu()
    dm = layers.dummy()

    def f(x):
        y = relu.forward(x)
        yout = dm.forward(y)
        dy = dm.backward(y, 1)
        dx = relu.backward(x, dy)
        return yout, dx

    return f
def __init__(self, input_size, output_size, init_weight_std=0.01, filter_num=5, filter_size=3, pool_size=2):
    # input_size = [C, H, W]; sizes below assume stride 1, no padding, then 2x2 pooling.
    size = input_size[1]
    conv_out_size = size - filter_size + 1
    pool_out_size = int((conv_out_size / 2) ** 2 * filter_num)

    self.params = dict()
    self.params["W1"] = init_weight_std * np.random.rand(filter_num, input_size[0], filter_size, filter_size)
    self.params["b1"] = np.zeros(filter_num)
    self.params["W2"] = init_weight_std * np.random.rand(pool_out_size, output_size)
    self.params["b2"] = np.zeros(output_size)

    self.layers = OrderedDict()
    self.layers["Conv"] = L.Convolution(self.params["W1"], self.params["b1"])
    self.layers["Activation1"] = L.Relu()
    self.layers["Pooling"] = L.Pooling(pool_size, pool_size, stride=2)
    self.layers["Affine1"] = L.Affine(self.params["W2"], self.params["b2"])
    self.layers["Activation2"] = L.Relu()
    self.output_layer = L.Softmax_with_loss()
    self.y = None
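# Worked size check (assuming a 28x28 single-channel input such as MNIST, with the
# defaults filter_num=5 and filter_size=3): conv_out_size = 28 - 3 + 1 = 26, and
# after 2x2 pooling the flattened feature size is (26 / 2) ** 2 * 5 = 13 * 13 * 5 = 845,
# which is the number of input units W2 must provide.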
def __init__(self, dimhid=100, dimout=10, weight_init_std=0.01, imgsize=28):
    strid = 1
    pad = 0
    fitsize = 3
    cout = 30
    cin = 1

    self.layers = OrderedDict()
    self.layers['conv'] = layers.conv(cin, cout, fitsize, strid=strid, pading=pad)
    self.layers['relu1'] = layers.Relu()
    self.layers['Pool1'] = layers.Pooling(pool_h=2, pool_w=2, stride=2)

    # Spatial size after the convolution, then halved by the 2x2 pooling.
    con_out_size = (imgsize - fitsize + 2 * pad) / strid + 1
    dimin = int(cout * (con_out_size / 2) * (con_out_size / 2))

    self.layers['fc1'] = layers.FC(dimin, dimhid)
    self.layers['relu2'] = layers.Relu()
    self.layers['fc2'] = layers.FC(dimhid, dimout)
    self.softmax = layers.SoftmaxWithLoss()
def test_2layer_grad():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)

    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape, reg='frob', reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(), optim.SGD(lr=1e-5))

    loss = net_2.loss(X, Y_enc)
    net_2.backward()

    def f_change_param(param_name, U):
        # Swap the selected parameter for U and recompute the loss; param_name
        # indexes parameters in the same order as net_2.grads.
        if param_name == 3:
            net_2.layers[0].params['b'] = U
        if param_name == 2:
            net_2.layers[0].params['W'] = U
        if param_name == 1:
            net_2.layers[2].params['b'] = U
        if param_name == 0:
            net_2.layers[2].params['W'] = U
        return net_2.loss(X, Y_enc)

    rel_errs = np.empty(4)
    for param_name in range(4):
        f = lambda U: f_change_param(param_name, U)
        if param_name == 3:
            pass_pars = net_2.layers[0].params['b']
        if param_name == 2:
            pass_pars = net_2.layers[0].params['W']
        if param_name == 1:
            pass_pars = net_2.layers[2].params['b']
        if param_name == 0:
            pass_pars = net_2.layers[2].params['W']
        param_grad_num = dutil.grad_check(f, pass_pars, epsilon=1e-5)
        rel_errs[param_name] = ut.rel_error(param_grad_num, net_2.grads[param_name])

    assert np.allclose(rel_errs, np.zeros(4), atol=1e-7)
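# For reference, a common definition of the relative error used in this kind of
# gradient check (ut.rel_error is assumed to do something equivalent; this is a
# sketch, not the original implementation):
# def rel_error(x, y):
#     return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))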
def test_relu(self):
    l = layers.Relu(3, 3)
    self.assertEqual(l.a(3), 3)
    self.assertEqual(l.a(-3), 0)
    self.assertEqual(l.der(23), 1)
    self.assertEqual(l.der(-3), 0)
def __init__(self, dimin, dimhid, dimout, weight_init_std=0.01):
    self.layers = OrderedDict()
    self.layers['fc1'] = layers.FC(dimin, dimhid)
    self.layers['relu'] = layers.Relu()
    self.layers['fc2'] = layers.FC(dimhid, dimout)
    self.softmax = layers.SoftmaxWithLoss()
# Normalize the data
train_image = train_image / SCALER
test_image = test_image / SCALER

X_train, X_valid, y_train, y_valid = train_valid_split(train_image, train_label, test_size=test_size)
N_train = X_train.shape[0]
dimensions = X_train.shape[1]
num_L1 = dimensions

# Model: Dense -> ReLU -> Dense, trained with softmax cross-entropy
model = dict()
model['L1'] = layers.Dense(input_D=num_L1, output_D=num_L2)
model['relu1'] = layers.Relu()
model['L2'] = layers.Dense(input_D=num_L2, output_D=num_L3)
model['loss'] = layers.SoftmaxCrossEntropy()

for t in range(num_epoch):
    # Shuffle the training set and iterate over minibatches
    idx_permute = np.random.permutation(N_train)
    num_batches = int(N_train // batch_size)
    for i in range(num_batches):
        X_batch, y_batch = get_minibatch(
            X_train, y_train, idx_permute[i * batch_size:(i + 1) * batch_size])
        a1 = model['L1'].forward(X_batch)
        h1 = model['relu1'].forward(a1)