def test_layer_lin1(self):
    """Compare a linear layer with MSE loss against PyTorch.

    Builds ``X @ W + b`` with the ``rd`` graph builders, feeds the flattened
    prediction and target to ``mse``, and checks the loss value and the
    gradients w.r.t. the target, the weights, the bias and the input against
    the values PyTorch computes on the same random data.
    """
    n_samples, n_in, n_out = 46, 7, 3

    # Draw order (X, y_true, W, b) is kept so a seeded RNG gives the same data.
    X = np.random.randn(n_samples, n_in)
    y_true = np.random.randn(n_samples, n_out)
    W = np.random.randn(n_in, n_out)
    b = np.random.randn(n_out)

    # Graph under test.
    dX = val(X)
    dy_true = val(y_true)
    dW = val(W)
    db = val(b)
    flat_shape = (y_true.size, )
    dy_pred = rd.build_add_bias(rd.build_dot_mm(dX, dW), db)
    dloss = mse(rd.build_reshape(dy_pred, flat_shape),
                rd.build_reshape(dy_true, flat_shape))

    # PyTorch reference.
    tX = torch.tensor(X, requires_grad=True)
    ty_true = torch.tensor(y_true, requires_grad=True)
    tW = torch.tensor(W, requires_grad=True)
    tb = torch.tensor(b, requires_grad=True)
    ty_pred = torch.matmul(tX, tW) + tb
    tloss = torch.nn.MSELoss()(ty_pred, ty_true)
    tloss.backward()

    self.ck_fequals(dloss.eval(), tloss.data.numpy(), feps=1e-3)

    # (graph node, torch tensor, extra tolerance kwargs); an empty dict means
    # the default tolerance of ck_fequals is used.
    grad_checks = (
        (dy_true, ty_true, {}),
        (dW, tW, {"feps": 1e-5}),
        (db, tb, {"feps": 1e-5}),
        (dX, tX, {}),
    )
    for node, ref, tol in grad_checks:
        self.ck_fequals(get_grad(dloss, node).eval(),
                        ref.grad.data.numpy(), **tol)
def test_maxpooling(self):
    """Compare ``rd.build_max_pooling`` (forward and gradient) with PyTorch.

    Each scenario pools a random 4-D array, flattens the result and takes its
    dot product with itself to get a scalar to differentiate. The four integer
    arguments of ``rd.build_max_pooling`` are paired with
    ``torch.nn.functional.max_pool2d`` as ``kernel_size=(a, b)`` and
    ``stride=(c, d)``.
    """
    # (input shape, kernel size, stride)
    scenarios = (
        ((1, 1, 6, 6), (2, 2), (2, 2)),
        ((1, 1, 5, 5), (2, 2), (1, 1)),
        ((2, 3, 9, 7), (3, 2), (2, 1)),
    )

    for shape, kernel, stride in scenarios:
        X = np.random.randn(*shape)

        # Graph under test.
        node_in = val(X)
        node_out = rd.build_max_pooling(node_in, kernel[0], kernel[1],
                                        stride[0], stride[1])
        node_flat = rd.build_reshape(node_out, (-1, ))
        node_energy = rd.build_dot_vv(node_flat, node_flat)

        # PyTorch reference.
        ref_in = torch.tensor(X, requires_grad=True)
        ref_out = torch.nn.functional.max_pool2d(ref_in, kernel, stride)
        ref_flat = ref_out.view(-1)
        ref_energy = torch.dot(ref_flat, ref_flat)
        ref_energy.backward()

        self.ck_fequals(node_out.eval(), ref_out.data.numpy())
        self.ck_fequals(get_grad(node_energy, node_in).eval(),
                        ref_in.grad.data.numpy())
def forward(self, x):
    """Run the network: two conv/activation/pool stages, then ``self.fc``.

    Each stage applies a convolution layer (``self.conv1``, then
    ``self.conv2``), ``rd.build_vrelu`` (presumably an element-wise ReLU;
    confirm in ``rd``) and ``rd.build_max_pooling`` with arguments
    ``2, 2, 2, 2``. The result is reshaped to ``(x.shape[0], -1)`` and passed
    to ``self.fc``, whose output is returned.
    """
    stage1 = rd.build_max_pooling(rd.build_vrelu(self.conv1(x)), 2, 2, 2, 2)
    stage2 = rd.build_max_pooling(rd.build_vrelu(self.conv2(stage1)), 2, 2, 2, 2)
    # Keep the leading axis, collapse everything else for the final layer.
    flattened = rd.build_reshape(stage2, (stage2.shape[0], -1))
    return self.fc(flattened)
def test_act_softmax(self):
    """Compare ``rd.build_softmax`` (forward and gradient) with PyTorch.

    The softmax output of a random ``(11, 7)`` array is flattened and dotted
    with itself to get a scalar to differentiate. Both the activation values
    and the gradient w.r.t. the input are checked against PyTorch.
    """
    x = np.random.randn(11, 7)

    # Graph under test.
    tx = val(x)
    tz = rd.build_softmax(tx)
    tz = rd.build_reshape(tz, (11 * 7, ))
    te = rd.build_dot_vv(tz, tz)

    # PyTorch reference. This previously used torch.relu, so the test never
    # validated softmax. dim=1 mirrors how rd.build_softmax is paired with
    # torch softmax in test_sgd_logreg_k_l1_l2.
    dx = torch.tensor(x, requires_grad=True)
    dz = torch.nn.functional.softmax(dx, dim=1).view(-1)
    de = torch.dot(dz, dz)
    de.backward()

    # With the correct reference the loosened feps=1e-1 is no longer needed.
    self.ck_fequals(tz.eval(), dz.data.numpy())
    # The backward pass was computed but never asserted before.
    self.ck_fequals(get_grad(te, tx).eval(), dx.grad.data.numpy())
def test_sgd_logreg_k_l1_l2(self):
    """Compare multi-class logistic regression with L1 and L2 penalties.

    The loss is cross-entropy on the logits ``X @ w`` plus
    ``alpha_l1 * ||w||_1`` plus ``alpha_l2 * (w . w)``. The loss value, the
    softmax predictions and the gradients w.r.t. the logits, the weights and
    the input are checked against PyTorch.
    """
    n_samples, n_features, n_classes = 46, 7, 4
    alpha_l1 = 0.53
    alpha_l2 = 0.82

    X = np.random.randn(n_samples, n_features).astype(np.float32)
    w = np.random.randn(n_features, n_classes).astype(np.float32)

    # One-hot targets: one random class per row, drawn row by row.
    y_true = np.zeros((n_samples, n_classes)).astype(np.float32)
    for row in y_true:
        row[np.random.randint(0, y_true.shape[1])] = 1

    # Graph under test.
    dX = val(X)
    dw = val(w)
    dw_flat = rd.build_reshape(dw, (dw.shape[0] * dw.shape[1], ))
    dy_true = val(y_true)
    dy_out = rd.build_dot_mm(dX, dw)
    dy_pred = rd.build_softmax(dy_out)
    dloss = (rd.build_cross_entropy_loss(dy_out, dy_true)
             + alpha_l1 * rd.build_norm1(dw_flat)
             + alpha_l2 * rd.build_dot_vv(dw_flat, dw_flat))

    # PyTorch reference; CrossEntropyLoss takes class indices, hence argmax.
    tX = torch.tensor(X, requires_grad=True)
    tw = torch.tensor(w, requires_grad=True)
    tw_flat = tw.view(-1)
    ty_true = torch.argmax(torch.tensor(y_true, requires_grad=False), dim=1)
    ty_out = torch.matmul(tX, tw)
    ty_pred = torch.nn.functional.softmax(ty_out, dim=1)
    # ty_out is an intermediate tensor; its gradient is read back through
    # utils.get_grad after the backward pass.
    utils.save_grad(ty_out)
    tloss = torch.nn.CrossEntropyLoss(reduction='sum')(ty_out, ty_true)
    tloss = (tloss
             + alpha_l1 * torch.norm(tw_flat, p=1)
             + alpha_l2 * torch.dot(tw_flat, tw_flat))
    tloss.backward()

    self.ck_fequals(dloss.eval(), tloss.data.numpy(), feps=1e-3)
    self.ck_fequals(dy_pred.eval(), ty_pred.data.numpy())
    self.ck_fequals(get_grad(dloss, dy_out).eval(),
                    utils.get_grad(ty_out).data.numpy())
    self.ck_fequals(get_grad(dloss, dw).eval(), tw.grad.data.numpy())
    self.ck_fequals(get_grad(dloss, dX).eval(), tX.grad.data.numpy())
def forward(self, x):
    """Return the logits of the three-layer network for ``x``.

    The input is reshaped to ``(-1, IN_SIZE)``, passed through ``self.l1``
    and ``self.l2`` (each followed by ``rd.build_vrelu``) and finally through
    ``self.l3``, whose output is returned without an activation.
    """
    flat = rd.build_reshape(x, (-1, IN_SIZE))
    hidden = rd.build_vrelu(self.l1(flat))
    hidden = rd.build_vrelu(self.l2(hidden))
    return self.l3(hidden)
def test_conv2d_transpose(self):
    """Compare ``rd.op_conv2d_transpose`` (forward and gradients) with PyTorch.

    Four scenarios vary input/kernel shape, stride and padding; the first one
    also adds a bias via ``rd.build_conv2d_bias_add``. In every scenario the
    output is flattened and dotted with itself to get a scalar, and the
    output plus the gradients w.r.t. kernel, input (and bias, when present)
    are checked against ``torch.nn.functional.conv_transpose2d``.
    """
    # (input shape, kernel shape, stride, padding, with_bias, kernel-grad feps)
    # padding=None means the graph op receives 0, 0 and the torch call keeps
    # its default padding.
    scenarios = (
        ((2, 4, 13, 16), (4, 3, 5, 8), (1, 1), None, True, 1e-4),
        ((2, 4, 5, 5), (4, 3, 5, 8), (3, 4), None, False, 1e-5),
        ((2, 4, 5, 5), (4, 3, 5, 8), (3, 4), (6, 8), False, 1e-5),
        ((2, 4, 9, 11), (4, 3, 6, 8), (3, 4), (7, 11), False, 1e-5),
    )

    for x_shape, k_shape, stride, padding, with_bias, k_feps in scenarios:
        X = np.random.randn(*x_shape).astype(np.float32)
        K = np.random.randn(*k_shape).astype(np.float32)
        # One bias value per entry of the kernel's second axis.
        b = np.random.randn(k_shape[1]).astype(np.float32) if with_bias else None

        # Graph under test.
        node_x = val(X)
        node_k = val(K)
        node_b = val(b) if with_bias else None
        pad_h, pad_w = (0, 0) if padding is None else padding
        node_y = rd.op_conv2d_transpose(node_x, node_k, stride[0], stride[1],
                                        pad_h, pad_w)
        if with_bias:
            node_y = rd.build_conv2d_bias_add(node_y, node_b)
        node_flat = rd.build_reshape(node_y, ((-1, )))
        node_energy = rd.build_dot_vv(node_flat, node_flat)

        # PyTorch reference; only pass the keyword arguments the scenario uses.
        ref_x = torch.tensor(X, requires_grad=True)
        ref_k = torch.tensor(K, requires_grad=True)
        ref_b = torch.tensor(b, requires_grad=True) if with_bias else None
        ref_kwargs = {"stride": stride}
        if with_bias:
            ref_kwargs["bias"] = ref_b
        if padding is not None:
            ref_kwargs["padding"] = padding
        ref_y = torch.nn.functional.conv_transpose2d(ref_x, ref_k, **ref_kwargs)
        ref_flat = ref_y.view(-1)
        ref_energy = torch.dot(ref_flat, ref_flat)
        ref_energy.backward()

        self.ck_fequals(node_y.eval(), ref_y.data.numpy())
        self.ck_fequals(get_grad(node_energy, node_k).eval(),
                        ref_k.grad.data.numpy(), feps=k_feps)
        self.ck_fequals(get_grad(node_energy, node_x).eval(),
                        ref_x.grad.data.numpy(), feps=1e-5)
        if with_bias:
            self.ck_fequals(get_grad(node_energy, node_b).eval(),
                            ref_b.grad.data.numpy(), feps=1e-3)