def __init__(self, X, labels):
    data, label = L.Data([X, labels], "data", batch_size=100)()
    conv1 = L.Conv(data, "conv1", dim_out=20, kernel=5)
    pool1 = L.Pool(conv1, "pool1", pool=L.Pool.MAX, kernel=2, stride=2)
    conv2 = L.Conv(pool1, "conv2", dim_out=50, kernel=5)
    pool2 = L.Pool(conv2, "pool2", pool=L.Pool.MAX, kernel=2, stride=2)
    fc3 = L.FC(pool2, "fc3", dim_out=500)
    relu3 = L.ReLU(fc3, "relu3")
    pred = L.FC(relu3, "pred", dim_out=10)
    loss = L.SoftmaxWithLoss(pred, "loss", label=label)
    # Net Instance
    self.net = mobula.Net()
    # Set Loss Layer
    self.net.set_loss(loss)
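# Shape flow through the layers above, assuming 28x28 single-channel input
# (batch_size=100) and Caffe-style Conv defaults of stride 1 / no padding
# (an assumption; the defaults are not shown in this snippet):
#   data : (100, 1, 28, 28)
#   conv1: (100, 20, 24, 24)   # 28 - 5 + 1 = 24
#   pool1: (100, 20, 12, 12)   # 2x2 max pool, stride 2
#   conv2: (100, 50, 8, 8)     # 12 - 5 + 1 = 8
#   pool2: (100, 50, 4, 4)
#   fc3  : (100, 500)          # flattens 50 * 4 * 4 = 800 inputs per sample
#   pred : (100, 10)           # class scores, consumed by SoftmaxWithLoss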
import numpy as np
import mobula.layers as L

def test_softmax():
    N, C, H, W = 2, 3, 4, 5
    a = np.random.random((N, C, H, W)) - 0.5
    for axis in range(4):
        l = L.Softmax(a, axis=axis)
        label = np.random.randint(0, a.shape[axis], size=a.size // a.shape[axis])
        loss_l = L.SoftmaxWithLoss(a, axis=axis, label=label)
        l.reshape()
        loss_l.reshape()
        y = l.eval()
        loss = loss_l.eval()
        exp = np.exp(a)
        su = np.sum(exp, axis=axis)
        axes = [slice(None)] * 4
        axes[axis] = np.newaxis
        pu = [1] * 4
        pu[axis] = a.shape[axis]
        # broadcast the per-axis sums back to a's shape; indexing with a
        # plain list of slices is an error in modern NumPy, so use a tuple
        s = np.tile(su[tuple(axes)], pu)
        # softmax forward
        assert np.allclose(y, exp / s)
        assert np.allclose(np.sum(y, axis), np.ones(su.shape))
        # softmax-with-loss forward
        assert np.allclose(loss_l.softmax, l.Y)
        assert np.allclose(loss_l.Y, -np.mean(np.log(get_val_from_arg(y, label, axis))))
        # softmax backward
        l.dY = np.random.random(l.Y.shape)
        l.backward()
        dX = np.multiply(exp / s - np.square(exp / s), l.dY)
        assert np.allclose(l.dX, dX)
        # softmax-with-loss backward
        loss_l.dY = np.random.random(loss_l.Y.shape)
        loss_l.backward()
        z = np.zeros(y.shape)
        z.ravel()[get_idx_from_arg(z, label, axis)] = 1  # one-hot labels
        tl = y - z
        assert np.allclose(tl * loss_l.dY, loss_l.dX)
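# get_val_from_arg / get_idx_from_arg are helpers defined elsewhere in the
# test suite. A minimal sketch of what they plausibly do, assuming the flat
# `label` array is laid out in C order over the non-`axis` dimensions:
import numpy as np

def get_idx_from_arg(a, arg, axis):
    # Flat indices into a.ravel() selecting, for each position of the
    # remaining axes, the entry `arg` along `axis`.
    shape = list(a.shape)
    shape[axis] = 1
    grid = np.indices(shape)          # one index grid per dimension
    grid[axis] = arg.reshape(shape)   # substitute the label index on `axis`
    return np.ravel_multi_index(grid, a.shape).ravel()

def get_val_from_arg(a, arg, axis):
    # Values of `a` at the positions picked out by get_idx_from_arg,
    # e.g. the predicted probability of each sample's true class.
    return a.ravel()[get_idx_from_arg(a, arg, axis)]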
import numpy as np
import mobula
import mobula.layers as L
import mobula.solvers as S  # module paths assumed from the L/S aliases used below

# Reshape X to NCHW
# N, C, H, W = n, 1, 28, 28
X.resize((n, 1, 28, 28))

# LeNet-5
data, label = L.Data([X, labels], "data", batch_size=100)()
conv1 = L.Conv(data, "conv1", dim_out=20, kernel=5)
pool1 = L.Pool(conv1, "pool1", pool=L.Pool.MAX, kernel=2, stride=2)
relu1 = L.ReLU(pool1, "relu1")
conv2 = L.Conv(relu1, "conv2", dim_out=50, kernel=5)
pool2 = L.Pool(conv2, "pool2", pool=L.Pool.MAX, kernel=2, stride=2)
relu2 = L.ReLU(pool2, "relu2")
fc3 = L.FC(relu2, "fc3", dim_out=500)
relu3 = L.ReLU(fc3, "relu3")
pred = L.FC(relu3, "pred", dim_out=10)
loss = L.SoftmaxWithLoss(pred, "loss", label=label)

# Net Instance
net = mobula.Net()
# Set Loss Layer
net.set_loss(loss)
# Set Solver: decay the learning rate by gamma every `stepsize` iterations
solver = S.Momentum(gamma=0.1, stepsize=1000)
solver.lr_policy = S.LR_POLICY.STEP
net.set_solver(solver)  # pass the configured solver, not a fresh S.Momentum()
# Learning Rate
net.lr = 0.005
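# A sketch of the training loop that would follow the setup above. The
# per-iteration methods net.forward() / net.backward() are assumptions for
# illustration, not confirmed mobula API; `loss.Y` holds the scalar loss
# value, as in the softmax test above.
max_iter = 10000  # assumed iteration budget
for i in range(max_iter):
    net.forward()   # run the current batch up to the loss layer
    net.backward()  # backpropagate; the Momentum solver applies the update
    if i % 100 == 0:
        print("iter %d, loss %f" % (i, loss.Y))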