def test_mnist():
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_mnist()

    m = TestCNN(10)
    mse = nn.MSELoss()

    m.cuda()
    mse.cuda()

    optim = th.optim.SGD(m.parameters(), lr=1e-2)

    batch_size = 64
    nb_epoch = 10
    nb_batch = ceil(x_train.size(0) / batch_size)

    for e in range(nb_epoch):
        sum_loss = 0
        m.train()

        for i in tqdm(range(nb_batch)):
            i_min = i * batch_size
            i_max = (i + 1) * batch_size
            i_max = i_max if i_max < x_train.size(0) else x_train.size(0)

            x, y = x_train[i_min:i_max, :, :].cuda(), y_train[i_min:i_max].cuda()

            pred = m(x)
            # One-hot encode the targets for the MSE criterion
            loss = mse(pred, th.eye(10)[y].cuda())

            optim.zero_grad()
            loss.backward()
            # print("CNN_el = %d, grad_norm = %f" % (m.seq_lin[0].weight.grad.nelement(), m.seq_lin[0].weight.grad.norm()))
            optim.step()

            sum_loss += loss.item()

        print("Epoch %d, loss = %f" % (e, sum_loss / nb_batch))

        # Validation pass
        with th.no_grad():
            m.eval()

            nb_batch_valid = ceil(x_valid.size(0) / batch_size)
            nb_correct = 0

            for i in tqdm(range(nb_batch_valid)):
                i_min = i * batch_size
                i_max = (i + 1) * batch_size
                i_max = i_max if i_max < x_valid.size(0) else x_valid.size(0)

                x, y = x_valid[i_min:i_max, :, :].cuda(), y_valid[i_min:i_max].cuda()

                pred = m(x)
                nb_correct += (pred.argmax(dim=1) == y).sum().item()

            nb_correct /= x_valid.size(0)
            print("Epoch %d, accuracy = %f" % (e, nb_correct))

    return m.seq_conv
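# Note (sketch, not part of the original code): test_mnist() returns only the
# convolutional feature extractor (m.seq_conv). The commented-out call in
# train_mnist() below suggests it can be passed to ModelsUnion to warm-start
# the agents' CNN, e.g.:
#
#     m = ModelsUnion(n, f, n_m, d, nb_action, nb_class, test_mnist())
#
# Whether ModelsUnion actually accepts a pre-trained module this way is an
# assumption based on that commented-out line.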
def load_data(mnist_path):
    number_of_labels = 10

    train_images, train_labels = mnist.load_mnist(dataset="training", path=mnist_path)

    # One-hot encode the labels
    tl = np.zeros((train_labels.shape[0], number_of_labels))
    for i in range(train_labels.shape[0]):
        tl[i][train_labels[i]] = 1
    train_labels = tl

    # Flatten each 28x28 image into a 784-dimensional vector
    train_images = train_images.flatten().reshape(60000, 784)

    return train_images, train_labels
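# Example usage (sketch): the directory below is hypothetical and depends on
# where the raw MNIST files were downloaded.
#
#     train_images, train_labels = load_data("./data/mnist")
#     print(train_images.shape)  # (60000, 784)
#     print(train_labels.shape)  # (60000, 10)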
def train_mnist():
    ag = []

    nb_class = 10
    img_size = 28
    n = 64
    f = 7
    n_m = 12
    d = 2
    nb_action = 4
    batch_size = 64
    t = 7
    nr = 1

    cuda = True

    #m = ModelsUnion(n, f, n_m, d, nb_action, nb_class, test_mnist())
    m = ModelsUnion(n, f, n_m, d, nb_action, nb_class)

    a1 = Agent(ag, m, n, f, n_m, img_size, nb_action, batch_size, obs_MNIST, trans_MNIST)
    a2 = Agent(ag, m, n, f, n_m, img_size, nb_action, batch_size, obs_MNIST, trans_MNIST)
    a3 = Agent(ag, m, n, f, n_m, img_size, nb_action, batch_size, obs_MNIST, trans_MNIST)

    ag.append(a1)
    ag.append(a2)
    ag.append(a3)

    if cuda:
        for a in ag:
            a.cuda()

    sm = Softmax(dim=-1)

    criterion = MSELoss()
    if cuda:
        criterion.cuda()

    params = []
    for net in m.get_networks():
        if cuda:
            net.cuda()
        params += list(net.parameters())

    optim = th.optim.Adam(params, lr=1e-3)

    nb_epoch = 10

    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_mnist()
    x_train, y_train = x_train[:10000], y_train[:10000]

    nb_batch = ceil(x_train.size(0) / batch_size)

    loss_v = []
    acc = []

    for e in range(nb_epoch):
        sum_loss = 0

        for net in m.get_networks():
            net.train()

        grad_norm_cnn = []
        grad_norm_pred = []

        # Let the agents move randomly during the first epochs
        random_walk = e < 5

        for i in tqdm(range(nb_batch)):
            i_min = i * batch_size
            i_max = (i + 1) * batch_size
            i_max = i_max if i_max < x_train.size(0) else x_train.size(0)

            losses = []

            for k in range(nr):
                x, y = x_train[i_min:i_max, :, :], y_train[i_min:i_max]
                if cuda:
                    x, y = x.cuda(), y.cuda()

                pred, log_probas = step(ag, x, t, sm, cuda, random_walk, nb_class)

                # Sum on agent dimension
                proba_per_image = log_probas.sum(dim=0)

                y_eye = th.eye(nb_class)[y]
                if cuda:
                    y_eye = y_eye.cuda()

                # Reward is the negative MSE between prediction and one-hot target
                r = -criterion(pred, y_eye)

                # Mean on image batch
                l = (proba_per_image * r.detach() + r).mean(dim=0).view(-1)
                losses.append(l)

            loss = -th.cat(losses).sum() / nr

            optim.zero_grad()
            loss.backward()
            optim.step()

            sum_loss += loss.item()

            grad_norm_cnn.append(m.get_networks()[0].seq_lin[0].weight.grad.norm())
            grad_norm_pred.append(m.get_networks()[-1].seq_lin[0].weight.grad.norm())

        sum_loss /= nb_batch

        print("Epoch %d, loss = %f" % (e, sum_loss))
        print("grad_cnn_norm_mean = %f, grad_pred_norm_mean = %f"
              % (sum(grad_norm_cnn) / len(grad_norm_cnn),
                 sum(grad_norm_pred) / len(grad_norm_pred)))
        print("CNN_el = %d, Pred_el = %d"
              % (m.get_networks()[0].seq_lin[0].weight.grad.nelement(),
                 m.get_networks()[-1].seq_lin[0].weight.grad.nelement()))

        # Validation pass
        nb_correct = 0
        nb_batch_valid = ceil(x_valid.size(0) / batch_size)

        for net in m.get_networks():
            net.eval()

        with th.no_grad():
            for i in tqdm(range(nb_batch_valid)):
                i_min = i * batch_size
                i_max = (i + 1) * batch_size
                i_max = i_max if i_max < x_valid.size(0) else x_valid.size(0)

                x, y = x_valid[i_min:i_max, :, :], y_valid[i_min:i_max]
                if cuda:
                    x, y = x.cuda(), y.cuda()

                pred, proba = step(ag, x, t, sm, cuda, random_walk, nb_class)

                nb_correct += (pred.argmax(dim=1) == y).sum().item()

            nb_correct /= x_valid.size(0)

            acc.append(nb_correct)
            loss_v.append(sum_loss)

            print("Epoch %d, accuracy = %f" % (e, nb_correct))

    plt.plot(acc, "b", label="accuracy")
    plt.plot(loss_v, "r", label="criterion value")
    plt.xlabel("Epoch")
    plt.title("MARL Classification f=%d, n=%d, n_m=%d, d=%d, T=%d" % (f, n, n_m, d, t))
    plt.legend()
    plt.show()

    viz(ag, x_test[randint(0, x_test.size(0) - 1)], t, sm, f)
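# Entry point (sketch): the guard below is an assumption about how this
# training script might be launched; it is not part of the original code.
if __name__ == "__main__":
    train_mnist()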
# @Time     : 2018/9/3/0003 21:33
# @Author   : zhupengfei
# @Site     :
# @File     : mnist_data.py
# @Software : PyCharm
# @Describe :
import sys, os
sys.path.append(os.pardir)  # make the parent directory importable (works like an include)
from data.mnist import load_mnist
from PIL import Image  # PIL image-processing module
import numpy as np
import pickle
from utils.ActivationFunction import *

(x_train, y_train), (x_test, y_test) = load_mnist(one_hot_label=False, flatten=True, normalize=False)

# print(x_train.shape)
# print(y_train.shape)
# print(x_test.shape)
# print(y_test.shape)

# Display an image
# def image_show(img):
#     image = Image.fromarray(np.uint8(img))
#     image.show()
#
#
# x = x_train[0]  # take a single image
# print(x.shape)
# x = x.reshape(28, 28)  # the image is grayscale, so reshape it back to 28x28
# print(x.shape)