def test_conv_gpu(self):
  np.random.seed(1337)
  model = TinyConvNet()
  [x.cuda_() for x in model.parameters()]
  optimizer = optim.Adam(model.parameters(), lr=0.001)
  train(model, optimizer, steps=200, gpu=True)
  evaluate(model, gpu=True)
def test_conv(self):
  np.random.seed(1337)
  model = TinyConvNet()
  optimizer = optim.Adam(model.parameters(), lr=0.001)
  train(model, X_train, Y_train, optimizer, steps=200, device=self.device)
  assert evaluate(model, X_test, Y_test, device=self.device) > 0.95
def train_one_step(model, X, Y):
  params = get_parameters(model)
  # count total parameters for the log line
  pcount = 0
  for p in params:
    pcount += np.prod(p.shape)
  optimizer = optim.Adam(params, lr=0.001)
  print("stepping %r with %.1fM params bs %d" % (type(model), pcount/1e6, BS))
  st = time.time()
  train(model, X, Y, optimizer, steps=1, BS=BS)
  et = time.time() - st
  print("done in %.2f ms" % (et*1000.))
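# Usage sketch (assumption, not from the original source): timing a single
# optimizer step on a random MNIST-shaped batch. BS is assumed to be a
# module-level constant, as the function body above implies.
if __name__ == "__main__":
  BS = 128
  model = TinyConvNet()
  X = np.random.rand(BS, 28 * 28).astype(np.float32)
  Y = np.random.randint(0, 10, size=(BS,))
  train_one_step(model, X, Y)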
def train(outdir: str = None, epochs: int = 1, batch_size: int = 32):
  X_train, Y_train, X_test, Y_test = fetch_mnist()
  model = FCNN(28 * 28, 10)
  optimizer = optim.Adam(model.parameters(), lr=0.01)

  for epoch in range(epochs):
    msg = "loss={loss}"
    status = trange(len(X_train) // batch_size)
    Tensor.training = True
    for batch_idx in status:
      samp = np.random.choice(len(X_train), size=batch_size)
      x = Tensor(X_train[samp]) / 255
      out = model(x)
      loss = sparse_categorical_crossentropy(out, Y_train[samp])
      status.set_description(msg.format(loss=float(loss.data)))
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    Tensor.training = False

    correct = []
    for batch_idx in trange(len(X_test) // batch_size, desc="Testing"):
      n = batch_idx * batch_size
      # normalize the same way as during training
      x = Tensor(X_test[n:n + batch_size]) / 255
      out = model(x).data.argmax(axis=1)
      correct.append(out == Y_test[n:n + batch_size])
    acc = np.concatenate(correct).mean()
    print("Accuracy:", acc)

  if not outdir:
    return

  weights = {
    "w1": model.layer1.weight.data, "b1": model.layer1.bias.data,
    "w2": model.layer2.weight.data, "b2": model.layer2.bias.data,
  }
  Path(outdir).mkdir(exist_ok=True)
  for name, val in weights.items():
    np.save(Path(outdir) / f"{name}.npy", val.astype(np.float32))

  # select one example of each class
  indices = np.array([np.argmax(Y_test == i) for i in range(10)])
  x = X_test[indices].reshape((10, -1)).astype(np.float32) / 255
  y_pred = model(Tensor(x)).softmax().data
  print("Model predictions:", y_pred.argmax(axis=1))
  np.save(Path(outdir) / "samples.npy", x)
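# Hedged sketch (assumption, not shown in this excerpt): one way the
# sparse_categorical_crossentropy helper used above could be implemented,
# treating `out` as per-class log-probabilities. The one-hot target is scaled
# by -num_classes so that out.mul(y).mean() reduces to the mean NLL.
def sparse_categorical_crossentropy(out, Y):
  num_classes = out.shape[-1]
  y = np.zeros((len(Y), num_classes), np.float32)
  y[range(len(Y)), Y] = -1.0 * num_classes
  return out.mul(Tensor(y)).mean()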
def test_conv(self):
  np.random.seed(1337)
  model = TinyConvNet()
  optimizer = optim.Adam(model.parameters(), lr=0.001)
  train(model, optimizer, steps=200, gpu=self.gpu)
  evaluate(model, gpu=self.gpu)
def test_conv(self):
  np.random.seed(1337)
  model = TinyConvNet()
  optimizer = optim.Adam([model.c1, model.l1, model.l2], lr=0.001)
  train(model, optimizer, steps=400)
  evaluate(model)
lossfn = lambda out, y: out.mul(y).mean() + lmbd*(model.weight1.abs() + model.weight2.abs()).sum()
X_train, Y_train, X_test, Y_test = fetch_mnist()
steps = len(X_train)//BS
np.random.seed(1337)
if QUICK:
  steps = 1
  X_test, Y_test = X_test[:BS], Y_test[:BS]

model = BigConvNet()

if len(sys.argv) > 1:
  try:
    model.load(sys.argv[1])
    print('Loaded weights "'+sys.argv[1]+'", evaluating...')
    evaluate(model, X_test, Y_test, BS=BS)
  except Exception:
    print('could not load weights "'+sys.argv[1]+'".')

if GPU:
  params = get_parameters(model)
  [x.cuda_() for x in params]

for lr, epochs in zip(lrs, epochss):
  optimizer = optim.Adam(model.parameters(), lr=lr)
  for epoch in range(1, epochs+1):
    # first epoch without augmentation
    X_aug = X_train if epoch == 1 else augment_img(X_train)
    train(model, X_aug, Y_train, optimizer, steps=steps, lossfn=lossfn, gpu=GPU, BS=BS)
    accuracy = evaluate(model, X_test, Y_test, BS=BS)
    model.save('examples/checkpoint'+str("%.0f" % (accuracy*1.0e6)))
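# Hedged sketch (assumption): augment_img is not defined in this excerpt. A
# plausible version applies small random pixel shifts, a cheap augmentation
# for MNIST; it assumes X is stored flattened as (N, 28*28).
def augment_img(X, max_shift=2):
  imgs = X.reshape(-1, 28, 28)
  out = np.zeros_like(imgs)
  for i, img in enumerate(imgs):
    sx, sy = np.random.randint(-max_shift, max_shift + 1, size=2)
    out[i] = np.roll(np.roll(img, sx, axis=1), sy, axis=0)
  return out.reshape(X.shape)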
    self.fc1 = Tensor.uniform(self.chans, 10)
    self.fc2 = Tensor.uniform(self.chans, 10)

  def forward(self, x):
    x = x.reshape(shape=(-1, 1, 28, 28))  # hacks
    for i in range(self.blocks):
      for j in range(self.block_convs):
        #print(i, j, x.shape, x.sum().cpu())
        # TODO: should padding be used?
        x = x.conv2d(self.convs[i * 3 + j]).add(self.cbias[i * 3 + j]).relu()
      x = self.bn[i](x)
      if i > 0:
        x = x.avg_pool2d(kernel_size=(2, 2))
    # TODO: Add concat support to concat with max_pool2d
    x1 = x.avg_pool2d(kernel_size=x.shape[2:4]).reshape(shape=(-1, x.shape[1]))
    x2 = x.max_pool2d(kernel_size=x.shape[2:4]).reshape(shape=(-1, x.shape[1]))
    x = x1.dot(self.fc1) + x2.dot(self.fc2)
    return x.logsoftmax()

if __name__ == "__main__":
  model = SeriousModel()
  params = get_parameters(model)
  if GPU:
    [x.cuda_() for x in params]
  optimizer = optim.Adam(params, lr=0.001)
  train_on_mnist(model, optimizer, steps=1875, BS=32, gpu=GPU)
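# Hedged sketch (assumption): train_on_mnist is not shown in this excerpt.
# Presumably it fetches MNIST and drives the generic train/evaluate helpers
# used by the other examples here; the exact signatures may differ.
def train_on_mnist(model, optimizer, steps, BS=32, gpu=False):
  X_train, Y_train, X_test, Y_test = fetch_mnist()
  train(model, X_train, Y_train, optimizer, steps=steps, BS=BS, gpu=gpu)
  return evaluate(model, X_test, Y_test, BS=BS)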
classes = 10
Tensor.default_gpu = os.getenv("GPU") is not None
TINY = os.getenv("TINY") is not None
TRANSFER = os.getenv("TRANSFER") is not None

if TINY:
  model = TinyConvNet(classes)
elif TRANSFER:
  model = EfficientNet(int(os.getenv("NUM", "0")), classes, has_se=True)
  model.load_weights_from_torch()
else:
  model = EfficientNet(int(os.getenv("NUM", "0")), classes, has_se=False)

parameters = get_parameters(model)
print("parameters", len(parameters))
optimizer = optim.Adam(parameters, lr=0.001)

#BS, steps = 16, 32
BS, steps = 64 if TINY else 16, 2048

for i in (t := trange(steps)):
  samp = np.random.randint(0, X_train.shape[0], size=(BS))
  img = X_train[samp].astype(np.float32)
  st = time.time()
  out = model.forward(Tensor(img))
  fp_time = (time.time() - st) * 1000.0
  Y = Y_train[samp]
  y = np.zeros((BS, classes), np.float32)
batch_size = 128
k = 1
epochs = 100
generator_params = get_parameters(generator)
discriminator_params = get_parameters(discriminator)
gen_loss = []
disc_loss = []
output_folder = "outputs"
os.makedirs(output_folder, exist_ok=True)
train_data_size = len(X_train)
ds_noise = Tensor(np.random.uniform(size=(64, 128)).astype(np.float32), gpu=GPU, requires_grad=False)
n_steps = int(train_data_size/batch_size)
if GPU:
  [x.cuda_() for x in generator_params+discriminator_params]

# optimizers
optim_g = optim.Adam(generator_params, lr=0.001)
optim_d = optim.Adam(discriminator_params, lr=0.001)

def regularization_l2(model, a=1e-4):
  #TODO: l2 reg loss
  pass

def generator_batch():
  idx = np.random.randint(0, X_train.shape[0], size=(batch_size))
  image_b = X_train[idx].reshape(-1, 28*28).astype(np.float32)/255.
  image_b = (image_b - 0.5)/0.5
  return Tensor(image_b, gpu=GPU)

def real_label(bs):
  y = np.zeros((bs, 2), np.float32)
  y[range(bs), [1]*bs] = -2.0
  return Tensor(y, gpu=GPU)  # assumed return, mirroring generator_batch; missing in the excerpt
epochs = 300
generator_params = get_parameters(generator)
discriminator_params = get_parameters(discriminator)
gen_loss = []
disc_loss = []
output_folder = "outputs"
os.makedirs(output_folder, exist_ok=True)
train_data_size = len(X_train)
ds_noise = Tensor(np.random.randn(64, 128).astype(np.float32), gpu=GPU, requires_grad=False)
n_steps = int(train_data_size / batch_size)
if GPU:
  [x.gpu_() for x in generator_params + discriminator_params]

# optimizers
optim_g = optim.Adam(generator_params, lr=0.0002, b1=0.5)  # 0.0002 for equilibrium!
optim_d = optim.Adam(discriminator_params, lr=0.0002, b1=0.5)

def regularization_l2(model, a=1e-4):
  #TODO: l2 reg loss
  pass

def generator_batch():
  idx = np.random.randint(0, X_train.shape[0], size=(batch_size))
  image_b = X_train[idx].reshape(-1, 28 * 28).astype(np.float32) / 255.
  image_b = (image_b - 0.5) / 0.5
  return Tensor(image_b, gpu=GPU)

def real_label(bs):
  y = np.zeros((bs, 2), np.float32)
  y[range(bs), [1] * bs] = -2.0
  return Tensor(y, gpu=GPU)  # assumed return, mirroring generator_batch; missing in the excerpt
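# Hedged sketch (assumption): the matching fake_label helper is not shown in
# this excerpt; by symmetry with real_label it would mark column 0 instead,
# giving targets for an out.mul(y).mean()-style NLL on logsoftmax outputs.
def fake_label(bs):
  y = np.zeros((bs, 2), np.float32)
  y[range(bs), [0] * bs] = -2.0
  return Tensor(y, gpu=GPU)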