def test_rmsprop(self):
  """RMSprop at lr=2e-4 should train TinyBobNet past 95% test accuracy."""
  np.random.seed(1337)  # deterministic init so the accuracy bar is reproducible
  net = TinyBobNet()
  opt = optim.RMSprop(net.parameters(), lr=0.0002)
  train(net, X_train, Y_train, opt, steps=1000, device=self.device)
  assert evaluate(net, X_test, Y_test, device=self.device) > 0.95
def test_conv(self):
  """Adam on TinyConvNet should exceed 95% test accuracy in only 200 steps."""
  np.random.seed(1337)  # fixed seed keeps the 0.95 threshold stable
  net = TinyConvNet()
  opt = optim.Adam(net.parameters(), lr=0.001)
  train(net, X_train, Y_train, opt, steps=200, device=self.device)
  assert evaluate(net, X_test, Y_test, device=self.device) > 0.95
def train_one_step(model, X, Y):
  """Run a single Adam training step on `model` and report timing.

  Prints the model's parameter count (in millions) and batch size, then the
  wall-clock time of one `train` step in milliseconds.

  model: a network whose parameters `get_parameters` can extract.
  X, Y: training inputs/labels passed straight through to `train`.
  Relies on module-level `BS` for the batch size.
  """
  params = get_parameters(model)
  # sum() over a generator instead of a manual accumulator loop
  pcount = sum(np.prod(p.shape) for p in params)
  optimizer = optim.Adam(params, lr=0.001)
  print("stepping %r with %.1fM params bs %d" % (type(model), pcount/1e6, BS))
  st = time.time()
  train(model, X, Y, optimizer, steps=1, BS=BS)
  elapsed = time.time() - st  # renamed from `et`: this is a duration, not an end time
  print("done in %.2f ms" % (elapsed*1000.))
def test_sgd(self):
  """Plain SGD should train TinyBobNet past 95% test accuracy in 1000 steps."""
  np.random.seed(1337)  # deterministic weights so the assertion is stable
  net = TinyBobNet()
  opt = optim.SGD(net.parameters(), lr=0.001)
  train(net, X_train, Y_train, opt, steps=1000)
  assert evaluate(net, X_test, Y_test) > 0.95
# Cross-entropy-style loss plus L1 weight regularization on the two dense layers.
# NOTE: `model` and `lmbd` are late-bound — the lambda reads them at call time.
lossfn = lambda out,y: out.mul(y).mean() + lmbd*(model.weight1.abs() + model.weight2.abs()).sum()

X_train, Y_train, X_test, Y_test = fetch_mnist()
steps = len(X_train)//BS
np.random.seed(1337)
if QUICK:
  # smoke-test mode: one step, tiny eval set
  steps = 1
  X_test, Y_test = X_test[:BS], Y_test[:BS]

model = BigConvNet()

if len(sys.argv) > 1:
  # Optionally resume from a checkpoint given on the command line.
  try:
    model.load(sys.argv[1])
    print('Loaded weights "'+sys.argv[1]+'", evaluating...')
    evaluate(model, X_test, Y_test, BS=BS)
  except Exception:  # was a bare except: don't swallow KeyboardInterrupt/SystemExit
    print('could not load weights "'+sys.argv[1]+'".')

if GPU:
  # plain loop instead of a side-effect list comprehension
  params = get_parameters(model)
  for x in params:
    x.cuda_()

# Staged training schedule: one optimizer per (lr, epochs) pair.
for lr, epochs in zip(lrs, epochss):
  optimizer = optim.Adam(model.parameters(), lr=lr)
  for epoch in range(1,epochs+1):
    # first epoch without augmentation
    X_aug = X_train if epoch == 1 else augment_img(X_train)
    train(model, X_aug, Y_train, optimizer, steps=steps, lossfn=lossfn, gpu=GPU, BS=BS)
    accuracy = evaluate(model, X_test, Y_test, BS=BS)
    # checkpoint name encodes accuracy, e.g. 0.987654 -> "checkpoint987654"
    # ("%.0f" % ...) is already a str — removed the redundant str() wrapper
    model.save('examples/checkpoint'+("%.0f" % (accuracy*1.0e6)))
  # --- tail of make_dataset(): the function's opening (and `ds`/`ds_X` setup)
  # --- is outside this view; do not assume anything beyond what is shown.
  # Targets are the input shifted by one column (teacher-forcing style);
  # copy so later mutation of ds cannot alias into ds_Y.
  ds_Y = np.copy(ds[:, 1:])
  # Fixed 8000-row train/test split — assumes len(ds) > 8000 (TODO confirm upstream)
  ds_X_train, ds_X_test = ds_X[0:8000], ds_X[8000:]
  ds_Y_train, ds_Y_test = ds_Y[0:8000], ds_Y[8000:]
  return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test

from tinygrad.optim import Adam

if __name__ == "__main__":
  # Transformer(syms=10, maxlen=6, ...) trained on the addition dataset above.
  model = Transformer(10, 6, 2, 128, 4)
  X_train, Y_train, X_test, Y_test = make_dataset()
  lr = 0.003
  for i in range(10):
    # fresh optimizer each round so Adam moments reset with the new lr
    optim = Adam(get_parameters(model), lr=lr)
    train(model, X_train, Y_train, optim, 50, BS=64)
    acc, Y_test_preds = evaluate(model, X_test, Y_test, num_classes=10, return_predict=True)
    lr /= 1.2  # geometric lr decay between rounds
    print(f'reducing lr to {lr:.4f}')
    if acc > 0.998:
      # near-perfect accuracy: dump the remaining wrong predictions
      wrong = 0
      for k in range(len(Y_test_preds)):
        if (Y_test_preds[k] != Y_test[k]).any():
          wrong += 1
          # a, b: the two operands; c: true last-3 digits; x: predicted last-3
          a, b, c, x = X_test[k, :2], X_test[k, 2:4], Y_test[k, -3:], Y_test_preds[k, -3:]
          # NOTE(review): source is truncated here mid-statement — the print(
          # call's arguments are outside this view.
          print(
def forward(self, x):
  """Embed a batch of symbol sequences, run the transformer blocks, and
  return per-position log-probabilities of shape (bs, seqlen, syms)."""
  bs, seqlen = x.shape[0], x.shape[1]
  xnp = x.cpu().data
  # Build a combined one-hot: first `maxlen` slots encode the position,
  # the next `syms` slots encode the symbol at that position.
  onehot = np.zeros((bs, seqlen, self.maxlen + self.syms), dtype=np.float32)
  for pos in range(seqlen):
    onehot[range(bs), pos, pos] = 1
    onehot[range(bs), pos, self.maxlen + xnp[:, pos]] = 1
  onehot = onehot.reshape(bs * seqlen, self.maxlen + self.syms)

  # project one-hots through the embedding matrix back to (bs, seqlen, embed)
  x = Tensor(onehot, device=x.device).dot(self.embed).reshape(shape=(bs, seqlen, -1))
  for block in self.tbs:
    x = block(x)
  # final linear projection to symbol logits, then log-softmax
  x = x.reshape(shape=(-1, x.shape[-1])).dot(self.final).logsoftmax()
  return x.reshape(shape=(bs, -1, x.shape[-1]))

from tinygrad.optim import Adam

if __name__ == "__main__":
  model = Transformer(10, 6, 2, 128, 4)
  X_train, Y_train, X_test, Y_test = make_dataset()
  optim = Adam(get_parameters(model), lr=0.001)
  train(model, X_train, Y_train, optim, 100)
      # --- tail of make_dataset(): the enclosing loops over i, j and the sum
      # --- s are defined above this view; presumably s = i + j — TODO confirm.
      # One sample: digits of the two operands followed by digits of the sum.
      ds.append([
          i // 10, i % 10, j // 10, j % 10, s // 100, (s // 10) % 10, s % 10
      ])
  random.shuffle(ds)
  ds = np.array(ds)
  ds_X = ds[:, 0:6]
  # targets are the row shifted by one column; copied to avoid aliasing ds
  ds_Y = np.copy(ds[:, 1:])
  # fixed 8000-row train/test split — assumes len(ds) > 8000 (TODO confirm)
  ds_X_train, ds_X_test = ds_X[0:8000], ds_X[8000:]
  ds_Y_train, ds_Y_test = ds_Y[0:8000], ds_Y[8000:]
  return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test

from tinygrad.optim import Adam

if __name__ == "__main__":
  # Transformer(syms=10, maxlen=6, ...) on the addition dataset
  model = Transformer(10, 6, 2, 128, 4)
  X_train, Y_train, X_test, Y_test = make_dataset()
  optim = Adam(get_parameters(model), lr=0.001)
  for i in range(5):
    # device picked from the GPU env var at each call (same value every time)
    train(model, X_train, Y_train, optim, 500, BS=32,
          device=Device.GPU if os.getenv("GPU") else Device.CPU)
    evaluate(model, X_test, Y_test, num_classes=10)
from tinygrad.optim import Adam


class ComposeTransforms:
  """Chain callables: applies each transform to the output of the previous one."""

  def __init__(self, trans):
    self.trans = trans

  def __call__(self, x):
    for fn in self.trans:
      x = fn(x)
    return x


if __name__ == "__main__":
  # Fine-tune a pretrained ResNet18 on MNIST upscaled to 64x64 RGB.
  model = ResNet18(num_classes=10, pretrained=True)
  X_train, Y_train, X_test, Y_test = fetch_mnist()
  X_train = X_train.reshape(-1, 28, 28).astype(np.uint8)
  X_test = X_test.reshape(-1, 28, 28).astype(np.uint8)
  lr = 5e-5
  # 28x28 grayscale -> 64x64 -> stacked -> [0,1] floats -> 3-channel NCHW
  transform = ComposeTransforms([
      lambda x: [Image.fromarray(xx, mode='L').resize((64, 64)) for xx in x],
      lambda x: np.stack([np.asarray(xx) for xx in x], 0),
      lambda x: x / 255.0,
      lambda x: np.tile(np.expand_dims(x, 1), (1, 3, 1, 1)).astype(np.float32),
  ])
  for i in range(10):
    # fresh optimizer each round so Adam state restarts at the new lr
    optim = Adam(get_parameters(model), lr=lr)
    train(model, X_train, Y_train, optim, 50, BS=32, transform=transform)
    acc, Y_test_preds = evaluate(model, X_test, Y_test, num_classes=10,
                                 return_predict=True, transform=transform)
    lr /= 1.2  # geometric lr decay
    print(f'reducing lr to {lr:.4f}')
    # --- tail of __init__: the method's opening (embed, tbs, maxlen, syms
    # --- assignments) is outside this view.
    # output projection from embedding space to symbol logits
    self.final = Tensor.uniform(embed_dim, syms)

  def forward(self, x):
    # Returns per-position log-probabilities of shape (bs, seqlen, syms).
    bs = x.shape[0]
    xnp = x.cpu().data
    # combined one-hot: first maxlen slots = position, next syms slots = symbol
    onehot = np.zeros((bs, x.shape[1], self.maxlen + self.syms), dtype=np.float32)
    for i in range(x.shape[1]):
      onehot[range(bs), i, i] = 1
      onehot[range(bs), i, self.maxlen + xnp[:, i]] = 1
    onehot = onehot.reshape(bs * x.shape[1], self.maxlen + self.syms)

    # project one-hots through the embedding, back to (bs, seqlen, embed)
    x = Tensor(onehot, device=x.device).dot(
        self.embed).reshape(shape=(bs, x.shape[1], -1))
    for t in self.tbs:
      x = t(x)
    # final projection + log-softmax over symbols
    x = x.reshape(shape=(-1, x.shape[-1])).dot(self.final).logsoftmax()
    return x.reshape(shape=(bs, -1, x.shape[-1]))

from tinygrad.optim import Adam

if __name__ == "__main__":
  # Transformer(syms=10, maxlen=6, ...) trained on the addition dataset
  model = Transformer(10, 6, 2, 128, 4)
  X_train, Y_train, X_test, Y_test = make_dataset()
  optim = Adam(get_parameters(model), lr=0.001)
  for i in range(5):
    train(model, X_train, Y_train, optim, 500, BS=32)
    evaluate(model, X_test, Y_test, num_classes=10)