def sample_some(trainer, model, dataset, X_train, n_samples=40, out_path='./samples.png'):
    prob = model_first_token(dataset, X_train)
    start_pixel = np.random.choice(np.arange(dataset.vocab_size), size=(n_samples, 1),
                                   replace=True, p=prob.numpy())
    start_pixel = torch.from_numpy(start_pixel).to(trainer.device)
    flattened_image_size = 32 * 32
    pixels = sample(model, start_pixel, flattened_image_size - 1,
                    temperature=1.0, sample=True, top_k=40)

    # for visualization we have to invert the permutation used to produce the pixels
    iperm = torch.argsort(dataset.perm)

    pixel_size = 32
    plt.close('all')
    n_cols = 8
    n_rows = n_samples // n_cols
    fig, axis = plt.subplots(n_rows, n_cols, figsize=(32, 16))
    for i, ax in enumerate(axis.ravel()):
        pxi = pixels[i][iperm]  # undo the encoding permutation
        pxi = pxi.view(pixel_size, pixel_size).cpu().numpy().astype(np.uint8)  # grayscale -> 2D
        ax.imshow(pxi, cmap='magma')
        # ax.set_aspect(aspect=1.0)
    plt.tight_layout()
    plt.savefig(out_path)
    plt.close('all')
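The helper model_first_token used above is not shown in this snippet. A minimal, hypothetical sketch of it, mirroring the first-token counting loop in the CIFAR snippet further below (the name and signature are taken from the call site; everything else is an assumption):

# Hypothetical sketch only: estimate the empirical distribution of the first
# token over the encoded training sequences, so sampling can start from a
# realistic first pixel. Assumes X_train is an iterable of token sequences.
def model_first_token(dataset, X_train):
    counts = torch.zeros(dataset.vocab_size)
    for row in X_train:
        counts[int(row[0])] += 1
    return counts / counts.sum()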
def run_one(model, input_tokens):
    input_ints = list(map(cast_int, input_tokens))
    input_tensor = torch.tensor([input_ints], dtype=torch.long).to(
        torch.cuda.current_device())
    full_tensor = sample(model, input_tensor, EXPRESSION_SIZE)
    full_ints = list(map(int, full_tensor[0]))
    output_ints = full_ints[-EXPRESSION_SIZE:]
    return list(map(cast_token, output_ints))
def run_one(model, n):
    "Returns a list of tokens produced."
    input_ints = get_pixels(n)
    input_tensor = torch.tensor([input_ints], dtype=torch.long).to(
        torch.cuda.current_device())
    full_tensor = sample(model, input_tensor, OUTPUT_SIZE)
    full_ints = list(map(int, full_tensor[0]))
    output_ints = full_ints[-OUTPUT_SIZE:]
    return [TOKENS[i] for i in output_ints]
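Both run_one variants depend on helpers and constants that are not included here (cast_int, cast_token, EXPRESSION_SIZE, get_pixels, OUTPUT_SIZE, TOKENS). Purely as a hypothetical illustration of their likely shape, assuming a small fixed token vocabulary:

# Hypothetical stand-ins only; the real vocabulary and sizes are not shown
# in these snippets.
TOKENS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '=']
TOKEN_TO_ID = {t: i for i, t in enumerate(TOKENS)}
EXPRESSION_SIZE = 8  # assumed number of tokens generated per expression

def cast_int(token):
    # token (e.g. a character) -> integer id
    return TOKEN_TO_ID[token]

def cast_token(i):
    # integer id -> token
    return TOKENS[i]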
def make_prediction(self, x_in):
    # Reset tracker variables
    self.batch, self.predict, self.prev_src_len = 0, 0, 0
    clip_src_mem = self.get_clip_src_mem_len(x_in[0]) + 1
    x_cut = x_in[:, :clip_src_mem]
    pred = x_cut.to(self.trainer.device)
    pred = sample(self.trainer.model, pred, int(self.max_trg + 1))
    return pred, clip_src_mem
def model_predict(self, x_in):
    # Reset tracker variables
    self.predict, self.batches = 0, 0
    # Prepare input
    cut_index = self.get_input_len(x_in[0])
    x_cut = x_in[:, :cut_index]
    pred = x_cut.to(self.trainer.device)
    # Make prediction
    output = sample(self.trainer.model, pred, int(self.max_trg + 1))
    return output
def predict_pron(config, train_dataset, trainer, model, word):
    try:
        x = torch.tensor([train_dataset.stoi[s] for s in word],
                         dtype=torch.long)[None, ...].to(trainer.device)
        y = sample(model, x, config['output_max_len'],
                   temperature=1.0, sample=True, top_k=10)[0]
        completion = ''.join([train_dataset.itos[int(i)] for i in y])
    except Exception:
        e = sys.exc_info()[0]
        print('predict_pron(): error %s for word: %s' % (e, word))
        # Typically, this can happen if a tested word contains a char
        # that did not exist during the training step
        completion = 'N/A'
    return completion
    # (tail of the GPTConfig(...) call that builds mconf)
    n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig

# initialize a trainer instance and kick off training
tconf = TrainerConfig(max_epochs=2, batch_size=512, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=512 * 20,
                      final_tokens=2 * len(train_dataset) * block_size,
                      num_workers=4, device='cpu', ckpt_path="./prfaqgen.bin")
trainer = Trainer(model, train_dataset, None, tconf)
# trainer.train()
trainer.load_checkpoint()
# trainer.save_checkpoint()

# alright, let's sample some text from the trained model
from mingpt.utils import sample

context = "The launch of Skills Store Arabic was GREEN. The team "
x = torch.tensor([train_dataset.stoi[s] for s in context],
                 dtype=torch.long)[None, ...].to(trainer.device)
y = sample(model, x, 2000, temperature=1.0, sample=True, top_k=40)[0]
completion = ''.join([train_dataset.itos[int(i)] for i in y])
print(completion)
        # (tail of CharDataset.__getitem__)
        chunk = self.data[idx:idx + self.block_size + 1]
        dix = [self.stoi[s] for s in chunk]
        x = torch.tensor(dix[:-1], dtype=torch.long)
        y = torch.tensor(dix[1:], dtype=torch.long)
        return x, y

block_size = 128
text = open('input.txt', 'r').read()
train_dataset = CharDataset(text, block_size)

from mingpt.model import GPT, GPTConfig
mconf = GPTConfig(train_dataset.vocab_size, train_dataset.block_size,
                  n_layer=8, n_head=8, n_embd=512)
model = GPT(mconf)

from mingpt.trainer import Trainer, TrainerConfig
from mingpt.utils import sample

while True:
    tconf = TrainerConfig(max_epochs=1, batch_size=256, learning_rate=6e-4)
    trainer = Trainer(model, train_dataset, None, tconf)  # no test dataset
    trainer.train()
    context = "O God, O God!"
    x = torch.tensor([train_dataset.stoi[s] for s in context],
                     dtype=torch.long)[None, ...].to(trainer.device)
    y = sample(model, x, 100, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    print(completion)
def run(context):
    x = torch.tensor([train_dataset.stoi[s] for s in context],
                     dtype=torch.long)[None, ...].cuda()
    y = sample(model, x, 500, temperature=1.0, sample=True, top_k=10)[0]
    completion = ''.join([train_dataset.itos[int(i)] for i in y])
    # print(completion)
    return completion
rp = torch.randperm(len(train_dataset))
nest = 5000  # how many images to use for the estimation
for i in range(nest):
    a, _ = train_dataset[int(rp[i])]
    t = a[0].item()  # index of first token in the sequence
    counts[t] += 1   # `counts` is assumed to be initialized (size vocab_size) in an earlier cell
prob = counts / counts.sum()

%%time
from mingpt.utils import sample

n_samples = 32
start_pixel = np.random.choice(np.arange(C.size(0)), size=(n_samples, 1), replace=True, p=prob)
start_pixel = torch.from_numpy(start_pixel).to(trainer.device)
pixels = sample(model, start_pixel, 32*32-1, temperature=1.0, sample=True, top_k=100)

# for visualization we have to invert the permutation used to produce the pixels
iperm = torch.argsort(train_dataset.perm)

ncol = 8
nrow = n_samples // ncol
plt.figure(figsize=(16, 8))
for i in range(n_samples):
    pxi = pixels[i][iperm]  # note: undo the encoding permutation
    plt.subplot(nrow, ncol, i + 1)
    plt.imshow(C[pxi].view(32, 32, 3).numpy().astype(np.uint8))
    plt.axis('off')

# visualize some of the learned positional embeddings, maybe they contain structure
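The trailing comment above points at a positional-embedding visualization that is not included in the snippet. A hedged sketch of what it could look like, assuming minGPT's model.pos_emb parameter of shape (1, block_size, n_embd) and reusing the iperm computed above:

# Sketch only: view each positional-embedding channel as a 32x32 image.
nsee = 8 * 8      # how many embedding channels to look at
ncol = 8
nrow = nsee // ncol
plt.figure(figsize=(16, 16))
for i in range(nsee):
    ci = model.pos_emb.data[0, :, i].cpu()      # one channel, length block_size (32*32 - 1)
    ci = torch.cat((torch.tensor([0.0]), ci))   # pad to 32*32 entries
    ci = ci[iperm]                              # undo the pixel permutation
    plt.subplot(nrow, ncol, i + 1)
    plt.imshow(ci.view(32, 32).numpy())
    plt.axis('off')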
    # (tail of the GPTConfig(...) call that builds model_config)
    n_head=8, n_embd=512,
)
model = GPT(model_config)

trainer_config = TrainerConfig(
    max_epochs=200,
    batch_size=512,
    learning_rate=6e-4,
    lr_decay=True,
    warmup_tokens=512 * 20,
    final_tokens=200 * len(train_dataset) * sample_block_size,
    num_workers=4,
)
trainer = Trainer(model, train_dataset, None, trainer_config)
trainer.train()

context = "대한민국의 대통령"  # "The President of South Korea"
sample_x = torch.tensor(
    [train_dataset.char2idx[s] for s in context],
    dtype=torch.long,
)[None, ...].to(trainer.device)
sample_y = sample(model, sample_x, 2000, temperature=0.9, sample=True, top_k=5)[0]
completion = "".join([train_dataset.idx2char[int(i)] for i in sample_y])
print(completion)