def __init__(self, training_data_ids_path, validation_data_ids_path,
             language_model_model_dir, data_itos_path, cuda_device_id=0,
             batch_size=32, dropout_multiplier=0.7):
    # A negative device id means "run on CPU"; only pin a GPU when one is requested.
    self.use_cuda = cuda_device_id >= 0
    if self.use_cuda:
        torch.cuda.set_device(cuda_device_id)

    # Integer-to-string vocabulary produced during tokenization.
    with open(data_itos_path, 'rb') as f:
        self.inspire_data_itos = pickle.load(f)
    self.vocabulary_size = len(self.inspire_data_itos)

    number_of_backpropagation_through_time_steps = 70
    number_of_hidden_units = 1150
    number_of_layers = 3
    self.embedding_size = 400
    optimization_function = partial(optim.Adam, betas=(0.8, 0.99))

    # Token ids are stored as an array of per-document arrays; flatten them
    # into one long stream for language modelling.
    training_token_ids = np.concatenate(np.load(training_data_ids_path))
    validation_token_ids = np.concatenate(np.load(validation_data_ids_path))

    training_dataloader = LanguageModelLoader(
        nums=training_token_ids, bs=batch_size,
        bptt=number_of_backpropagation_through_time_steps)
    validation_dataloader = LanguageModelLoader(
        nums=validation_token_ids, bs=batch_size,
        bptt=number_of_backpropagation_through_time_steps)

    model = LanguageModelData(
        path=language_model_model_dir, pad_idx=1, n_tok=self.vocabulary_size,
        trn_dl=training_dataloader, val_dl=validation_dataloader,
        bs=batch_size, bptt=number_of_backpropagation_through_time_steps)

    # The five AWD-LSTM dropouts, scaled together by a single multiplier.
    dropouts = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * dropout_multiplier
    self.learner = model.get_model(
        opt_fn=optimization_function, emb_sz=self.embedding_size,
        n_hid=number_of_hidden_units, n_layers=number_of_layers,
        dropouti=dropouts[0], dropout=dropouts[1], wdrop=dropouts[2],
        dropoute=dropouts[3], dropouth=dropouts[4])

    # Activation regularization (AR/TAR), gradient clipping, and accuracy metric.
    self.learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
    self.learner.clip = 0.3
    self.learner.metrics = [accuracy]
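# For context, a minimal usage sketch. The surrounding class is not shown in
# this excerpt, so the name LanguageModelTrainer and all paths below are
# hypothetical, and the fit() hyperparameters are illustrative only.
trainer = LanguageModelTrainer(
    training_data_ids_path="data/trn_ids.npy",
    validation_data_ids_path="data/val_ids.npy",
    language_model_model_dir="models/lm/",
    data_itos_path="data/itos.pkl",
    cuda_device_id=0,        # pass -1 to stay on the CPU
    batch_size=32,
    dropout_multiplier=0.7,  # scale all five AWD-LSTM dropouts together
)
trainer.learner.fit(1e-3, 1, wds=1e-7, use_clr=(32, 2), cycle_len=1)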
def _train_lm(self, train_ids, batch_size=4, val_ids=None):
    # A validation stream is required below; the None default would otherwise
    # crash inside np.concatenate.
    if val_ids is None:
        raise ValueError("val_ids is required to build the validation loader")

    train_dataloader = LanguageModelLoader(np.concatenate(train_ids), batch_size, self._bptt)
    val_dataloader = LanguageModelLoader(np.concatenate(val_ids), batch_size, self._bptt)
    md = LanguageModelData("tmp", 1, self._vocab.size, train_dataloader, val_dataloader,
                           bs=batch_size, bptt=self._bptt)
    self._language_model = md.get_model(
        self.OPT_FN, self._embedding_size, self._n_hidden_activations, self._n_layers,
        dropouti=self._dropouts_lm[0], dropout=self._dropouts_lm[1],
        wdrop=self._dropouts_lm[2], dropoute=self._dropouts_lm[3],
        dropouth=self._dropouts_lm[4])
    self._language_model.metrics = [accuracy]
    self._language_model.unfreeze()

    lr = 1e-3
    # One short warm-up cycle at half the rate, then a long 20-epoch cycle.
    # lr_find saves and restores the weights, so it only produces diagnostics.
    self._language_model.lr_find(start_lr=lr / 10, end_lr=lr * 50, linear=True)
    self._language_model.fit(lr / 2, 1, wds=self._wd, use_clr=(32, 2), cycle_len=1,
                             callbacks=[LoggingCallback(save_path="./tmp/log")])
    self._language_model.lr_find(start_lr=lr / 10, end_lr=lr * 10, linear=True)
    self._language_model.fit(lr, 1, wds=self._wd, use_clr=(32, 2), cycle_len=20,
                             callbacks=[LoggingCallback(save_path="./tmp/log")])
    # Keep only the encoder weights for later fine-tuning on a downstream task.
    self._language_model.save_encoder("enc_weights")
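# LoggingCallback is used above but not defined in this excerpt. A minimal
# sketch of what such a callback might look like, assuming fastai 0.7's
# Callback interface (on_epoch_end receives the epoch's validation metrics);
# the import path and file format here are assumptions, not the author's code.
from fastai.sgdr import Callback

class LoggingCallback(Callback):
    def __init__(self, save_path):
        self.save_path = save_path

    def on_train_begin(self):
        self.epoch = 0
        # Append, so both fit() calls above log into the same file.
        self.log_file = open(self.save_path, "a")

    def on_epoch_end(self, metrics):
        self.log_file.write(f"epoch {self.epoch}: {metrics}\n")
        self.epoch += 1

    def on_train_end(self):
        self.log_file.close()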
itos[v] = k  # tail of the stoi -> itos inversion loop from the previous cell

# In[12]:

itos[4]

# In[13]:

path = Path("../data/cache/lm_word/")
path.mkdir(parents=True, exist_ok=True)

model_data = LanguageModelData(
    path, pad_idx=0, n_tok=n_tok,
    trn_dl=trn_loader, val_dl=val_loader, test_dl=tst_loader
)

# ### QRNN Model

# In[ ]:

drops = np.array([0.05, 0.1, 0.05, 0, 0.1])

learner = model_data.get_model(
    partial(Adam, betas=(0.8, 0.999)),
    emb_sz=300, n_hid=500, n_layers=4,
    dropouti=drops[0], dropout=drops[1], wdrop=drops[2],
    dropoute=drops[3], dropouth=drops[4],
    qrnn=True
)
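# The excerpt ends after constructing the QRNN learner (note that qrnn=True in
# fastai 0.7 relies on the separate cupy-based pytorch-qrnn package). A
# plausible next cell under the same API; the learning rate, weight decay, and
# encoder file name below are illustrative, not taken from the notebook.

# In[ ]:

learner.lr_find()  # sweep learning rates, then inspect learner.sched.plot()
learner.fit(1e-3, 1, wds=1e-7, use_clr=(32, 2), cycle_len=10)
learner.save_encoder("qrnn_enc")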
bs = 64
bptt = 50

trn_dl = LanguageModelLoader(np.concatenate(tokens_train), bs, bptt)
val_dl = LanguageModelLoader(np.concatenate(tokens_val), bs, bptt)

# In[21]:

# Sanity check: the largest token id must be smaller than n_toks.
np.max(np.array(list(itertools.chain.from_iterable(tokens_train))))

# In[23]:

model_data = LanguageModelData(path, 2, n_toks, trn_dl, val_dl, bs=bs, bptt=bptt)

# In[24]:

drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * 0.7
opt_fn = partial(torch.optim.Adam, betas=(0.8, 0.99))

# In[25]:

learner = model_data.get_model(opt_fn, EMB_DIM, 500, 3,
                               dropouti=drops[0], dropout=drops[1], wdrop=drops[2],
                               dropoute=drops[3], dropouth=drops[4])
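# As in the __init__ earlier in this section, gradient clipping, AR/TAR
# activation regularization, and an accuracy metric are typically attached
# before fitting. A sketch of that next step; the fit() hyperparameters are
# illustrative only.

# In[ ]:

learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)  # activation regularization
learner.clip = 0.3                                      # gradient clipping
learner.metrics = [accuracy]
learner.fit(1e-3, 1, wds=1e-7, use_clr=(32, 2), cycle_len=15)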