import os

import torch
from torch.utils.data import DataLoader

import utils  # local helper module; provides the MRPC dataset wrappers
# The ELMo model class is assumed to be defined (or imported) earlier in this project.


def train():
    dataset = utils.MRPCSingle("./MRPC", rows=2000)
    UNITS = 256
    N_LAYERS = 2
    BATCH_SIZE = 16
    LEARNING_RATE = 2e-3
    print("num word: ", dataset.num_word)
    model = ELMo(v_dim=dataset.num_word, emb_dim=UNITS, units=UNITS,
                 n_layers=N_LAYERS, lr=LEARNING_RATE)
    if torch.cuda.is_available():
        print("GPU training available")
        device = torch.device("cuda")
        model = model.cuda()
    else:
        device = torch.device("cpu")
        model = model.cpu()
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    for epoch in range(10):
        for batch_idx, batch in enumerate(loader):
            batch = batch.type(torch.LongTensor).to(device)
            # model.step() runs one optimization step and returns the loss plus
            # the forward/backward language-model logits for this batch.
            loss, (fo, bo) = model.step(batch)
            if batch_idx % 20 == 0:
                # Greedy-decode the first sentence in the batch for a progress log.
                fp = fo[0].cpu().data.numpy().argmax(axis=1)
                bp = bo[0].cpu().data.numpy().argmax(axis=1)
                print(
                    "\n\nEpoch: ", epoch,
                    "| batch: ", batch_idx,
                    "| loss: %.3f" % loss,
                    "\n| tgt: ", " ".join(
                        [dataset.i2v[i] for i in batch[0].cpu().data.numpy() if i != dataset.pad_id]),
                    "\n| f_prd: ", " ".join(
                        [dataset.i2v[i] for i in fp if i != dataset.pad_id]),
                    "\n| b_prd: ", " ".join(
                        [dataset.i2v[i] for i in bp if i != dataset.pad_id]),
                )
    os.makedirs("./visual/models/elmo", exist_ok=True)
    torch.save(model.state_dict(), "./visual/models/elmo/model.pth")
    # Export embeddings for the last few sentences seen during training.
    export_w2v(model, batch[:4], device)
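
# A minimal sketch (not part of the original script) of how the bidirectional
# LM targets consumed inside model.step() are typically built for ELMo: the
# forward LSTM is scored against the next word, the backward LSTM against the
# previous word. The real ELMo.step() lives elsewhere in this project; this
# helper and its shift logic are illustrative assumptions only.
def _demo_lm_targets():
    batch = torch.tensor([[2, 7, 5, 3]])  # one sentence of word ids, shape (1, seq_len)
    f_target = batch[:, 1:]   # forward LM predicts token t+1 from tokens <= t
    b_target = batch[:, :-1]  # backward LM predicts token t-1 from tokens >= t
    return f_target, b_target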
" ".join([data.i2v[i] for i in seqs[0] if i != data.pad_id]), "\n| f_prd: ", " ".join([data.i2v[i] for i in fp if i != data.pad_id]), "\n| b_prd: ", " ".join([data.i2v[i] for i in bp if i != data.pad_id]), ) t0 = t1 os.makedirs("./visual/models/elmo", exist_ok=True) model.save_weights("./visual/models/elmo/model.ckpt") def export_w2v(model, data): model.load_weights("./visual/models/elmo/model.ckpt") emb = model.get_emb(data.sample(4)) print(emb) if __name__ == "__main__": UNITS = 256 N_LAYERS = 2 BATCH_SIZE = 16 LEARNING_RATE = 2e-3 d = utils.MRPCSingle("./MRPC", rows=2000) print("num word: ", d.num_word) m = ELMo(d.num_word, emb_dim=UNITS, units=UNITS, n_layers=N_LAYERS, lr=LEARNING_RATE) train(m, d, 10000) export_w2v(m, d)