import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def main():
    qanet = QANet(50)

    # Parameter initialization: Xavier uniform for weight matrices (dim >= 2),
    # normal for biases and other 1-D parameters.
    init1 = filter(lambda p: p.requires_grad and p.dim() >= 2, qanet.parameters())
    init2 = filter(lambda p: p.requires_grad and p.dim() < 2, qanet.parameters())
    for param in init1:
        nn.init.xavier_uniform_(param)
    for param in init2:
        nn.init.normal_(param)

    train = SQuAD(TRAIN_JSON)
    val = SQuAD(DEV_JSON)
    trainSet = DataLoader(dataset=train, batch_size=4, shuffle=True, collate_fn=collate)
    valSet = DataLoader(dataset=val, batch_size=4, shuffle=True, collate_fn=collate)
    print('length of dataloader', len(trainSet))

    optimizer = torch.optim.Adam(qanet.parameters(), lr=LEARNING_RATE)
    loss_list = []
    for epoch in range(10):
        print('epoch ', epoch)
        for i, (c, q, a) in enumerate(trainSet):
            y_pred = qanet(c, q)
            loss = utils.loss(y_pred, a)
            loss_list.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 200 == 0:
                print('loss ', loss.item())

    # Dump the per-step loss history and the trained model.
    with open('your_file.txt', 'w') as f:
        for item in loss_list:
            f.write("%s\n" % item)
    print('loss file written.')
    torch.save(qanet, 'qanet')
    print('model saved.')
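# `utils.loss` is not defined in this snippet. QANet's training objective is the
# sum of the negative log-likelihoods of the gold start and end positions; a
# minimal sketch follows (the tuple layout of `y_pred` and the (batch, 2) shape
# of `a` are assumptions, not this repository's confirmed interface).
import torch.nn.functional as F

def span_loss(y_pred, a):
    # y_pred: (log_p_start, log_p_end), each of shape (batch, passage_len)
    # a:      LongTensor of shape (batch, 2) with gold start/end token indices
    log_p_start, log_p_end = y_pred
    return F.nll_loss(log_p_start, a[:, 0]) + F.nll_loss(log_p_end, a[:, 1])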
import json
import logging
import os
import pickle
from math import log2

import numpy as np
import torch
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter


def train(model_params, launch_params):
    with open(launch_params['word_emb_file'], "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['char_emb_file'], "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['train_eval_file'], "r") as fh:
        train_eval_file = json.load(fh)
    with open(launch_params['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)

    writer = SummaryWriter(os.path.join(launch_params['log'], launch_params['prefix']))

    lr = launch_params['learning_rate']
    base_lr = 1.0
    warm_up = launch_params['lr_warm_up_num']
    model_params['word_mat'] = word_mat
    model_params['char_mat'] = char_mat

    logging.info('Load dataset and create model.')
    dev_dataset = SQuADDataset(launch_params['dev_record_file'], launch_params['test_num_batches'],
                               launch_params['batch_size'], launch_params['word2ind_file'])
    if launch_params['fine_tuning']:
        # Resume from a previously dumped model and continue training.
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['fine_tuning_steps'],
                                     launch_params['batch_size'], launch_params['word2ind_file'])
        model_args = pickle.load(open(launch_params['args_filename'], 'rb'))
        model = QANet(**model_args)
        model.load_state_dict(torch.load(launch_params['dump_filename']))
        model.to(device)
    else:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['num_steps'],
                                     launch_params['batch_size'], launch_params['word2ind_file'])
        model = QANet(**model_params).to(device)
        launch_params['fine_tuning_steps'] = 0

    params = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.Adam(params, lr=base_lr, betas=(launch_params['beta1'], launch_params['beta2']),
                           eps=1e-7, weight_decay=3e-7)
    # Logarithmic warm-up: since base_lr is 1.0, the lambda returns the actual
    # learning rate, ramping up to `lr` over the first `warm_up` steps.
    cr = lr / log2(warm_up)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda ee: cr * log2(ee + 1) if ee < warm_up else lr)

    logging.info('Start training.')
    for iter in range(launch_params['num_steps']):
        try:
            passage_w, passage_c, question_w, question_c, y1, y2, ids = train_dataset[iter]
            passage_w, passage_c = passage_w.to(device), passage_c.to(device)
            question_w, question_c = question_w.to(device), question_c.to(device)
            y1, y2 = y1.to(device), y2.to(device)
            loss, p1, p2 = model.train_step([passage_w, passage_c, question_w, question_c],
                                            y1, y2, optimizer, scheduler)

            if iter % launch_params['train_interval'] == 0:
                logging.info('Iteration %d; Loss: %f', iter + launch_params['fine_tuning_steps'], loss)
                writer.add_scalar('Loss', loss, iter + launch_params['fine_tuning_steps'])

            if iter % launch_params['train_sample_interval'] == 0:
                # Decode one training sample to inspect predictions qualitatively.
                start = torch.argmax(p1[0, :]).item()
                end = torch.argmax(p2[0, start:]).item() + start
                passage = train_dataset.decode(passage_w)
                question = train_dataset.decode(question_w)
                generated_answer = train_dataset.decode(passage_w[:, start:end + 1])
                real_answer = train_dataset.decode(passage_w[:, y1[0]:y2[0] + 1])
                logging.info('Train Sample:\n Passage: %s\nQuestion: %s\nOriginal answer: %s\nGenerated answer: %s',
                             passage, question, real_answer, generated_answer)

            if iter % launch_params['test_interval'] == 0:
                # Evaluate on a slice of the training set and on the dev set.
                metrics, _ = evaluation(model, train_dataset, train_eval_file, launch_params['val_num_batches'])
                logging.info("VALID loss %f F1 %f EM %f", metrics['loss'], metrics['f1'], metrics['exact_match'])
                writer.add_scalar('Valid_loss', metrics['loss'], iter)
                writer.add_scalar('Valid_f1', metrics['f1'], iter)
                writer.add_scalar('Valid_em', metrics['exact_match'], iter)

                metrics, _ = evaluation(model, dev_dataset, dev_eval_file, launch_params['test_num_batches'])
                logging.info("TEST loss %f F1 %f EM %f", metrics['loss'], metrics['f1'], metrics['exact_match'])
                writer.add_scalar('Test_loss', metrics['loss'], iter)
                writer.add_scalar('Test_f1', metrics['f1'], iter)
                writer.add_scalar('Test_em', metrics['exact_match'], iter)
        except RuntimeError as e:
            logging.error(str(e))
        except KeyboardInterrupt:
            break

    torch.save(model.cpu().state_dict(), launch_params['dump_filename'])
    pickle.dump(model_params, open(launch_params['args_filename'], 'wb'))
    logging.info('Model has been saved.')
print('Loading Embeddings..')
import json

import numpy as np

char_emb_matrix = np.array(json.load(open('data/char_emb.json')), dtype=np.float32)
word_emb_matrix = np.array(json.load(open('data/word_emb.json')), dtype=np.float32)

print('Create Model..')
from model import QANet

model = QANet(128, 400, 50, word_emb_matrix, char_emb_matrix, droprate=0.1).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.8, 0.999), eps=1e-08,
                       weight_decay=3e-07, amsgrad=False)
del char_emb_matrix, word_emb_matrix

# Logarithmic learning-rate warm-up over the first 1000 steps, constant afterwards.
import math
warm_up = 1000
warm_up_f = lambda x: math.log(x + 1) / math.log(warm_up) if x < warm_up else 1
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[warm_up_f])

model_trainer = ModelTrainer(model,
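# As a quick sanity check of the warm-up schedule above: `warm_up_f` is the
# multiplier that LambdaLR applies to the base learning rate (0.001 here); it
# ramps logarithmically from 0 to 1 and then stays at 1.
import math

warm_up = 1000
warm_up_f = lambda x: math.log(x + 1) / math.log(warm_up) if x < warm_up else 1
for step in (0, 9, 99, 999, 2000):
    print(step, round(warm_up_f(step), 3))   # 0 0.0, 9 0.333, 99 0.667, 999 1.0, 2000 1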