def main(option):
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.DEBUG,
        format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s')
    torch.manual_seed(option.random_seed)

    glove = Glove(option.emb_file)
    logging.info('Embeddings loaded')

    train_dataset = EventIntentSentimentDataset()
    logging.info('Loading train dataset: ' + option.train_dataset)
    train_dataset.load(option.train_dataset, glove)
    logging.info('Loaded train dataset: ' + option.train_dataset)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        collate_fn=EventIntentSentimentDataset_collate_fn,
        batch_size=option.batch_size,
        shuffle=True)

    if option.dev_dataset is not None:
        dev_dataset = EventIntentSentimentDataset()
        logging.info('Loading dev dataset: ' + option.dev_dataset)
        dev_dataset.load(option.dev_dataset, glove)
        logging.info('Loaded dev dataset: ' + option.dev_dataset)
        dev_data_loader = torch.utils.data.DataLoader(
            dev_dataset,
            collate_fn=EventIntentSentimentDataset_collate_fn,
            batch_size=len(dev_dataset),
            shuffle=False)

    yago_train_dataset = YagoDataset()
    logging.info('Loading YAGO train dataset: ' + option.yago_train_dataset)
    yago_train_dataset.load(option.yago_train_dataset, glove)
    yago_train_data_loader = torch.utils.data.DataLoader(
        yago_train_dataset,
        collate_fn=YagoDataset_collate_fn,
        batch_size=option.batch_size,
        shuffle=True)

    if option.yago_dev_dataset is not None:
        yago_dev_dataset = YagoDataset()
        logging.info('Loading YAGO dev dataset: ' + option.yago_dev_dataset)
        yago_dev_dataset.load(option.yago_dev_dataset, glove)
        yago_dev_data_loader = torch.utils.data.DataLoader(
            yago_dev_dataset,
            collate_fn=YagoDataset_collate_fn,
            batch_size=len(yago_dev_dataset),
            shuffle=False)

    embeddings = nn.Embedding(option.vocab_size, option.emb_dim, padding_idx=1)
    if option.model_type == 'NTN':
        event_model = NeuralTensorNetwork(embeddings, option.em_k)
    elif option.model_type == 'RoleFactor':
        event_model = RoleFactoredTensorModel(embeddings, option.em_k)
    elif option.model_type == 'LowRankNTN':
        event_model = LowRankNeuralTensorNetwork(embeddings, option.em_k,
                                                 option.em_r)
    else:
        logging.info('Unknown model type: ' + option.model_type)
        exit(1)
    intent_model = BiLSTMEncoder(embeddings, option.im_hidden_size,
                                 option.im_num_layers)

    # one tensor network per YAGO relation
    relation_models = {}
    for relation in yago_train_dataset.relations:
        relation_models[relation] = NeuralTensorNetwork_yago(embeddings,
                                                             option.em_k)

    if option.scorer_actv_func == 'sigmoid':
        scorer_actv_func = nn.Sigmoid
    elif option.scorer_actv_func == 'relu':
        scorer_actv_func = nn.ReLU
    elif option.scorer_actv_func == 'tanh':
        scorer_actv_func = nn.Tanh
    else:
        logging.info('Unknown scorer activation func: ' +
                     option.scorer_actv_func)
        exit(1)
    event_scorer = nn.Sequential(nn.Linear(option.em_k, 1), scorer_actv_func())
    relation_scorer = nn.Sequential(nn.Linear(option.em_k, 1),
                                    scorer_actv_func())

    intent_scorer = nn.CosineSimilarity(dim=1)
    sentiment_classifier = nn.Linear(option.em_k, 1)
    criterion = MarginLoss(option.margin)
    sentiment_criterion = nn.BCEWithLogitsLoss()
    yago_criterion = MarginLoss(option.yago_margin)

    # load pretrained embeddings
    embeddings.weight.data.copy_(torch.from_numpy(glove.embd).float())
    if not option.update_embeddings:
        event_model.embeddings.weight.requires_grad = False

    if option.use_gpu:
        event_model.cuda()
        intent_model.cuda()
        sentiment_classifier.cuda()
        event_scorer.cuda()
        relation_scorer.cuda()
        for relation_model in relation_models.values():
            relation_model.cuda()

    # exclude the shared embedding matrix from weight decay
    embeddings_param_id = [id(param) for param in embeddings.parameters()]
    params = [{
        'params': embeddings.parameters()
    }, {
        'params': [param for param in event_model.parameters()
                   if id(param) not in embeddings_param_id],
        'weight_decay': option.weight_decay
    }, {
        'params': [param for param in event_scorer.parameters()
                   if id(param) not in embeddings_param_id],
        'weight_decay': option.weight_decay
    }, {
        'params': [param for param in intent_model.parameters()
                   if id(param) not in embeddings_param_id],
        'weight_decay': option.weight_decay
    }, {
        'params': [param for param in sentiment_classifier.parameters()
                   if id(param) not in embeddings_param_id],
        'weight_decay': option.weight_decay
    }]
    for relation in relation_models:
        params.append({
            'params': [param for param in relation_models[relation].parameters()
                       if id(param) not in embeddings_param_id],
            'weight_decay': option.weight_decay
        })
    optimizer = torch.optim.Adagrad(params, lr=option.lr)

    # load checkpoint if provided:
    if option.load_checkpoint != '':
        checkpoint = torch.load(option.load_checkpoint)
        event_model.load_state_dict(checkpoint['event_model_state_dict'])
        intent_model.load_state_dict(checkpoint['intent_model_state_dict'])
        event_scorer.load_state_dict(checkpoint['event_scorer_state_dict'])
        sentiment_classifier.load_state_dict(
            checkpoint['sentiment_classifier_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        for relation in relation_models:
            relation_models[relation].load_state_dict(
                checkpoint['relation_model_state_dict'][relation])
        logging.info('Loaded checkpoint: ' + option.load_checkpoint)
    # load pretrained event model instead:
    elif option.pretrained_event_model != '':
        checkpoint = torch.load(option.pretrained_event_model)
        event_model.load_state_dict(checkpoint['model_state_dict'])
        logging.info('Loaded pretrained event model: ' +
                     option.pretrained_event_model)

    for epoch in range(1, option.epochs + 1):
        logging.info('Epoch ' + str(epoch))

        # train set
        avg_loss_e = 0
        avg_loss_i = 0
        avg_loss_s = 0
        avg_loss = 0
        avg_loss_sum = 0
        avg_loss_event = 0
        avg_loss_attr = 0

        # assume the YAGO dataset is larger than ATOMIC: iterate over YAGO
        # and cycle the ATOMIC loader so both are consumed in lockstep
        atomic_iterator = itertools.cycle(iter(train_data_loader))
        yago_iterator = iter(yago_train_data_loader)
        for i, yago_batch in enumerate(yago_iterator, 1):
            atomic_batch = next(atomic_iterator)

            # ATOMIC step: event + intent + sentiment losses
            optimizer.zero_grad()
            loss, loss_e, loss_i, loss_s = run_batch(
                option, atomic_batch, event_model, intent_model, event_scorer,
                intent_scorer, sentiment_classifier, criterion,
                sentiment_criterion)
            loss.backward()
            optimizer.step()
            avg_loss_e += loss_e.item() / option.report_every
            avg_loss_i += loss_i.item() / option.report_every
            avg_loss_s += loss_s.item() / option.report_every
            avg_loss += loss.item() / option.report_every
            if i % option.report_every == 0:
                logging.info(
                    'Atomic batch %d, loss_e=%.4f, loss_i=%.4f, loss_s=%.4f, loss=%.4f'
                    % (i, avg_loss_e, avg_loss_i, avg_loss_s, avg_loss))
                avg_loss_e = 0
                avg_loss_i = 0
                avg_loss_s = 0
                avg_loss = 0

            # YAGO step: event + attribute (relation) losses
            optimizer.zero_grad()
            loss_sum, loss_event, loss_attr = run_yago_batch(
                option, yago_batch, event_model, relation_models,
                yago_criterion, relation_scorer, event_scorer)
            loss_sum.backward()
            optimizer.step()
            avg_loss_sum += loss_sum.item() / option.report_every
            avg_loss_event += loss_event.item() / option.report_every
            avg_loss_attr += loss_attr.item() / option.report_every
            if i % option.report_every == 0:
                logging.info(
                    'YAGO batch %d, loss_event=%.4f, loss_attr=%.4f, loss=%.4f'
                    % (i, avg_loss_event, avg_loss_attr, avg_loss_sum))
                avg_loss_sum = 0
                avg_loss_event = 0
                avg_loss_attr = 0

        # dev set
        if option.dev_dataset is not None:
            event_model.eval()
            intent_model.eval()
            event_scorer.eval()
            sentiment_classifier.eval()
            batch = next(iter(dev_data_loader))
            with torch.no_grad():
                loss, loss_e, loss_i, loss_s = run_batch(
                    option, batch, event_model, intent_model, event_scorer,
                    intent_scorer, sentiment_classifier, criterion,
                    sentiment_criterion)
            logging.info(
                'Eval on dev set, loss_e=%.4f, loss_i=%.4f, loss_s=%.4f, loss=%.4f'
                % (loss_e.item(), loss_i.item(), loss_s.item(), loss.item()))
            event_model.train()
            intent_model.train()
            event_scorer.train()
            sentiment_classifier.train()

        # dev set (yago)
        if option.yago_dev_dataset is not None:
            for key in relation_models:
                relation_models[key].eval()
            relation_scorer.eval()
            event_model.eval()
            yago_dev_batch = next(iter(yago_dev_data_loader))
            with torch.no_grad():
                loss_sum, loss_event, loss_attr = run_yago_batch(
                    option, yago_dev_batch, event_model, relation_models,
                    yago_criterion, relation_scorer, event_scorer)
            logging.info(
                'Eval on yago dev set, loss_sum=%.4f, loss_event=%.4f, loss_attr=%.4f'
                % (loss_sum.item(), loss_event.item(), loss_attr.item()))
            for key in relation_models:
                relation_models[key].train()
            relation_scorer.train()
            event_model.train()

        if option.save_checkpoint != '':
            checkpoint = {
                'event_model_state_dict': event_model.state_dict(),
                'intent_model_state_dict': intent_model.state_dict(),
                'event_scorer_state_dict': event_scorer.state_dict(),
                'sentiment_classifier_state_dict':
                    sentiment_classifier.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'relation_model_state_dict': {
                    relation: relation_models[relation].state_dict()
                    for relation in relation_models
                }
            }
            torch.save(checkpoint, option.save_checkpoint + '_' + str(epoch))
            logging.info('Saved checkpoint: ' + option.save_checkpoint + '_' +
                         str(epoch))
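# --- Hedged sketch -----------------------------------------------------------
# `MarginLoss` is defined elsewhere in this repo; it is not a torch built-in.
# The reconstruction below is a minimal sketch assuming the standard
# max-margin ranking formulation over (positive, negative) score pairs.
# The repo's actual class may differ in signature or reduction.
import torch
import torch.nn as nn


class MarginLoss(nn.Module):
    def __init__(self, margin):
        super(MarginLoss, self).__init__()
        self.margin = margin

    def forward(self, pos_score, neg_score):
        # hinge: max(0, margin - pos + neg), averaged over the batch, so the
        # true sample must outscore the corrupted one by at least `margin`
        return torch.clamp(self.margin - pos_score + neg_score, min=0).mean()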
def main(option):
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.DEBUG,
        format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s')
    torch.manual_seed(option.random_seed)

    glove = Glove(option.emb_file)
    logging.info('Embeddings loaded')

    embeddings = nn.Embedding(option.vocab_size, option.emb_dim, padding_idx=1)
    if option.model_type == 'NTN':
        event_model = NeuralTensorNetwork(embeddings, option.em_k)
    elif option.model_type == 'RoleFactor':
        event_model = RoleFactoredTensorModel(embeddings, option.em_k)
    elif option.model_type == 'LowRankNTN':
        event_model = LowRankNeuralTensorNetwork(embeddings, option.em_k,
                                                 option.em_r)
    else:
        logging.info('Unknown model type: ' + option.model_type)
        exit(1)
    intent_model = BiLSTMEncoder(embeddings, option.im_hidden_size,
                                 option.im_num_layers)

    if option.em_actv_func == 'sigmoid':
        em_actv_func = nn.Sigmoid()
    elif option.em_actv_func == 'relu':
        em_actv_func = nn.ReLU()
    elif option.em_actv_func == 'tanh':
        em_actv_func = nn.Tanh()
    else:
        logging.info('Unknown event activation func: ' + option.em_actv_func)
        exit(1)
    event_scorer = nn.Sequential(nn.Linear(option.em_k, 1), em_actv_func)

    intent_scorer = nn.CosineSimilarity(dim=1)
    sentiment_classifier = nn.Linear(option.em_k, 1)
    criterion = MarginLoss(option.margin)
    sentiment_criterion = nn.BCEWithLogitsLoss()

    # load pretrained embeddings
    embeddings.weight.data.copy_(torch.from_numpy(glove.embd).float())
    if not option.update_embeddings:
        event_model.embeddings.weight.requires_grad = False

    if option.use_gpu:
        event_model.cuda()
        intent_model.cuda()
        sentiment_classifier.cuda()
        event_scorer.cuda()

    # exclude the shared embedding matrix from weight decay
    embeddings_param_id = [id(param) for param in embeddings.parameters()]
    params = [{
        'params': embeddings.parameters()
    }, {
        'params': [param for param in event_model.parameters()
                   if id(param) not in embeddings_param_id],
        'weight_decay': option.weight_decay
    }, {
        'params': [param for param in event_scorer.parameters()
                   if id(param) not in embeddings_param_id],
        'weight_decay': option.weight_decay
    }, {
        'params': [param for param in intent_model.parameters()
                   if id(param) not in embeddings_param_id],
        'weight_decay': option.weight_decay
    }, {
        'params': [param for param in sentiment_classifier.parameters()
                   if id(param) not in embeddings_param_id],
        'weight_decay': option.weight_decay
    }]
    optimizer = torch.optim.Adagrad(params, lr=option.lr)

    # load checkpoint if provided:
    if option.load_checkpoint != '':
        checkpoint = torch.load(option.load_checkpoint)
        event_model.load_state_dict(checkpoint['event_model_state_dict'])
        intent_model.load_state_dict(checkpoint['intent_model_state_dict'])
        event_scorer.load_state_dict(checkpoint['event_scorer_state_dict'])
        sentiment_classifier.load_state_dict(
            checkpoint['sentiment_classifier_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        logging.info('Loaded checkpoint: ' + option.load_checkpoint)
    # load pretrained event model instead:
    elif option.pretrained_event_model != '':
        checkpoint = torch.load(option.pretrained_event_model)
        event_model.load_state_dict(checkpoint['model_state_dict'])
        logging.info('Loaded pretrained event model: ' +
                     option.pretrained_event_model)

    train_dataset = EventIntentSentimentDataset()
    logging.info('Loading train dataset: ' + option.train_dataset)
    train_dataset.load(option.train_dataset, glove)
    logging.info('Loaded train dataset: ' + option.train_dataset)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        collate_fn=EventIntentSentimentDataset_collate_fn,
        batch_size=option.batch_size,
        shuffle=True)

    if option.dev_dataset is not None:
        dev_dataset = EventIntentSentimentDataset()
        logging.info('Loading dev dataset: ' + option.dev_dataset)
        dev_dataset.load(option.dev_dataset, glove)
        logging.info('Loaded dev dataset: ' + option.dev_dataset)
        dev_data_loader = torch.utils.data.DataLoader(
            dev_dataset,
            collate_fn=EventIntentSentimentDataset_collate_fn,
            batch_size=len(dev_dataset),
            shuffle=False)

    for epoch in range(1, option.epochs + 1):
        logging.info('Epoch ' + str(epoch))

        # train set
        avg_loss_e = 0
        avg_loss_i = 0
        avg_loss_s = 0
        avg_loss = 0
        for i, batch in enumerate(train_data_loader, 1):
            optimizer.zero_grad()
            loss, loss_e, loss_i, loss_s = run_batch(
                option, batch, event_model, intent_model, event_scorer,
                intent_scorer, sentiment_classifier, criterion,
                sentiment_criterion)
            loss.backward()
            optimizer.step()
            avg_loss_e += loss_e.item() / option.report_every
            avg_loss_i += loss_i.item() / option.report_every
            avg_loss_s += loss_s.item() / option.report_every
            avg_loss += loss.item() / option.report_every
            if i % option.report_every == 0:
                logging.info(
                    'Batch %d, loss_e=%.4f, loss_i=%.4f, loss_s=%.4f, loss=%.4f'
                    % (i, avg_loss_e, avg_loss_i, avg_loss_s, avg_loss))
                avg_loss_e = 0
                avg_loss_i = 0
                avg_loss_s = 0
                avg_loss = 0

        # dev set
        if option.dev_dataset is not None:
            event_model.eval()
            intent_model.eval()
            event_scorer.eval()
            sentiment_classifier.eval()
            batch = next(iter(dev_data_loader))
            with torch.no_grad():
                loss, loss_e, loss_i, loss_s = run_batch(
                    option, batch, event_model, intent_model, event_scorer,
                    intent_scorer, sentiment_classifier, criterion,
                    sentiment_criterion)
            logging.info(
                'Eval on dev set, loss_e=%.4f, loss_i=%.4f, loss_s=%.4f, loss=%.4f'
                % (loss_e.item(), loss_i.item(), loss_s.item(), loss.item()))
            event_model.train()
            intent_model.train()
            event_scorer.train()
            sentiment_classifier.train()

        if option.save_checkpoint != '':
            checkpoint = {
                'event_model_state_dict': event_model.state_dict(),
                'intent_model_state_dict': intent_model.state_dict(),
                'event_scorer_state_dict': event_scorer.state_dict(),
                'sentiment_classifier_state_dict':
                    sentiment_classifier.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }
            torch.save(checkpoint, option.save_checkpoint + '_' + str(epoch))
            logging.info('Saved checkpoint: ' + option.save_checkpoint + '_' +
                         str(epoch))
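# --- Hedged sketch -----------------------------------------------------------
# For reference, the `option` namespace read by main() above can be built with
# argparse. The scaffold below lists only the option.* fields the script
# actually accesses; every default value is an illustrative assumption, not a
# value taken from the repo.
import argparse


def parse_options():
    parser = argparse.ArgumentParser()
    parser.add_argument('--emb_file', type=str, required=True)
    parser.add_argument('--vocab_size', type=int, required=True)
    parser.add_argument('--emb_dim', type=int, default=100)
    parser.add_argument('--model_type', type=str, default='NTN',
                        choices=['NTN', 'RoleFactor', 'LowRankNTN'])
    parser.add_argument('--em_k', type=int, default=100)
    parser.add_argument('--em_r', type=int, default=10)
    parser.add_argument('--em_actv_func', type=str, default='tanh',
                        choices=['sigmoid', 'relu', 'tanh'])
    parser.add_argument('--im_hidden_size', type=int, default=100)
    parser.add_argument('--im_num_layers', type=int, default=1)
    parser.add_argument('--margin', type=float, default=1.0)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--update_embeddings', action='store_true')
    parser.add_argument('--use_gpu', action='store_true')
    parser.add_argument('--random_seed', type=int, default=1)
    parser.add_argument('--train_dataset', type=str, required=True)
    parser.add_argument('--dev_dataset', type=str, default=None)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--report_every', type=int, default=50)
    parser.add_argument('--load_checkpoint', type=str, default='')
    parser.add_argument('--pretrained_event_model', type=str, default='')
    parser.add_argument('--save_checkpoint', type=str, default='')
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_options())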
def train(epochs, ctx):
    """Training function."""
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)

    opt_options = {'learning_rate': opt.lr, 'wd': opt.wd}
    if opt.optimizer == 'sgd':
        opt_options['momentum'] = 0.9
    if opt.optimizer == 'adam':
        opt_options['epsilon'] = 1e-7
    trainer = gluon.Trainer(net.collect_params(), opt.optimizer,
                            opt_options, kvstore=opt.kvstore)
    if opt.lr_beta > 0.0:
        # Jointly train class-specific beta.
        # See "sampling matters in deep embedding learning" paper for details.
        beta.initialize(mx.init.Constant(opt.beta), ctx=ctx)
        trainer_beta = gluon.Trainer([beta], 'sgd',
                                     {'learning_rate': opt.lr_beta,
                                      'momentum': 0.9},
                                     kvstore=opt.kvstore)

    loss = MarginLoss(margin=opt.margin, nu=opt.nu)

    best_val = 0.0
    for epoch in range(epochs):
        tic = time.time()
        prev_loss, cumulative_loss = 0.0, 0.0

        # Learning rate schedule.
        trainer.set_learning_rate(get_lr(opt.lr, epoch, steps, opt.factor))
        logging.info('Epoch %d learning rate=%f', epoch, trainer.learning_rate)
        if opt.lr_beta > 0.0:
            trainer_beta.set_learning_rate(
                get_lr(opt.lr_beta, epoch, steps, opt.factor))
            logging.info('Epoch %d beta learning rate=%f', epoch,
                         trainer_beta.learning_rate)

        # Inner training loop.
        for i in range(200):
            batch = train_data.next()
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx,
                                               batch_axis=0)

            Ls = []
            with ag.record():
                for x, y in zip(data, label):
                    a_indices, anchors, positives, negatives, _ = net(x)

                    if opt.lr_beta > 0.0:
                        L = loss(anchors, positives, negatives, beta,
                                 y[a_indices])
                    else:
                        L = loss(anchors, positives, negatives, opt.beta, None)

                    # Store the loss and do backward after we have done forward
                    # on all GPUs for better speed on multiple GPUs.
                    Ls.append(L)
                    cumulative_loss += nd.mean(L).asscalar()

                for L in Ls:
                    L.backward()

            # Update.
            trainer.step(batch.data[0].shape[0])
            if opt.lr_beta > 0.0:
                trainer_beta.step(batch.data[0].shape[0])

            if (i + 1) % opt.log_interval == 0:
                logging.info('[Epoch %d, Iter %d] training loss=%f' %
                             (epoch, i + 1, cumulative_loss - prev_loss))
                prev_loss = cumulative_loss

        logging.info('[Epoch %d] training loss=%f' % (epoch, cumulative_loss))
        logging.info('[Epoch %d] time cost: %f' % (epoch, time.time() - tic))

        names, val_accs = test(ctx)
        for name, val_acc in zip(names, val_accs):
            logging.info('[Epoch %d] validation: %s=%f' %
                         (epoch, name, val_acc))

        if val_accs[0] > best_val:
            best_val = val_accs[0]
            logging.info('Saving %s.' % opt.save_model_prefix)
            net.collect_params().save('%s.params' % opt.save_model_prefix)
    return best_val
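# --- Hedged sketch -----------------------------------------------------------
# `get_lr` and `steps` are defined outside this excerpt. The call signature
# get_lr(opt.lr, epoch, steps, opt.factor) suggests a step-decay schedule;
# the sketch below is one plausible implementation under that assumption.
def get_lr(base_lr, epoch, steps, factor):
    """Multiply base_lr by `factor` once for every boundary in `steps`
    that the current epoch has reached. E.g. base_lr=0.1, steps=[12, 24],
    factor=0.1 -> 0.1 before epoch 12, 0.01 from 12, 0.001 from 24."""
    lr = base_lr
    for step in steps:
        if epoch >= step:
            lr *= factor
    return lr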
    elif option.model == 'LowRankNTN':
        event_model = LowRankNeuralTensorNetwork(embeddings, option.em_k,
                                                 option.em_r)
    elif option.model == 'RoleFactor':
        event_model = RoleFactoredTensorModel(embeddings, option.em_k)
    elif option.model == 'Predicate':
        event_model = PredicateTensorModel(embeddings)
    elif option.model == 'NN':
        event_model = NN(embeddings, 2 * option.em_k, option.em_k)
    elif option.model == 'EMC':
        event_model = EMC(embeddings, 2 * option.em_k, option.em_k)
    else:
        logging.info('Unknown model: ' + option.model)
        exit(1)

    cosine_similarity = nn.CosineSimilarity(dim=1)
    criterion = MarginLoss(option.margin)

    # load pretrained embeddings
    embeddings.weight.data.copy_(torch.from_numpy(glove.embd).float())
    if not option.update_embeddings:
        event_model.embeddings.weight.requires_grad = False

    if option.use_gpu:
        event_model.cuda()
        cosine_similarity.cuda()
        criterion.cuda()

    embedding_param_id = [id(param) for param in embeddings.parameters()]
    params = [{
        'params': embeddings.parameters()