def main(option):
    """Evaluate a pretrained event model on the transitive sentence similarity task.

    Loads the dataset (from raw files plus GloVe embeddings, or from a
    preprocessed cache), restores model weights from ``option.model_file``,
    embeds both sentences of every pair, scores each pair with the negated
    distance between the two event embeddings, and logs the Spearman
    correlation against the gold similarity ratings.  Optionally writes the
    per-pair scores to ``option.output_file``.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.DEBUG,
        format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s')

    dataset = TransitiveSentenceSimilarityDataset()
    if option.dataset_cache is None:
        # raw load path: needs the GloVe vocabulary to map tokens to ids
        glove = Glove(option.emb_file)
        logging.info('Embeddings loaded')
        dataset.load(option.dataset_file, glove)
    else:
        dataset.load_cache(option.dataset_cache)
    logging.info('Dataset loaded')

    # padding_idx=1 matches the convention the checkpoints were trained with
    embeddings = nn.Embedding(option.vocab_size, option.emb_dim, padding_idx=1)
    if option.model == 'NTN':
        model = NeuralTensorNetwork(embeddings, option.em_k)
    elif option.model == 'LowRankNTN':
        model = LowRankNeuralTensorNetwork(embeddings, option.em_k, option.em_r)
    elif option.model == 'RoleFactor':
        model = RoleFactoredTensorModel(embeddings, option.em_k)
    elif option.model == 'Predicate':
        model = PredicateTensorModel(embeddings)
    elif option.model == 'NN':
        model = NN(embeddings, 2 * option.em_k, option.em_k)
    elif option.model == 'EMC':
        model = EMC(embeddings, 2 * option.em_k, option.em_k)
    else:
        logging.info('Unknown model type: ' + option.model)
        sys.exit(1)  # was exit(1); sys.exit is the reliable form outside the REPL

    # Checkpoints exist in several historical formats: a dict keyed by
    # 'event_model_state_dict' or 'model_state_dict', or a bare state dict.
    checkpoint = torch.load(option.model_file, map_location='cpu')
    if isinstance(checkpoint, dict):  # was type(checkpoint) == dict
        if 'event_model_state_dict' in checkpoint:
            state_dict = checkpoint['event_model_state_dict']
        else:
            state_dict = checkpoint['model_state_dict']
    else:
        state_dict = checkpoint
    model.load_state_dict(state_dict)
    logging.info(option.model_file + ' loaded')

    if option.use_gpu:
        model.cuda()
    model.eval()

    # Single batch covering the whole (small) dataset; order preserved so the
    # written scores line up with the dataset rows.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        collate_fn=TransitiveSentenceSimilarityDataset_collate_fn,
        shuffle=False,
        batch_size=len(dataset))
    batch = next(iter(data_loader))
    e1_subj_id, e1_subj_w, e1_verb_id, e1_verb_w, e1_obj_id, e1_obj_w, \
        e2_subj_id, e2_subj_w, e2_verb_id, e2_verb_w, e2_obj_id, e2_obj_w, \
        gold = batch
    if option.use_gpu:
        e1_subj_id = e1_subj_id.cuda()
        e1_subj_w = e1_subj_w.cuda()
        e1_verb_id = e1_verb_id.cuda()
        e1_verb_w = e1_verb_w.cuda()
        e1_obj_id = e1_obj_id.cuda()
        e1_obj_w = e1_obj_w.cuda()
        e2_subj_id = e2_subj_id.cuda()
        e2_subj_w = e2_subj_w.cuda()
        e2_verb_id = e2_verb_id.cuda()
        e2_verb_w = e2_verb_w.cuda()
        e2_obj_id = e2_obj_id.cuda()
        e2_obj_w = e2_obj_w.cuda()

    e1_emb = model(e1_subj_id, e1_subj_w, e1_verb_id, e1_verb_w, e1_obj_id, e1_obj_w)
    e2_emb = model(e2_subj_id, e2_subj_w, e2_verb_id, e2_verb_w, e2_obj_id, e2_obj_w)

    if option.distance_metric == 'cosine':
        distance_func = cosine_distance
    elif option.distance_metric == 'euclid':
        distance_func = euclid_distance
    else:
        # BUG FIX: previously an unrecognized metric fell through with
        # distance_func unbound, crashing later with NameError.
        logging.info('Unknown distance metric: ' + option.distance_metric)
        sys.exit(1)

    # Higher similarity <=> smaller distance, so negate for rank correlation.
    pred = -distance_func(e1_emb, e2_emb)
    if option.use_gpu:
        pred = pred.cpu()
    pred = pred.detach().numpy()
    gold = gold.numpy()
    spearman_correlation, spearman_p = scipy.stats.spearmanr(pred, gold)

    if option.output_file.strip() != '':
        # with-block guarantees the file is closed even if a write fails
        with open(option.output_file, 'w') as output_file:
            for score in pred:
                output_file.write(str(score) + '\n')
        logging.info('Output saved to ' + option.output_file)
    logging.info('Spearman correlation: %.4f' % (spearman_correlation, ))
event_model = NeuralTensorNetwork(embeddings, option.em_k) elif option.model == 'RoleFactor': event_model = RoleFactoredTensorModel(embeddings, option.em_k) else: logging.info('Unknwon model: ' + option.model) exit(1) criterion = nn.CrossEntropyLoss() # load pretrained embeddings embeddings.weight.data.copy_(torch.from_numpy(glove.embd).float()) if not option.update_embeddings: event_model.embeddings.weight.requires_grad = False if option.use_gpu: event_model.cuda() neg_embeddings.cuda() criterion.cuda() params = [{ 'params': event_model.embeddings.parameters() }, { 'params': neg_embeddings.parameters() }] if option.model == 'NTN': params += [{ 'params': event_model.subj_verb_comp.parameters(), 'weight_decay': option.weight_decay }, { 'params': event_model.verb_obj_comp.parameters(), 'weight_decay': option.weight_decay