Example #1
 def __init__(self, dataset, q_word_emb, q_dep_emb, rel_word_emb, rel_id_emb, use_attn=False, use_constraint=False,
              constraint_word_emb=None, constraint_id_emb=None, always_pooling=False, use_el_score=False,
              use_prior_weights=False, q_word_emb_dim=300, q_dep_emb_dim=300, lstm_hidden_dim=300, rel_word_emb_dim=300,
              rel_id_emb_dim=300, attn_q_dim=600, attn_rel_dim=600, attn_hid_dim=100, dropout=0.0, max_seq_len=13,
              linear_hid_dim=1024, output_dim=2, max_epoch=20, optim='adam', lr=0.001, reward_threshold=0.5,
              pooling_threshold=1, batch_size=64, momentum=0, weight_decay=0, lr_gamma=0.1):
     self.sudo_batch = 1
     self.batch_size = batch_size
     self.max_epoch = max_epoch
     self.reward_threshold = cuda_wrapper(torch.tensor(reward_threshold))
     self.pooling_threshold = pooling_threshold
     self.use_constraint = use_constraint
     self.use_attn = use_attn
     self.always_pooling = always_pooling
     self.use_el_score = use_el_score
     self.use_prior_weights = use_prior_weights
     self.data_loader = dataset
     self.model = cuda_wrapper(ComplexWebQSP_Model(
         q_word_emb=q_word_emb, q_dep_emb=q_dep_emb, rel_word_emb=rel_word_emb,
         rel_id_emb=rel_id_emb, use_constraint=use_constraint, use_attn=use_attn,
         constraint_id_emb=constraint_id_emb, constraint_word_emb=constraint_word_emb,
         q_word_emb_dim=q_word_emb_dim, q_dep_emb_dim=q_dep_emb_dim,
         lstm_hidden_dim=lstm_hidden_dim, word_emb_dim=rel_word_emb_dim,
         id_emb_dim=rel_id_emb_dim, attn_q_dim=attn_q_dim, attn_rel_dim=attn_rel_dim,
         attn_hid_dim=attn_hid_dim, max_seq_len=max_seq_len, dropout=dropout,
         linear_hid_dim=linear_hid_dim, output_dim=output_dim, use_rel_id=True))
     if optim.lower() == 'adam':
         self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay)
     elif optim.lower() == 'sgd':
         self.optimizer = torch.optim.SGD(self.model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
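     # decay the learning rate by lr_gamma at epochs 3, 5 and 7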
     self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, gamma=lr_gamma, milestones=[3, 5, 7])
     sys.stderr.write("Train model config:\n q_word_emb: {}, q_dep_emb: {}\n".format(q_word_emb.shape, q_dep_emb.shape))
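A minimal usage sketch for this constructor (the class name `Trainer` and the variable names are hypothetical; the embedding matrices and batch loader are assumed to come from the project's preprocessing pipeline):

    # Hypothetical names throughout; only the keyword arguments match the signature above.
    trainer = Trainer(dataset=train_loader, q_word_emb=q_word_emb, q_dep_emb=q_dep_emb,
                      rel_word_emb=rel_word_emb, rel_id_emb=rel_id_emb,
                      use_attn=True, max_epoch=20, lr=0.001)
    trainer.train(save_dir='checkpoints')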
Example #2
    def __init__(self, dataset_loader, q_word_to_idx, q_dep_to_idx, rel_word_to_idx, rel_id_to_idx, constraint_word_to_idx,
                 constraint_id_to_idx, q_word_emb, q_dep_emb, rel_word_emb, rel_id_emb, constraint_word_emb,
                 constraint_id_emb, device=0, test_batch_size=128, use_constraint=True, use_attn=True):
        torch.backends.cudnn.enabled = False
        torch.cuda.set_device(device)
        print "device: {}".format(torch.cuda.current_device())
        self.reward_threshold = 0.5
        self.test_batch_size = test_batch_size
        self.q_word_to_idx = q_word_to_idx
        self.q_dep_to_idx = q_dep_to_idx
        self.rel_word_to_idx = rel_word_to_idx
        self.rel_id_to_idx = rel_id_to_idx
        self.constraint_word_to_idx = constraint_word_to_idx
        self.constraint_id_to_idx = constraint_id_to_idx
        self.q_word_emb = q_word_emb
        self.q_dep_emb = q_dep_emb
        self.rel_word_emb = rel_word_emb
        self.rel_id_emb = rel_id_emb
        self.constraint_word_emb = constraint_word_emb
        self.constraint_id_emb = constraint_id_emb
        self.use_constraint = use_constraint
        self.use_attn = use_attn
        self.data_loader = dataset_loader
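        # build inverse lookup tables (index -> key) by inverting the *_to_idx maps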
        self.rel_idx_to_id = {v: k for k, v in self.rel_id_to_idx.items()}
        self.word_idx_to_q_word = {v: k for k, v in self.q_word_to_idx.items()}
        self.rel_idx_to_word = {v: k for k, v in self.rel_word_to_idx.items()}
        self.constraint_idx_to_id = {}
        if self.use_constraint:
            self.constraint_idx_to_id = {v: k for k, v in self.constraint_id_to_idx.items()}

        self.model = cuda_wrapper(ComplexWebQSP_Model(
            q_word_emb=self.q_word_emb, q_dep_emb=self.q_dep_emb, rel_word_emb=self.rel_word_emb,
            rel_id_emb=self.rel_id_emb, use_constraint=self.use_constraint,
            constraint_id_emb=self.constraint_id_emb, constraint_word_emb=self.constraint_word_emb, use_attn=self.use_attn
        ))
        sys.stderr.write("Testing model configuration:\n q_word_emb: {}, q_dep_emb: {}\n".format(
            self.model.q_encoder.q_word_emb.weight.shape, self.model.q_encoder.q_dep_emb.weight.shape))
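The inverse tables above are plain dictionary inversions. This assumes each *_to_idx map is one-to-one; duplicate indices would silently overwrite one another. A self-contained illustration (the relation names are invented):

    rel_id_to_idx = {'people.person.nationality': 0, 'location.location.containedby': 1}
    rel_idx_to_id = {v: k for k, v in rel_id_to_idx.items()}
    assert rel_idx_to_id[0] == 'people.person.nationality'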
Example #3
    def eval(self):
        criterion = Dynamic_Cross_Entropy_Loss()
        criterion.eval()
        self.model.eval()
        loss = 0
        with torch.no_grad():
            for idx in range(len(self.data_loader)):
                data_dict = self.data_loader.get_one_batch(idx)
                padded_q_word, q_word_lengths, padded_q_dep, q_dep_lengths, padded_rel_words, rel_word_lengths, batch_rel_ids, \
                padded_cons_words, cons_word_lengths, padded_cons_id, cons_id_lengths, batch_prior_weights, \
                batch_labels = self.unpack_data_dict(data_dict)
                if not self.data_loader.cpu_data:
                    out = self.model(padded_q_word_seq=padded_q_word, q_word_lengths=q_word_lengths,
                                     padded_q_dep_seq=padded_q_dep, q_dep_lengths=q_dep_lengths,
                                     padded_rel_words_seq=padded_rel_words, rel_word_lengths=rel_word_lengths,
                                     rel_ids=batch_rel_ids, padded_constraint_words_seq=padded_cons_words,
                                     constraint_word_lengths=cons_word_lengths, padded_constraint_ids=padded_cons_id,
                                     constraint_id_lengths=cons_id_lengths, pooling=self.use_constraint)
                else:
                    out = self.model(padded_q_word_seq=cuda_wrapper(padded_q_word), q_word_lengths=q_word_lengths,
                                     padded_q_dep_seq=cuda_wrapper(padded_q_dep), q_dep_lengths=q_dep_lengths,
                                     padded_rel_words_seq=cuda_wrapper(padded_rel_words),
                                     rel_word_lengths=rel_word_lengths, rel_ids=cuda_wrapper(batch_rel_ids),
                                     padded_constraint_words_seq=cuda_wrapper(padded_cons_words),
                                     constraint_word_lengths=cons_word_lengths,
                                     padded_constraint_ids=cuda_wrapper(padded_cons_id),
                                     constraint_id_lengths=cons_id_lengths, pooling=self.use_constraint)
                if self.use_prior_weights:
                    loss += criterion.forward(out, cuda_wrapper(batch_labels.long()), cuda_wrapper(batch_prior_weights)).item()
                else:
                    loss += criterion.forward(out, cuda_wrapper(batch_labels.long()), None).item()

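        # restore training mode and re-enable gradients on every parameter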
        self.model.train()
        for param in self.model.parameters():
            param.requires_grad = True
        return loss
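A minimal illustration of the evaluation pattern used above (standard PyTorch; the toy model is made up): eval() disables dropout, no_grad() turns off autograd bookkeeping for the forward passes, and train() restores training mode afterwards.

    import torch

    model = torch.nn.Sequential(torch.nn.Linear(4, 2), torch.nn.Dropout(0.5))
    model.eval()                        # deterministic forward: dropout disabled
    with torch.no_grad():               # no gradient tracking during evaluation
        out = model(torch.randn(3, 4))
    model.train()                       # back to training mode for the next epoch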
Example #4
    def train(self, save_dir=None):
        criterion = Dynamic_Cross_Entropy_Loss()
        epoch_loss_history = [float("inf"), ]
        sys.stderr.write('Max Epoch: {}\n'.format(self.max_epoch))
        for epoch in range(self.max_epoch):
            progbar = Progbar(len(self.data_loader), file=sys.stderr)
            prog_idx = 0
            shuffled_indices = list(range(len(self.data_loader)))
            random.shuffle(shuffled_indices)
            for curr_index in shuffled_indices:
                data_dict = self.data_loader.get_one_batch(curr_index)
                padded_q_word, q_word_lengths, padded_q_dep, q_dep_lengths, padded_rel_words, rel_word_lengths, batch_rel_ids, \
                padded_cons_words,cons_word_lengths, padded_cons_id, cons_id_lengths, batch_prior_weights, batch_labels \
                    = self.unpack_data_dict(data_dict)
                # print "curr_idx: {}".format(curr_index)
                # print "padded_q_word: {}".format(padded_q_word)
                self.model.zero_grad()
                self.optimizer.zero_grad()
                # train
                # During early epochs, freeze the pre-trained rows of the question
                # word embedding. Slicing a Parameter yields a non-leaf tensor whose
                # requires_grad flag cannot be toggled, so the freeze is applied by
                # zeroing those gradient rows after backward() below.
                freeze_from = None
                if epoch <= self.pooling_threshold:
                    freeze_from = 50 if self.data_loader.use_entity_type else 4

                # no need to pass use_constraint here; the model was configured with it at init
                if not self.data_loader.cpu_data:
                    out = self.model(padded_q_word_seq=padded_q_word, q_word_lengths=q_word_lengths,
                                     padded_q_dep_seq=padded_q_dep, q_dep_lengths=q_dep_lengths,
                                     padded_rel_words_seq=padded_rel_words, rel_word_lengths=rel_word_lengths,
                                     rel_ids=batch_rel_ids, padded_constraint_words_seq=padded_cons_words,
                                     constraint_word_lengths=cons_word_lengths, padded_constraint_ids=padded_cons_id,
                                     constraint_id_lengths=cons_id_lengths, pooling=self.pooling_criterion(epoch))
                else:
                    out = self.model(padded_q_word_seq=cuda_wrapper(padded_q_word), q_word_lengths=q_word_lengths,
                                     padded_q_dep_seq=cuda_wrapper(padded_q_dep), q_dep_lengths=q_dep_lengths,
                                     padded_rel_words_seq=cuda_wrapper(padded_rel_words),
                                     rel_word_lengths=rel_word_lengths, rel_ids=cuda_wrapper(batch_rel_ids),
                                     padded_constraint_words_seq=cuda_wrapper(padded_cons_words),
                                     constraint_word_lengths=cons_word_lengths,
                                     padded_constraint_ids=cuda_wrapper(padded_cons_id),
                                     constraint_id_lengths=cons_id_lengths, pooling=self.pooling_criterion(epoch))
                if self.use_prior_weights:
                    loss = criterion.forward(out, cuda_wrapper(batch_labels.long()), cuda_wrapper(batch_prior_weights))
                else:
                    loss = criterion.forward(out, cuda_wrapper(batch_labels.long()), None)
                loss.backward()
                # print("epoch: {}, iter: {}, loss: {}".format(epoch, prog_idx, loss.item()))
                if freeze_from is not None and self.model.q_encoder.q_word_emb.weight.grad is not None:
                    # zero the gradients of the frozen rows so optimizer.step() leaves them unchanged
                    self.model.q_encoder.q_word_emb.weight.grad[freeze_from:] = 0
                self.optimizer.step()
                if self.use_constraint:
                    self.model.query_graph_encoder.constraint_word_emb.weight.requires_grad = True
                    self.model.query_graph_encoder.constraint_id_emb.weight.requires_grad = True
                progbar.update(prog_idx + 1, [("loss", loss.item())])
                prog_idx += 1
            self.scheduler.step()  # decay the learning rate once per epoch (milestones 3, 5, 7)
            #epoch_loss = self.eval()
            #epoch_loss_history.append(epoch_loss)
            #sys.stderr.write("Epoch: {}, Loss: {}\n".format(epoch, epoch_loss))

            #print "Epoch: {}, Loss: {}".format(epoch, epoch_loss)
            if epoch == self.max_epoch - 1 or epoch % 3 == 2:
                epoch_loss = self.eval()
                sys.stderr.write("Epoch: {}, Loss: {}\n".format(epoch, epoch_loss))
            if save_dir is not None:
                check_point = {
                    #'loss': epoch_loss,
                    'state_dict': self.model.state_dict()
                }
                torch.save(check_point, os.path.join(save_dir, str(epoch)))
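A self-contained sketch of the row-freezing trick used in train() above (standard PyTorch; the toy sizes are arbitrary): zeroing gradient rows between backward() and step() keeps the optimizer from updating those embedding rows.

    import torch

    emb = torch.nn.Embedding(10, 3)
    opt = torch.optim.SGD(emb.parameters(), lr=0.1)
    loss = emb(torch.tensor([1, 5])).sum()
    loss.backward()
    emb.weight.grad[4:] = 0   # freeze rows 4..9: their update becomes -lr * 0
    opt.step()                # only the un-frozen rows with non-zero grads move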
Example #5
    sys.stderr.write("Finish initializing data loader...\n")
    tester = Tester(dataset_loader=data_loader, q_word_to_idx=q_word_to_idx, q_dep_to_idx=q_dep_to_idx,
                    rel_word_to_idx=rel_word_to_idx, rel_id_to_idx=rel_id_to_idx, constraint_word_to_idx=constraint_word_to_idx,
                    constraint_id_to_idx=constraint_id_to_idx, q_word_emb=q_word_emb, q_dep_emb=q_dep_emb,
                    rel_word_emb=rel_word_emb, rel_id_emb=rel_id_emb, constraint_word_emb=constraint_word_emb,
                    constraint_id_emb=constraint_id_emb, use_attn=use_attn, use_constraint=use_constraint)
    tester.data_loader.write_sub1_file(sub1_path)
    tester_inferencer = Tester_Interface(ques_src=ques_src, sub1_flat_file_path=sub1_path, sub1_cands_dir=sub1_cands_dir,
                                         sub2_cands_dir=sub2_cands_dir)
    sys.stderr.write('Finish initializing tester and tester inferencer...\n')
    model_path = '/public/ComplexWebQuestions_Resources/0418_nodep/5'
    model_name = '5'
    model = cuda_wrapper(ComplexWebQSP_Model(
            q_word_emb=q_word_emb, q_dep_emb=q_dep_emb, rel_word_emb=rel_word_emb,
            rel_id_emb=rel_id_emb, use_constraint=use_constraint, use_attn=use_attn,
            constraint_id_emb=constraint_id_emb, constraint_word_emb=constraint_word_emb,
            max_seq_len=33, attn_hid_dim=500))
    cp = torch.load(model_path)
    model.load_state_dict(cp['state_dict'])
    sys.stderr.write('Finish init model\n')
    tester.viz_model_prediction(model, model_name, running_output, sub1_path, sub2_path, tester_inferencer)
    sys.stderr.write("Start testing n_best f1 score for queries...\n")
    n_bests = [25, 10, 5, 1]
    for n_best in n_bests:
        print "n_best: {}".format(n_best)
        tester_inferencer.get_average_f1('/public/ComplexWebQuestions_Resources/0418_nodep/test_results/5_prediction.csv', n_best)