Example No. 1
def __init__(self, opt, emb_matrix=None):
    self.opt = opt
    self.emb_matrix = emb_matrix
    self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
    self.criterion = nn.CrossEntropyLoss()
    self.parameters = [p for p in self.model.parameters() if p.requires_grad]
    if opt['cuda']:
        self.model.cuda()
        self.criterion.cuda()
    self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
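Every snippet on this page builds its optimizer through torch_utils.get_optimizer, which none of them define. A minimal sketch of what such a helper plausibly looks like (the supported optimizer names are an assumption, not confirmed by the source):

import torch

def get_optimizer(name, parameters, lr):
    # Hypothetical reconstruction: dispatch on a string name to a torch.optim class.
    if name == 'sgd':
        return torch.optim.SGD(parameters, lr=lr)
    elif name == 'adagrad':
        return torch.optim.Adagrad(parameters, lr=lr)
    elif name == 'adam':
        return torch.optim.Adam(parameters, lr=lr)
    elif name == 'adamax':
        return torch.optim.Adamax(parameters, lr=lr)
    else:
        raise ValueError('Unsupported optimizer: ' + name)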
Example No. 2
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                   self.parameters, opt['lr'])

    def update(self, batch):
        inputs, labels, tokens, head, subj_pos, obj_pos, lens = unpack_batch(
            batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(inputs)
        loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output**2).sum(1).mean()
        loss_val = loss.item()
        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                       self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        inputs, labels, tokens, head, subj_pos, obj_pos, lens = unpack_batch(
            batch, self.opt['cuda'])
        orig_idx = batch[11]
        # forward
        self.model.eval()
        logits, _ = self.model(inputs)
        print("logits:", logits.shape)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        # predictions = np.argmax(probs.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs = [
                list(t)
                for t in zip(*sorted(zip(orig_idx, predictions, probs)))
            ]
        return predictions, probs, loss.item()
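A trainer like this is normally driven by an outer loop that calls update() once per training batch and predict() once per evaluation batch. The sketch below is illustrative only; train_batches, dev_batches, and the opt keys are assumptions, not part of the example above:

trainer = GCNTrainer(opt, emb_matrix=emb_matrix)
for epoch in range(opt['num_epoch']):       # 'num_epoch' key assumed
    train_loss = 0.0
    for batch in train_batches:             # assumed iterable of collated batches
        train_loss += trainer.update(batch)
    dev_loss = 0.0
    for batch in dev_batches:
        _, _, batch_loss = trainer.predict(batch)
        dev_loss += batch_loss
    print('epoch {}: train_loss={:.4f} dev_loss={:.4f}'.format(
        epoch, train_loss, dev_loss))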
Example No. 3
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        inputs, labels, lens = unpack_batch(batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output, g, sparse_graph, c1, c2 = self.model(inputs)
        loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        # if self.opt.get('pooling_l2', 0) > 0:
        #     loss += self.opt['pooling_l2'] * (pooling_output ** 2).sum(1).mean()
        # loss += 0.0000001 * self.hloss(g.view(-1,g.shape[1]))
        # loss += 0.0000000001 * torch.norm(torch.abs(g.view(-1, g.shape[-1])-sparse_graph.view(-1, sparse_graph.shape[-1])))
        # c1l = c1.pow(2).sum(1).sqrt().unsqueeze(1)
        # c2l = c2.pow(2).sum(1).sqrt().unsqueeze(1)
        # loss_pred = -(torch.mm(c1, c2.transpose(0,1)) / torch.mm(c1l, c2l.transpose(0,1))).diag().abs().mean() + (c1l-c2l).abs().mean()
        # use the argmax of c2 as pseudo-labels and train c1 to match them
        c2 = torch.max(c2, 1)[1]
        loss_pred = self.criterion(c1, c2)
        loss += loss_pred
        loss_val = loss.item()
        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val, loss_pred.item()

    def predict(self, batch, unsort=True):
        inputs, labels, lens = unpack_batch(batch, self.opt['cuda'])
        # orig_idx = batch[11]
        # forward
        self.model.eval()
        logits, _, _, _, _, _ = self.model(inputs)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        # if unsort:
        #     _, predictions, probs = [list(t) for t in zip(*sorted(zip(orig_idx,\
        #             predictions, probs)))]
        return predictions, probs, loss.item()
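Note that the commented-out graph regularizer above refers to self.hloss, which this snippet never constructs; re-enabling that line as-is would raise an AttributeError. A common shape for such an entropy loss, offered purely as an assumption about the missing module, is:

import torch.nn as nn
import torch.nn.functional as F

class HLoss(nn.Module):
    # Entropy of a batch of logits; minimizing it sharpens the distributions.
    def forward(self, x):
        b = F.softmax(x, dim=1) * F.log_softmax(x, dim=1)
        return -b.sum()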
Example No. 4
def __init__(self, opt, emb_matrix=None):
    self.opt = opt
    self.emb_matrix = emb_matrix
    self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
    self.criterion = nn.CrossEntropyLoss(reduction="none")
    self.parameters = [
        p for p in self.model.parameters() if p.requires_grad
    ]
    self.crf = CRF(self.opt['num_class'], batch_first=True)
    self.bc = nn.BCELoss()
    if opt['cuda']:
        self.model.cuda()
        self.criterion.cuda()
        self.crf.cuda()
        self.bc.cuda()
    self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                               self.parameters, opt['lr'])
Example No. 5
    def __init__(self, model_files):

        self.models = []

        for model_file in model_files:
            opt = torch_utils.load_config(model_file)
            model = GCNClassifier(opt)
            checkpoint = self.get_checkpoint(model_file)
            model.load_state_dict(checkpoint['model'])

            if opt['cuda']:
                model.cuda()

            self.models.append(model)
Example No. 6
    def __init__(self,
                 model_stuff_list: List[ModelStuff],
                 biassed_prediction=None):
        self.id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
        self.models = OrderedDict()
        self.biassed_prediction = biassed_prediction

        for model_stuff in model_stuff_list:
            self.models[model_stuff.representation] = []

            for model_file in model_stuff.files:
                opt = torch_utils.load_config(model_file)
                model = GCNClassifier(opt)
                checkpoint = self.get_checkpoint(model_file)
                model.load_state_dict(checkpoint['model'])

                if opt['cuda']:
                    model.cuda()

                self.models[model_stuff.representation].append(model)
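Both ensemble constructors call self.get_checkpoint, which neither snippet shows. A minimal version consistent with the load_state_dict calls above (i.e. checkpoints saved as dictionaries with a 'model' key) might be, as an assumption:

import torch

def get_checkpoint(self, model_file):
    # Load on CPU first so the file opens even without a GPU;
    # the caller moves the model to CUDA afterwards when opt['cuda'] is set.
    return torch.load(model_file, map_location='cpu')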
Example No. 7
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                   self.parameters, opt['lr'])

    def update(self, batch):
        losses = {}
        # inputs, labels, tokens, head, subj_pos, obj_pos, lens = unpack_batch(batch, self.opt['cuda'])
        inputs, labels, orig_idx = maybe_place_batch_on_cuda(
            batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output, supplemental_losses = self.model(inputs)
        main_loss = self.criterion(logits, labels)
        losses['re_loss'] = main_loss.data.item()
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            conv_l2_loss = self.model.conv_l2() * self.opt['conv_l2']
            main_loss += conv_l2_loss
            losses['conv_l2'] = conv_l2_loss.data.item()
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            pooling_l2_loss = self.opt['pooling_l2'] * (pooling_output**
                                                        2).sum(1).mean()
            main_loss += pooling_l2_loss
            losses['pooling_l2'] = pooling_l2_loss.data.item()
        if self.opt['link_prediction'] is not None:
            label_smoothing = self.opt['link_prediction']['label_smoothing']
            observed_loss = supplemental_losses['observed'] * (
                1. - self.opt['link_prediction']['without_observed'])
            predicted_loss = supplemental_losses['baseline'] * (
                1. - self.opt['link_prediction']['without_verification'])
            main_loss += (observed_loss + predicted_loss
                          ) * self.opt['link_prediction']['lambda']
            observed_loss_value = observed_loss.data.item()
            predicted_loss_value = predicted_loss.data.item()
            losses.update({
                'kg_observed': observed_loss_value,
                'kg_predicted': predicted_loss_value
            })
        loss_val = main_loss.item()
        # backward
        main_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                       self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        # inputs, labels, tokens, head, subj_pos, obj_pos, lens = unpack_batch(batch, self.opt['cuda'])
        inputs, labels, orig_idx = maybe_place_batch_on_cuda(
            batch, self.opt['cuda'])
        # orig_idx = batch[11]
        # forward
        self.model.eval()
        logits, _, _ = self.model(inputs)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs = [
                list(t)
                for t in zip(*sorted(zip(orig_idx, predictions, probs)))
            ]
        return predictions, probs, loss.item()
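The link-prediction block gates each auxiliary loss with a (1. - flag) factor: setting without_observed or without_verification to 1 multiplies the corresponding term by zero, and lambda scales whatever survives. A toy illustration of the arithmetic (all values assumed):

observed_raw, predicted_raw = 0.8, 0.3          # assumed auxiliary loss values
without_observed, without_verification = 1, 0   # assumed config flags
lam = 0.5                                       # assumed link_prediction['lambda']
observed = observed_raw * (1. - without_observed)        # 0.0 -> gated off
predicted = predicted_raw * (1. - without_verification)  # 0.3 -> kept
extra = (observed + predicted) * lam                     # 0.15 added to the loss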
Example No. 8
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss(reduction="none")
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        self.crf = CRF(self.opt['num_class'], batch_first=True)
        self.bc = nn.BCELoss()
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
            self.crf.cuda()
            self.bc.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                   self.parameters, opt['lr'])

    def update(self, batch):
        inputs, labels, sent_labels, dep_path, tokens, head, lens = unpack_batch(
            batch, self.opt['cuda'])

        _, _, _, _, terms, _, _ = inputs

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, class_logits, selections, term_def, not_term_def, term_selections = self.model(
            inputs)

        # labels are 1-indexed in the data; shift to 0-indexed and clamp padding to 0
        labels = labels - 1
        labels[labels < 0] = 0
        # inputs[1] marks padding with 1; invert it so 1 marks real tokens for the CRF
        mask = inputs[1].eq(0).byte()
        loss = -self.crf(logits, labels, mask=mask)

        sent_loss = self.bc(class_logits, sent_labels)
        loss += self.opt['sent_loss'] * sent_loss

        selection_loss = self.bc(selections.view(-1, 1), dep_path.view(-1, 1))
        loss += self.opt['dep_path_loss'] * selection_loss

        term_def_loss = -self.opt['consistency_loss'] * (term_def -
                                                         not_term_def)
        loss += term_def_loss
        #loss += self.opt['consistency_loss'] * not_term_def

        term_loss = self.opt['sent_loss'] * self.bc(
            term_selections.view(-1, 1),
            terms.float().view(-1, 1))
        loss += term_loss

        loss_val = loss.item()
        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                       self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val, sent_loss.item(), term_loss.item()

    def predict(self, batch, unsort=True):
        inputs, labels, sent_labels, dep_path, tokens, head, lens = unpack_batch(
            batch, self.opt['cuda'])

        orig_idx = batch[-1]
        # forward
        self.model.eval()
        logits, sent_logits, _, _, _, _ = self.model(inputs)

        labels = labels - 1
        labels[labels < 0] = 0
        # invert the padding mask, as in update()
        mask = inputs[1].eq(0).byte()
        loss = -self.crf(logits, labels, mask=mask)

        # self.crf.transitions[0][4] = -1
        # self.crf.transitions[0][5] = -1
        # self.crf.transitions[0][6] = -1
        # self.crf.transitions[1][5] = -1
        # self.crf.transitions[1][6] = -1

        probs = F.softmax(logits, dim=-1)  # normalize over tags, not sequence positions
        predictions = self.crf.decode(logits, mask=mask)

        sent_predictions = sent_logits.round().long().data.cpu().numpy()

        if unsort:
            _, predictions, probs, sent_predictions = [
                list(t) for t in zip(*sorted(
                    zip(orig_idx, predictions, probs, sent_predictions)))
            ]
        return predictions, probs, loss.item(), sent_predictions
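The CRF here matches the interface of the pytorch-crf package (torchcrf.CRF): calling the module returns a log-likelihood, hence the negation to obtain a loss, and decode() returns the Viterbi tag sequences as Python lists. A standalone illustration, assuming that package is the dependency in use:

import torch
from torchcrf import CRF  # pip install pytorch-crf (assumed dependency)

num_tags, batch, seq_len = 5, 2, 7
crf = CRF(num_tags, batch_first=True)
emissions = torch.randn(batch, seq_len, num_tags)
tags = torch.randint(num_tags, (batch, seq_len))
mask = torch.ones(batch, seq_len, dtype=torch.uint8)
loss = -crf(emissions, tags, mask=mask)   # negative log-likelihood
paths = crf.decode(emissions, mask=mask)  # list of best tag sequences per sample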
Example No. 9
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None, ucca_embedding_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.ucca_embedding_matrix = ucca_embedding_matrix
        self.model = GCNClassifier(opt,
                                   emb_matrix=emb_matrix,
                                   ucca_embedding_matrix=ucca_embedding_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = get_optimizer(opt['optim'], self.parameters,
                                       opt['lr'])

    def update(self, batch):
        input, labels = self.unpack_batch(batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(input)
        loss = self.criterion(logits, labels)

        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']

        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output**2).sum(1).mean()

        loss_val = loss.item()

        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                       self.opt['max_grad_norm'])
        self.optimizer.step()

        return loss_val

    def predict(self, batch, unsort=True):
        input, labels = self.unpack_batch(batch, self.opt['cuda'])

        orig_idx = input.orig_idx
        ids = input.id

        # forward
        self.model.eval()
        logits, _ = self.model(input)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs, ids = [
                list(t)
                for t in zip(*sorted(zip(orig_idx, predictions, probs, ids)))
            ]

        return predictions, probs, loss.item(), ids

    def predict_with_confidence(self, batch, unsort=True):
        input, labels = self.unpack_batch(batch, self.opt['cuda'])

        orig_idx = input.orig_idx
        ids = input.id

        # forward
        self.model.eval()
        logits, _ = self.model(input)
        loss = self.criterion(logits, labels)

        probs = np.max(F.softmax(logits, 1).data.cpu().numpy(),
                       axis=1).tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()

        if unsort:
            _, predictions, probs, ids = [
                list(t)
                for t in zip(*sorted(zip(orig_idx, predictions, probs, ids)))
            ]

        return predictions, probs, loss.item(), ids

    def plural_predict(self, batch, plurality=2):
        input, labels = self.unpack_batch(batch, self.opt['cuda'])

        orig_idx = input.orig_idx
        ids = input.id

        # forward
        self.model.eval()
        logits, _ = self.model(input)

        ordered_predictions = np.argsort(-logits.data.cpu().numpy(), axis=1)
        predictions = []

        for i in range(plurality):

            prediction = ordered_predictions[:, i].tolist()
            _, prediction = [
                list(t) for t in zip(*sorted(zip(orig_idx, prediction)))
            ]

            predictions.append(prediction)

        _, ids = [list(t) for t in zip(*sorted(zip(orig_idx, ids)))]

        return predictions, ids

    def unpack_batch(self, batch, cuda):

        words = set_cuda(get_long_tensor(batch.word, batch.batch_size), cuda)
        masks = set_cuda(torch.eq(words, 0), cuda)
        pos = set_cuda(get_long_tensor(batch.pos, batch.batch_size), cuda)
        ner = set_cuda(get_long_tensor(batch.ner, batch.batch_size), cuda)
        coref = set_cuda(get_long_tensor(batch.coref, batch.batch_size), cuda)
        ucca_enc = set_cuda(get_long_tensor(batch.ucca_enc, batch.batch_size),
                            cuda)

        rel = set_cuda(torch.LongTensor(batch.rel), cuda)

        input = Input(batch_size=batch.batch_size,
                      word=words,
                      mask=masks,
                      pos=pos,
                      ner=ner,
                      coref=coref,
                      ucca_enc=ucca_enc,
                      len=batch.len,
                      head=batch.head,
                      ucca_head=batch.ucca_head,
                      ucca_multi_head=batch.ucca_multi_head,
                      ucca_dist_from_mh_path=batch.ucca_dist_from_mh_path,
                      subj_p=batch.subj_p,
                      obj_p=batch.obj_p,
                      id=batch.id,
                      orig_idx=batch.orig_idx)

        return input, rel
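unpack_batch relies on set_cuda and get_long_tensor, which the snippet does not define. Plausible minimal versions, consistent with how they are called above (batch-first LongTensors padded with id 0), would be the following; treat them as assumptions rather than the repository's actual helpers:

import torch

def get_long_tensor(tokens_list, batch_size):
    # Pad a list of token-id lists into a (batch_size, max_len) LongTensor, pad id 0.
    max_len = max(len(x) for x in tokens_list)
    tokens = torch.LongTensor(batch_size, max_len).fill_(0)
    for i, seq in enumerate(tokens_list):
        tokens[i, :len(seq)] = torch.LongTensor(seq)
    return tokens

def set_cuda(tensor, cuda):
    return tensor.cuda() if cuda else tensor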