Example #1
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
     self.model.to(self.opt["device"])
     if opt['cuda']:
         self.model.cuda()  # redundant once the model is on opt["device"]
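
The snippet above mixes .to(device) with a separate conditional .cuda() call. A minimal sketch of deriving the device once and reusing it, assuming opt is a plain dict and that GCNClassifier is importable (the module path is hypothetical):

import torch

from model.gcn import GCNClassifier  # assumed module path

def build_model(opt, emb_matrix=None):
    # single source of truth for device placement
    device = torch.device('cuda' if opt.get('cuda') else 'cpu')
    opt['device'] = device
    model = GCNClassifier(opt, emb_matrix=emb_matrix)
    return model.to(device)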
Example #2
 def __init__(self, opt, lbstokens, emb_matrix=None, cls=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     if cls is None:
         self.model = GCNClassifier(opt, lbstokens, emb_matrix=emb_matrix)
     else:
         self.model = GCNClassifier(opt,
                                    lbstokens,
                                    emb_matrix=emb_matrix,
                                    cls=cls)
     self.cls = cls
     self.rel_types = len(constant.LABEL_TO_ID)
     # track misclassification history between relation types
     # (rows: gold label, columns: predicted label)
     self.loss_matrix = torch.zeros((self.rel_types, self.rel_types),
                                    requires_grad=False)
     self.miss_matrix = torch.zeros((self.rel_types, self.rel_types),
                                    requires_grad=False)
     print(self.get_parameter_number(self.model))
     self.soft_criterion = SoftCrossEntropyLoss()
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
         self.loss_matrix = self.loss_matrix.cuda()
         self.miss_matrix = self.miss_matrix.cuda()
     self.optimizer = torch_utils.get_optimizer(
         opt['optim'], [{'params': self.parameters}], opt['lr'])
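
Example #2 uses a SoftCrossEntropyLoss that is not part of torch.nn. A minimal sketch of the standard soft-target cross-entropy it most plausibly implements (this class body is an assumption, not the repository's actual code):

import torch.nn as nn
import torch.nn.functional as F

class SoftCrossEntropyLoss(nn.Module):
    # cross-entropy against soft targets: -(p * log_softmax(logits)).sum()
    def forward(self, logits, soft_targets):
        log_probs = F.log_softmax(logits, dim=-1)
        return -(soft_targets * log_probs).sum(dim=-1).mean()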
Example #3
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'])
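
torch_utils.get_optimizer is called with an optimizer name, the trainable parameters, and a learning rate. A plausible sketch of such a name-based factory, under the assumption that it simply dispatches on the name (the supported names are guesses):

import torch

def get_optimizer(name, parameters, lr):
    # map an optimizer name from the config onto a torch.optim class
    if name == 'sgd':
        return torch.optim.SGD(parameters, lr=lr)
    if name == 'adagrad':
        return torch.optim.Adagrad(parameters, lr=lr)
    if name == 'adam':
        return torch.optim.Adam(parameters, lr=lr)
    raise ValueError('Unsupported optimizer: {}'.format(name))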
Example #4
# Imports this snippet needs; the module paths follow a common C-GCN-style
# layout and are assumptions, as are the Trainer base class and the
# unpack_batch helper, which live elsewhere in the same project.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from model.gcn import GCNClassifier
from utils import torch_utils


class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                   self.parameters, opt['lr'])

    def update(self, batch):
        inputs, labels, tokens, head, first_pos, second_pos, third_pos, cross, lens = unpack_batch(
            batch, self.opt['cuda'])

        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(inputs)
        loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output**2).sum(1).mean()
        loss_val = loss.item()
        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                       self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        inputs, labels, tokens, head, first_pos, second_pos, third_pos, cross, lens = unpack_batch(
            batch, self.opt['cuda'])

        # index 10 of the batch holds the original ordering of the
        # length-sorted examples; used to unsort the predictions below
        orig_idx = batch[10]

        # forward
        self.model.eval()
        with torch.no_grad():  # no gradients needed at evaluation time
            logits, _ = self.model(inputs)
            loss = self.criterion(logits, labels)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs = [list(t) for t in zip(*sorted(
                zip(orig_idx, predictions, probs)))]
        return predictions, probs, loss.item()
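
A hedged usage sketch for the trainer above; the opt keys mirror the ones the class reads, while train_batches and dev_batches are hypothetical iterables of already-packed batches:

opt = {'cuda': torch.cuda.is_available(), 'optim': 'sgd', 'lr': 1.0,
       'conv_l2': 0, 'pooling_l2': 0, 'max_grad_norm': 5.0}
trainer = GCNTrainer(opt)

for epoch in range(30):
    train_loss = sum(trainer.update(batch) for batch in train_batches)
    for batch in dev_batches:
        predictions, probs, dev_loss = trainer.predict(batch)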
Example #5
class GCNTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
        self.model.to(self.opt["device"])
        if opt['cuda']:
            self.model.cuda()  # redundant once the model is on opt["device"]

    def predict(self, batch, unsort=True):
        input_ids, input_mask, segment_ids, label_id = (
            batch[12], batch[13], batch[14], batch[15])
        inputs, labels, tokens, head, subj_pos, obj_pos, lens = unpack_batch(
            batch, self.opt['cuda'])
        # token counts per example, recovered from the mask in inputs[1]
        # (positions where the mask is 0 are real tokens)
        l = (inputs[1].data.cpu().numpy() == 0).astype(np.int64).sum(1)
        orig_idx = batch[11]
        device = self.opt["device"]
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        label_id = label_id.to(device)
        self.model.eval()
        with torch.no_grad():
            logits, loss = self.model(inputs, input_ids, input_mask,
                                      segment_ids, label_id, l, b=1)
        probs = F.softmax(logits, 1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        if unsort:
            _, predictions, probs = [list(t) for t in zip(*sorted(
                zip(orig_idx, predictions, probs)))]
        return predictions, probs, loss.item()
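
The zip(*sorted(zip(orig_idx, ...))) idiom used in the predict methods above undoes the length-based sorting applied when the batch was built. A small self-contained demonstration:

# the batch was processed in sorted order; orig_idx remembers where
# each example originally lived
orig_idx = [2, 0, 1]
predictions = ['p_for_2', 'p_for_0', 'p_for_1']
probs = [0.2, 0.0, 0.1]

_, predictions, probs = [list(t) for t in zip(*sorted(
    zip(orig_idx, predictions, probs)))]
print(predictions)  # ['p_for_0', 'p_for_1', 'p_for_2']
print(probs)        # [0.0, 0.1, 0.2]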
Example #6
# Imports this snippet needs; the module paths are assumptions in the same
# spirit as in Example #4, and SoftCrossEntropyLoss is defined by the
# project itself (a plausible definition is sketched after Example #2).
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from model.gcn import GCNClassifier
from utils import constant, torch_utils


class GCNTrainer(Trainer):
    def __init__(self, opt, lbstokens, emb_matrix=None, cls=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        if cls is None:
            self.model = GCNClassifier(opt, lbstokens, emb_matrix=emb_matrix)
        else:
            self.model = GCNClassifier(opt,
                                       lbstokens,
                                       emb_matrix=emb_matrix,
                                       cls=cls)
        self.cls = cls
        self.rel_types = len(constant.LABEL_TO_ID)
        # track misclassification history between relation types
        # (rows: gold label, columns: predicted label); see updatemisclass()
        self.loss_matrix = torch.zeros((self.rel_types, self.rel_types),
                                       requires_grad=False)
        self.miss_matrix = torch.zeros((self.rel_types, self.rel_types),
                                       requires_grad=False)
        print(self.get_parameter_number(self.model))
        self.soft_criterion = SoftCrossEntropyLoss()
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
            self.loss_matrix = self.loss_matrix.cuda()
            self.miss_matrix = self.miss_matrix.cuda()
        self.optimizer = torch_utils.get_optimizer(
            opt['optim'], [{'params': self.parameters}], opt['lr'])

    def get_parameter_number(self, net):
        total_num = sum(p.numel() for p in net.parameters())
        trainable_num = sum(p.numel() for p in net.parameters()
                            if p.requires_grad)
        return {'Total': total_num, 'Trainable': trainable_num}

    def update(self, batch, is_soft=False):
        inputs, labels, tokens, distance, ids, iscross = unpack_batch(
            batch, self.opt['cuda'])
        # step forward
        self.model.train()
        self.optimizer.zero_grad()
        logits, pooling_output = self.model(inputs)

        # NOTE: an experimental extra loss term weighted by the
        # misclassification history in self.loss_matrix is disabled
        if is_soft:
            # soft labels: targets are probability distributions
            labels = labels.float()
            loss = self.soft_criterion(logits, labels)
        else:
            loss = self.criterion(logits, labels)
        # l2 decay on all conv layers
        if self.opt.get('conv_l2', 0) > 0:
            loss += self.model.conv_l2() * self.opt['conv_l2']
        # l2 penalty on output representations
        if self.opt.get('pooling_l2', 0) > 0:
            loss += self.opt['pooling_l2'] * (pooling_output**2).sum(1).mean()

        loss_val = loss.item()
        # NOTE: accumulation of the per-batch confusion counts into
        # self.miss_matrix is disabled
        # backward
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                       self.opt['max_grad_norm'])
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        inputs, labels, tokens, distance, ids, iscross = unpack_batch(
            batch, self.opt['cuda'])
        orig_idx = batch[21]  # original ordering of the length-sorted examples
        batch_size = labels.shape[0]
        # forward
        self.model.eval()
        logits, _ = self.model(inputs)
        loss = self.criterion(logits, labels)
        probs = F.softmax(logits, dim=-1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()

        # collect the ids of misclassified examples
        samples = []
        gold = labels.data.cpu().numpy().tolist()
        for i in range(batch_size):
            if predictions[i] != gold[i]:
                samples.append(ids[i])
        if unsort:
            _, predictions, probs = [list(t) for t in zip(*sorted(
                zip(orig_idx, predictions, probs)))]
        ids = [str(i) for i in ids]
        probs = dict(zip(ids, probs))
        return predictions, probs, loss.item(), samples

    def updatemisclass(self):
        # convert accumulated confusion counts into row-normalized weights,
        # zero the diagonal (correct predictions), then reset the counts
        self.loss_matrix = self.miss_matrix.clone()
        row_sums = self.loss_matrix.sum(dim=-1, keepdim=True)
        # clamp guards against division by zero for labels with no errors
        self.loss_matrix = self.loss_matrix / row_sums.clamp(min=1e-8)
        np.save('missclass.npy', self.miss_matrix.cpu().detach().numpy())
        self.loss_matrix = self.loss_matrix.masked_fill(
            torch.eye(self.rel_types).cuda() == 1, 0)
        self.miss_matrix = self.miss_matrix.zero_()
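
updatemisclass assumes self.miss_matrix has been filled with per-batch confusion counts, but that accumulation is disabled in update above. A minimal sketch of how the counts could be gathered, assuming rows index gold labels and columns index predictions (the helper name is hypothetical):

def accumulate_confusion(miss_matrix, labels, predictions):
    # miss_matrix: (rel_types, rel_types) counts; rows = gold, cols = predicted
    # labels: 1-D tensor of gold ids; predictions: list of predicted ids
    for gold, pred in zip(labels.tolist(), predictions):
        miss_matrix[gold, pred] += 1
    return miss_matrix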