Example #1
File: data.py Project: ironsword666/CWS
    def __iter__(self):
        # pick the target device once per batch, then pad every field's
        # data and move it there before yielding
        for raw_batch in super(TextDataLoader, self).__iter__():
            batch, device = [], 'cuda' if torch.cuda.is_available() else 'cpu'
            for data, field in zip(raw_batch, self.fields):
                if isinstance(field, Field):
                    if isinstance(data[0], torch.Tensor):
                        # a batch of flat sequences: pad into a single tensor
                        data = pad(data, field.pad_index).to(device)
                    elif isinstance(data[0], Iterable):
                        # a batch of nested sequences: pad each sub-field
                        data = [
                            pad(f, field.pad_index).to(device)
                            for f in zip(*data)
                        ]
                batch.append(data)
            yield batch
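
Every snippet on this page leans on a pad helper. Below is a minimal sketch of such a helper, written to match the call sites above; it is a plausible reconstruction, not necessarily the exact implementation used by these projects:

import torch

def pad(tensors, padding_value=0, total_length=None):
    # right-pad a list of tensors to a common size along every dimension
    size = [len(tensors)] + [max(t.size(i) for t in tensors)
                             for i in range(tensors[0].dim())]
    if total_length is not None:
        # used by Example #3 below to stretch each row out to seq_len
        size[1] = max(size[1], total_length)
    out = tensors[0].new_full(size, padding_value)
    for i, tensor in enumerate(tensors):
        out[i][tuple(slice(0, n) for n in tensor.size())] = tensor
    return out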
Example #2
    def __iter__(self):
        for raw_batch in super().__iter__():
            batch, device = [], 'cuda' if torch.cuda.is_available() else 'cpu'
            for data, field in zip(raw_batch, self.fields):
                if isinstance(field, Field):
                    if isinstance(data[0], torch.Tensor):
                        data = pad(data, field.pad_index).to(device)
                    elif isinstance(data[0], Iterable):
                        data = [
                            pad(f, field.pad_index).to(device)
                            for f in zip(*data)
                        ]
                batch.append(data)
            # [list(word), list(bert), list(head), list(rels)] according to self.fields
            yield batch
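
To make the yielded structure concrete, here is a small runnable simulation of a single iteration, reusing the pad sketch from Example #1; the fields and values are made up:

import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
raw_batch = [
    [torch.tensor([2, 5, 7]), torch.tensor([3, 4])],   # word ids, 2 sentences
    [torch.tensor([0, 1, 1]), torch.tensor([0, 2])],   # head ids
]
batch = [pad(data, 0).to(device) for data in raw_batch]
# batch[0] and batch[1] are [2, 3] tensors, shorter rows zero-padded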
Example #3
def eisner(scores, mask):
    # first-order Eisner algorithm for projective dependency decoding;
    # scores: [batch_size, seq_len, seq_len] arc scores, mask: [batch_size,
    # seq_len] with position 0 (the pseudo-root) masked out
    lens = mask.sum(1)
    batch_size, seq_len, _ = scores.shape
    # move the batch dimension last so chart diagonals are easy to address
    scores = scores.permute(2, 1, 0)
    # s_i/s_c score incomplete/complete spans; p_i/p_c hold the backpointers
    s_i = torch.full_like(scores, float('-inf'))
    s_c = torch.full_like(scores, float('-inf'))
    p_i = scores.new_zeros(seq_len, seq_len, batch_size).long()
    p_c = scores.new_zeros(seq_len, seq_len, batch_size).long()
    s_c.diagonal().fill_(0)

    for w in range(1, seq_len):
        n = seq_len - w
        starts = p_i.new_tensor(range(n)).unsqueeze(0)
        # ilr = C(i->r) + C(j->r+1)
        ilr = stripe(s_c, n, w) + stripe(s_c, n, w, (w, 1))
        # [batch_size, n, w]
        il = ir = ilr.permute(2, 0, 1)
        # I(j->i) = max(C(i->r) + C(j->r+1) + s(j->i)), i <= r < j
        il_span, il_path = il.max(-1)
        s_i.diagonal(-w).copy_(il_span + scores.diagonal(-w))
        p_i.diagonal(-w).copy_(il_path + starts)
        # I(i->j) = max(C(i->r) + C(j->r+1) + s(i->j)), i <= r < j
        ir_span, ir_path = ir.max(-1)
        s_i.diagonal(w).copy_(ir_span + scores.diagonal(w))
        p_i.diagonal(w).copy_(ir_path + starts)

        # C(j->i) = max(C(r->i) + I(j->r)), i <= r < j
        cl = stripe(s_c, n, w, (0, 0), 0) + stripe(s_i, n, w, (w, 0))
        cl_span, cl_path = cl.permute(2, 0, 1).max(-1)
        s_c.diagonal(-w).copy_(cl_span)
        p_c.diagonal(-w).copy_(cl_path + starts)
        # C(i->j) = max(I(i->r) + C(r->j)), i < r <= j
        cr = stripe(s_i, n, w, (0, 1)) + stripe(s_c, n, w, (1, w), 0)
        cr_span, cr_path = cr.permute(2, 0, 1).max(-1)
        s_c.diagonal(w).copy_(cr_span)
        # a complete span headed by the root is only valid if it covers
        # the whole sentence
        s_c[0, w][lens.ne(w)] = float('-inf')
        p_c.diagonal(w).copy_(cr_path + starts + 1)

    def backtrack(p_i, p_c, heads, i, j, complete):
        if i == j:
            return
        if complete:
            r = p_c[i, j]
            backtrack(p_i, p_c, heads, i, r, False)
            backtrack(p_i, p_c, heads, r, j, True)
        else:
            r, heads[j] = p_i[i, j], i
            i, j = sorted((i, j))
            backtrack(p_i, p_c, heads, i, r, True)
            backtrack(p_i, p_c, heads, j, r + 1, True)

    preds = []
    p_c = p_c.permute(2, 0, 1).cpu()
    p_i = p_i.permute(2, 0, 1).cpu()
    for i, length in enumerate(lens.tolist()):
        heads = p_c.new_zeros(length + 1, dtype=torch.long)
        backtrack(p_i[i], p_c[i], heads, 0, length, True)
        preds.append(heads.to(mask.device))

    return pad(preds, total_length=seq_len).to(mask.device)
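
The chart updates above rely on a stripe helper that exposes diagonal bands of the chart as strided views. Below is a sketch of one common implementation found in biaffine-parser-style codebases (the projects behind these examples may differ in details), followed by a toy call of eisner; the toy also assumes the pad sketch from Example #1:

import torch

def stripe(x, n, w, offset=(0, 0), dim=1):
    # return a view of n diagonal stripes of width w from a [seq_len,
    # seq_len, ...] chart; dim=1 extends each stripe along rows, dim=0
    # along columns; offset picks the starting cell of the first stripe
    x, seq_len = x.contiguous(), x.size(1)
    stride, numel = list(x.stride()), x[0, 0].numel()
    stride[0] = (seq_len + 1) * numel
    stride[1] = (1 if dim == 1 else seq_len) * numel
    return x.as_strided(size=(n, w, *x.shape[2:]), stride=stride,
                        storage_offset=(offset[0] * seq_len + offset[1]) * numel)

# toy decode: 2 sentences of lengths 3 and 2; position 0 is the pseudo-root
scores = torch.randn(2, 4, 4)
mask = torch.tensor([[False, True, True, True],
                     [False, True, True, False]])
heads = eisner(scores, mask)   # [2, 4]: a head index for every position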
Example #4
    def compose(self, sequences):
        r"""
        Composes a batch of sequences into a padded tensor.

        Args:
            sequences (list[~torch.Tensor]):
                A list of tensors.

        Returns:
            A padded tensor moved to the proper device.
        """

        return pad(sequences, self.pad_index).to(self.device)
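
For instance, composing two index sequences with pad_index = 1 (illustrative values; relies on the pad sketch from Example #1):

import torch

seqs = [torch.tensor([2, 5, 7]), torch.tensor([3, 4])]
batch = pad(seqs, 1)   # stand-in for pad(sequences, self.pad_index)
# batch: tensor([[2, 5, 7],
#                [3, 4, 1]])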
Example #5
    def transform(self, sequences):
        # numericalize a batch of token sequences, where each token is
        # itself a sequence of subword/char units
        sequences = [[self.preprocess(token) for token in seq]
                     for seq in sequences]
        if self.fix_len <= 0:
            # an unset fix_len falls back to the longest token in the batch
            self.fix_len = max(
                len(token) for seq in sequences for token in seq)
        if self.use_vocab:
            sequences = [[[self.vocab[i] for i in token] for token in seq]
                         for seq in sequences]
        if self.bos:
            sequences = [[[self.bos_index]] + seq for seq in sequences]
        if self.eos:
            sequences = [seq + [[self.eos_index]] for seq in sequences]
        # pad all tokens of a sentence to a common length, truncating any
        # token longer than fix_len
        lens = [
            min(self.fix_len, max(len(ids) for ids in seq))
            for seq in sequences
        ]
        sequences = [
            pad([torch.tensor(ids[:i]) for ids in seq], self.pad_index, i)
            for i, seq in zip(lens, sequences)
        ]

        return sequences
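
A hedged trace of the final padding step in isolation, for one sentence of two tokens with fix_len = 4 (vocabulary ids are made up; pad is the sketch from Example #1):

import torch

seq = [[5, 6], [7, 8, 9]]                     # subword ids for two tokens
i = min(4, max(len(ids) for ids in seq))      # fix_len = 4, so i = 3
out = pad([torch.tensor(ids[:i]) for ids in seq], 0, i)
# out: tensor([[5, 6, 0],
#              [7, 8, 9]])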
Example #6
    def compose(self, sequences):
        # transpose the batch so tensors are grouped per field, then pad
        # each group and move it to the target device
        return [pad(i).to(self.device) for i in zip(*sequences)]
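
Here zip(*sequences) transposes a batch of per-sentence tensor tuples into per-field groups before padding. A small runnable illustration (values made up; pad is the sketch from Example #1):

import torch

sequences = [(torch.tensor([1, 2]), torch.tensor([7])),     # sentence 1
             (torch.tensor([3]), torch.tensor([8, 9]))]     # sentence 2
fields = [pad(f) for f in zip(*sequences)]
# fields[0]: tensor([[1, 2], [3, 0]])   first field, batched
# fields[1]: tensor([[7, 0], [8, 9]])   second field, batched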