Пример #1
0
def _enumerate_pairs(n, ckey):
    """Build pair keys for every (left, right) index pair around ``ckey``.

    Left indices range over ``[0, ckey)`` and right indices over
    ``(ckey, n)``. Every left index is combined with every right index and
    the two aligned index arrays are handed to ``get_pair_key``.

    Args:
        n: Exclusive upper bound of the index range (presumably the
            sequence length -- confirm against callers).
        ckey: Index separating the left side from the right side.

    Returns:
        ``None`` when ``ckey`` is ``0`` or ``n - 1`` (one of the two sides
        would be empty); otherwise the result of ``get_pair_key`` applied
        to the left and right index arrays.
    """
    if ckey == 0 or ckey == n - 1:
        return None
    n_right = n - ckey - 1
    # Each left index repeated once per right index: 0, 0, .., 1, 1, ..
    left = np.tile(np.arange(0, ckey), (n_right, 1)).T.reshape(-1)
    # The full run of right indices repeated once per left index.
    right = np.tile(np.arange(ckey + 1, n), ckey)
    return get_pair_key(left, right)
Пример #2
0
def _enumerate_bipairs(n, ckey, division):
    """Build the two candidate pair-key sets around ``ckey``.

    For the ``BEGIN_END`` and ``INNER_OUTER`` divisions both members of
    the returned tuple are the same object: the pair keys for every
    combination of a left index in ``[0, ckey)`` and a right index in
    ``(ckey, n)``.

    Args:
        n: Exclusive upper bound of the index range (presumably the
            sequence length -- confirm against callers).
        ckey: Index separating the left side from the right side.
        division: A ``PairDivision`` member selecting the scheme.

    Returns:
        ``None`` when ``ckey`` is ``0`` or ``n - 1``; otherwise a 2-tuple
        ``(pairs, pairs)`` holding the ``get_pair_key`` result twice.

    Raises:
        NotImplementedError: If ``division`` is ``PairDivision.LEFT_RIGHT``.
        ValueError: If ``division`` is none of the known members.
    """
    if division == PairDivision.LEFT_RIGHT:
        raise NotImplementedError
    is_supported = (division == PairDivision.BEGIN_END
                    or division == PairDivision.INNER_OUTER)
    if not is_supported:
        raise ValueError('unknown division: {}'.format(division))
    if ckey == 0 or ckey == n - 1:
        return None
    n_right = n - ckey - 1
    # Each left index repeated once per right index.
    left = np.tile(np.arange(0, ckey), (n_right, 1)).T.reshape(-1)
    # The full run of right indices repeated once per left index.
    right = np.tile(np.arange(ckey + 1, n), ckey)
    pairs = get_pair_key(left, right)
    return (pairs, pairs)
Пример #3
0
    def compute_metrics(self, output, gold):
        """Compute the coordination loss and accuracy for one batch.

        Gold pairs are derived for every (sentence, ckey) combination in
        batch order. When ``output`` has no ``'ckey_scores'`` yet, the
        scores are produced by ``self._forward_scores`` and merged into
        ``output`` in place.

        Args:
            output: Dict with the keys ``'ckeys'``, ``'ckey_types'``,
                ``'encoded_seqs'``, ``'lengths'`` and, once scored,
                ``'ckey_scores'``, ``'pair_scores'``, ``'pair_offsets'``
                and ``'pairs'``. It is updated in place if scores are
                missing.
            gold: Per-sentence gold coordinations, aligned with
                ``output['ckeys']``.

        Returns:
            Dict with ``'coord_loss'`` and ``'coord_accuracy'``.
        """
        # One gold pair per (sentence, ckey), flattened in batch order.
        gold_pairs = []
        for sent_ckeys, sent_ckey_types, sent_coords in zip(
                output['ckeys'], output['ckey_types'], gold):
            for ckey, ckey_type in zip(sent_ckeys, sent_ckey_types):
                gold_pairs.append(
                    _get_true_pair(ckey, ckey_type, sent_coords))

        if output.get('ckey_scores') is None:
            forwarded = self._forward_scores(
                output['encoded_seqs'], output['lengths'],
                output['ckeys'], output['ckey_types'], gold_pairs)
            output.update(forwarded)
        ckey_scores = output['ckey_scores']
        xp = chainer.cuda.get_array_module(ckey_scores)

        flat_scores = output['pair_scores']
        pair_offsets = output['pair_offsets']
        # Cut the flat score vector into one chunk per ckey.
        per_ckey_scores = list(
            F.split_axis(flat_scores, pair_offsets[:-1], axis=0))

        # Position of the gold pair among each ckey's candidates, or -1
        # when there are no candidates or no gold pair for that ckey.
        candidates = (cand
                      for sent_pairs in output['pairs']
                      for cand in sent_pairs)
        targets = []
        for k, cand in enumerate(candidates):
            gold_pair = gold_pairs[k]
            if cand is not None and gold_pair is not None:
                hits = np.argwhere(cand == get_pair_key(*gold_pair))
                assert hits.size == 1
                targets.append(hits[0, 0])
            else:
                targets.append(-1)

        targets = xp.asarray(targets, xp.int32)
        if pair_offsets[-1] > 0:
            # Pad with -inf so padded slots get zero softmax probability.
            padded = F.pad_sequence(per_ckey_scores, padding=-np.inf)
            assert padded.shape[0] == targets.size
            scores = F.hstack((ckey_scores, padded))
        else:
            scores = ckey_scores
        # Shift by one: -1 becomes 0 and lands on the leading ckey_scores
        # column(s) -- presumably a single "no coordination" column; TODO
        # confirm the width of ckey_scores.
        targets += 1

        return {
            'coord_loss': F.softmax_cross_entropy(scores, targets),
            'coord_accuracy': common.accuracy(scores, targets),
        }
Пример #4
0
 def lookup_bispan_score(self, sentence_index, ckey, bispan):
     """Return the summed score of the two pairs that make up ``bispan``.

     The bispan is converted to a bipair with ``_bispan_to_bipair`` using
     ``self._division``; each half is turned into a pair key and looked up
     in the entry stored for ``ckey`` of the given sentence.

     Args:
         sentence_index: Index passed to ``self.get_entries``.
         ckey: Key selecting the entry within that sentence's entries.
         bispan: Bispan to score.

     Returns:
         ``entry['pairs1_entries'][key1] + entry['pairs2_entries'][key2]``.
     """
     entry = self.get_entries(sentence_index)[ckey]
     bipair = _bispan_to_bipair(bispan, self._division)

     first_table = entry['pairs1_entries']
     first_key = get_pair_key(*bipair[0])
     first_score = first_table[first_key]

     second_table = entry['pairs2_entries']
     second_key = get_pair_key(*bipair[1])
     second_score = second_table[second_key]

     return first_score + second_score
Пример #5
0
    def compute_metrics(self, output, gold):
        """Compute ckey and bipair losses and accuracies for one batch.

        Gold ckey labels and gold bipairs are derived for every
        (sentence, ckey) combination in batch order. When ``output`` has
        no ``'ckey_scores'`` yet, the scores are produced by
        ``self._forward_scores`` and merged into ``output`` in place.

        Args:
            output: Dict with the keys ``'ckeys'``, ``'ckey_types'``,
                ``'encoded_seqs'``, ``'lengths'`` and, once scored,
                ``'ckey_scores'``, ``'bipair_scores'``,
                ``'bipair_offsets'`` and ``'bipairs'``. It is updated in
                place if scores are missing.
            gold: Per-sentence gold coordinations, aligned with
                ``output['ckeys']``.

        Returns:
            Dict with ``'ckey_loss'``, ``'ckey_accuracy'``,
            ``'pair1_loss'``, ``'pair1_accuracy'``, ``'pair2_loss'`` and
            ``'pair2_accuracy'``.
        """
        division = self._division
        # One (label, gold bipair) per (sentence, ckey), in batch order.
        labelled = []
        for sent_ckeys, sent_ckey_types, sent_coords in zip(
                output['ckeys'], output['ckey_types'], gold):
            for ckey, ckey_type in zip(sent_ckeys, sent_ckey_types):
                labelled.append(
                    (_get_true_ckey_label(ckey, ckey_type, sent_coords),
                     _get_true_bipair(ckey, ckey_type, sent_coords,
                                      division)))
        ckey_labels, gold_bipairs = zip(*labelled)

        if output.get('ckey_scores') is None:
            forwarded = self._forward_scores(
                output['encoded_seqs'], output['lengths'],
                output['ckeys'], output['ckey_types'], gold_bipairs)
            output.update(forwarded)
        ckey_scores = output['ckey_scores']
        xp = chainer.cuda.get_array_module(ckey_scores)
        ckey_labels = xp.asarray(ckey_labels, xp.int32)
        ckey_loss = F.softmax_cross_entropy(ckey_scores, ckey_labels)
        ckey_accuracy = common.accuracy(ckey_scores, ckey_labels)

        flat_scores1, flat_scores2 = output['bipair_scores']
        offsets1, offsets2 = output['bipair_offsets']
        # Cut each flat score vector into one chunk per ckey.
        scores1 = list(F.split_axis(flat_scores1, offsets1[:-1], axis=0))
        scores2 = list(F.split_axis(flat_scores2, offsets2[:-1], axis=0))

        targets1, targets2 = [], []
        skipped = []
        candidates = (cand
                      for sent_bipairs in output['bipairs']
                      for cand in sent_bipairs)
        for k, cand in enumerate(candidates):
            gold_bipair = gold_bipairs[k]
            if cand is None or gold_bipair is None:
                # No candidates or no gold answer: leave this ckey out of
                # the pair losses.
                skipped.append(k)
                continue
            hits1 = np.argwhere(cand[0] == get_pair_key(*gold_bipair[0]))
            hits2 = np.argwhere(cand[1] == get_pair_key(*gold_bipair[1]))
            assert hits1.size == hits2.size == 1
            first, second = hits1[0, 0], hits2[0, 0]
            targets1.append(first)
            targets2.append(second)
        # Delete from the back so earlier positions stay valid.
        for k in skipped[::-1]:
            del scores1[k]
            del scores2[k]

        def pair_metrics(seqs, targets):
            # Loss and accuracy for one half of the bipair; a zero loss and
            # a (0, 0) accuracy when nothing is left to score.
            if not seqs:
                return chainer.Variable(xp.array(0.0, xp.float32)), (0, 0)
            # Pad with -inf so padded slots get zero softmax probability.
            padded = F.pad_sequence(seqs, padding=-np.inf)
            targets = xp.asarray(targets, xp.int32)
            assert padded.shape[0] == targets.size
            losses = F.softmax_cross_entropy(padded, targets, reduce='no')
            # Normalised by the number of all ckeys, not only the scored
            # ones.
            return (F.sum(losses) / ckey_labels.size,
                    common.accuracy(padded, targets))

        pair_loss1, pair_accuracy1 = pair_metrics(scores1, targets1)
        pair_loss2, pair_accuracy2 = pair_metrics(scores2, targets2)

        return {
            'ckey_loss': ckey_loss,
            'ckey_accuracy': ckey_accuracy,
            'pair1_loss': pair_loss1,
            'pair1_accuracy': pair_accuracy1,
            'pair2_loss': pair_loss2,
            'pair2_accuracy': pair_accuracy2,
        }