Пример #1
0
    def __init__(self,
                 n_tags,
                 pc=None,
                 idxs=None,
                 vocab=None,
                 span_type=None,
                 pad_idx=None):
        """Set up the CRF's parameters, boundary tag indices and optional transition mask.

        :param n_tags: int The number of tags in your output (emission size)
        :param pc: dy.ParameterCollection If given, a subcollection named "CRF" is
            added to it and used; if None a fresh ParameterCollection is created.
        :param idxs: Tuple(int, int) The index of the start and stop symbol in emissions.
        :param vocab: A mapping from tag to index (it must support `.copy()` and
            item assignment). When given, a transition mask is built from it.
        :param span_type: str The tagging scheme used to build the mask. Required
            when `vocab` is given; choices are `IOB`, `BIO` (or `IOB2`), and `IOBES`.
        :param pad_idx: Passed through unchanged to `crf_mask`.

        Note:
            if idxs is none then the CRF adds these symbols to the emission vectors and
            n_tags is assumed to be the number of output tags.

            if idxs is not none then the first element is assumed to be the start index
            and the second idx is assumed to be the end index. In this case n_tags is
            assumed to include the start and end symbols.

            if vocab is not None then we create a mask to reduce the probability of
            illegal transitions. When vocab is None both `self.mask` and
            `self.inv_mask` are None.
        """
        super(CRF, self).__init__()
        if pc is None:
            self.pc = dy.ParameterCollection()
        else:
            self.pc = pc.add_subcollection(name="CRF")
        if idxs is None:
            # The start/end symbols are appended after the caller's tags, so the
            # transition matrix needs two extra rows/columns.
            self.start_idx = n_tags
            self.end_idx = n_tags + 1
            self.n_tags = n_tags + 2
            self.add_ends = True
        else:
            self.start_idx, self.end_idx = idxs
            self.n_tags = n_tags
            self.add_ends = False
        # Both mask attributes always exist so that reading either one on an
        # unmasked CRF yields None instead of raising AttributeError.
        self.mask = None
        self.inv_mask = None
        if vocab is not None:
            assert span_type is not None, (
                "To mask transitions you need to provide a tagging span_type, "
                "choices are `IOB`, `BIO` (or `IOB2`), and `IOBES`"
            )
            if idxs is None:
                # Work on a copy so the caller's vocab is not mutated when the
                # start/end symbols are registered.
                vocab = vocab.copy()
                vocab['<GO>'] = self.start_idx
                vocab['<EOS>'] = self.end_idx
            self.mask = crf_mask(vocab, span_type, self.start_idx,
                                 self.end_idx, pad_idx)
            # -1e4 wherever the mask is 0, and 0 elsewhere.
            self.inv_mask = (self.mask == 0) * -1e4

        self.transitions_p = self.pc.add_parameters((self.n_tags, self.n_tags),
                                                    name="transition")
Пример #2
0
def test_BIO_shape(BIO):
    """Check that the BIO mask and a freshly built IOB2 mask are square over IOBv.

    `BIO` is presumably supplied as a pytest fixture (decorator not visible here).
    """
    assert BIO.shape == (len(IOBv),) * 2
    go_idx, eos_idx, pad_idx = (IOBv[token] for token in ('<GO>', '<EOS>', '<PAD>'))
    iob2_mask = crf_mask(IOBv, "IOB2", go_idx, eos_idx, pad_idx)
    assert iob2_mask.shape == (len(IOBv),) * 2
Пример #3
0
def IOBES():
    """Return the `crf_mask` result for the IOBESv vocabulary with span type "IOBES"."""
    go_idx = IOBESv['<GO>']
    eos_idx = IOBESv['<EOS>']
    pad_idx = IOBESv['<PAD>']
    return crf_mask(IOBESv, "IOBES", go_idx, eos_idx, pad_idx)
Пример #4
0
def BIO():
    """Return the `crf_mask` result for the IOBv vocabulary with span type "BIO"."""
    special_idxs = [IOBv[token] for token in ('<GO>', '<EOS>', '<PAD>')]
    return crf_mask(IOBv, "BIO", *special_idxs)
Пример #5
0
def IOB():
    """Return the `crf_mask` result for the IOBv vocabulary with span type "IOB"."""
    go_idx, eos_idx, pad_idx = IOBv['<GO>'], IOBv['<EOS>'], IOBv['<PAD>']
    mask = crf_mask(IOBv, "IOB", go_idx, eos_idx, pad_idx)
    return mask