def __init__(self, n_tags, pc=None, idxs=None, vocab=None, span_type=None, pad_idx=None):
    """Initialize the object.

    :param n_tags: int The number of tags in your output (emission size)
    :param pc: dy.ParameterCollection When given, a subcollection named
        "CRF" is added to it and used; when None a fresh
        dy.ParameterCollection is created.
    :param idxs: Tuple(int, int) The index of the start and stop symbol
        in emissions.
    :param vocab: Mapping from tag to index. When given, a transition mask
        is built with `crf_mask`. The caller's mapping is not modified: if
        idxs is None a copy is made before `<GO>`/`<EOS>` are added to it.
    :param span_type: str The tagging scheme, required when vocab is
        given. Choices are `IOB`, `BIO` (or `IOB2`), and `IOBES`.
    :param pad_idx: Passed straight through to `crf_mask`.

    Note:
        if idxs is none then the CRF adds these symbols to the emission
        vectors and n_tags is assumed to be the number of output tags.

        if idxs is not none then the first element is assumed to be the
        start index and the second idx is assumed to be the end index. In
        this case n_tags is assumed to include the start and end symbols.

        if vocab is not None then we create a mask to reduce the probability
        of illegal transitions.
    """
    super(CRF, self).__init__()

    if pc is None:
        self.pc = dy.ParameterCollection()
    else:
        self.pc = pc.add_subcollection(name="CRF")

    if idxs is None:
        # Start/end symbols are appended after the real tags, so the
        # transition matrix needs two extra rows/columns.
        self.start_idx = n_tags
        self.end_idx = n_tags + 1
        self.n_tags = n_tags + 2
        self.add_ends = True
    else:
        self.start_idx, self.end_idx = idxs
        self.n_tags = n_tags
        self.add_ends = False

    # Both mask attributes always exist; they stay None when no vocab is
    # supplied so that reading either one never raises AttributeError.
    self.mask = None
    self.inv_mask = None
    if vocab is not None:
        assert span_type is not None, "To mask transitions you need to provide a tagging span_type, choices are `IOB`, `BIO` (or `IOB2`), and `IOBES`"
        if idxs is None:
            # Work on a copy so the caller's vocab is left untouched.
            vocab = vocab.copy()
            vocab['<GO>'] = self.start_idx
            vocab['<EOS>'] = self.end_idx
        self.mask = crf_mask(vocab, span_type, self.start_idx, self.end_idx, pad_idx)
        # -1e4 wherever the mask is zero, 0 everywhere else.
        self.inv_mask = (self.mask == 0) * -1e4

    self.transitions_p = self.pc.add_parameters((self.n_tags, self.n_tags), name="transition")
def test_BIO_shape(BIO):
    """Check that BIO-style masks are square over the IOBv vocabulary.

    Both the `BIO` argument (presumably supplied by a test fixture) and a
    mask built here with the "IOB2" span type must have shape
    (len(IOBv), len(IOBv)).
    """
    n_labels = len(IOBv)
    expected_shape = (n_labels, n_labels)
    assert BIO.shape == expected_shape

    go_idx = IOBv['<GO>']
    eos_idx = IOBv['<EOS>']
    pad_idx = IOBv['<PAD>']
    iob2_mask = crf_mask(IOBv, "IOB2", go_idx, eos_idx, pad_idx)
    assert iob2_mask.shape == expected_shape
def IOBES():
    """Return the `crf_mask` result for the IOBESv vocabulary with span type "IOBES"."""
    go_idx = IOBESv['<GO>']
    eos_idx = IOBESv['<EOS>']
    pad_idx = IOBESv['<PAD>']
    return crf_mask(IOBESv, "IOBES", go_idx, eos_idx, pad_idx)
def BIO():
    """Return the `crf_mask` result for the IOBv vocabulary with span type "BIO"."""
    go_idx = IOBv['<GO>']
    eos_idx = IOBv['<EOS>']
    pad_idx = IOBv['<PAD>']
    return crf_mask(IOBv, "BIO", go_idx, eos_idx, pad_idx)
def IOB():
    """Return the `crf_mask` result for the IOBv vocabulary with span type "IOB"."""
    go_idx = IOBv['<GO>']
    eos_idx = IOBv['<EOS>']
    pad_idx = IOBv['<PAD>']
    return crf_mask(IOBv, "IOB", go_idx, eos_idx, pad_idx)