示例#1
0
 def __init__(self, filename=None, encoding='utf8'):
     """Set up an empty tagset and optionally load it from a file.

     filename -- when truthy, it is handed to ``self._doInit`` together
                 with ``encoding``; otherwise the tagset stays empty.
     encoding -- forwarded unchanged to ``self._doInit``.

     After loading, ``self._tagnum2tag`` holds the inverse of
     ``self.tag2tagnum`` (value -> key).
     """
     self.tagsetId = None
     self.tag2tagnum = {}
     if filename:
         # NOTE(review): _doInit is not visible here; presumably it fills
         # tag2tagnum (and tagsetId) from the file -- confirm.
         self._doInit(filename, encoding)
     # NOTE(review): _a is defined elsewhere; it appears to adapt a
     # two-argument function so it can be fed (key, value) pairs -- confirm.
     swapPair = _a(lambda tag, tagnum: (tagnum, tag))
     self._tagnum2tag = dict(
         swapPair(entry) for entry in self.tag2tagnum.items())
示例#2
0
    def serializeFSAPrologue(self):
        """Build the prologue bytes: a 256-entry short-label table and a marker.

        Labels from ``self.fsa.label2Freq`` are ranked by descending
        frequency (ties broken by ascending label).  The 63 highest-ranked
        labels receive short labels 1..63 in rank order; the mapping is
        stored in ``self.label2ShortLabel`` as a side effect.

        Returns a bytearray of 257 bytes: for every label value 0..255 its
        short label (0 when it has none), followed by ``ord('^')``.
        """
        res = bytearray()

        # Labels sorted by popularity.
        # NOTE(review): _a is defined elsewhere; it appears to adapt the
        # two-argument lambda to the (label, freq) pairs -- confirm.
        sortedLabels = [
            label
            for label, _freq in sorted(
                self.fsa.label2Freq.items(),
                key=_a(lambda label, freq: (-freq, label)))
        ]

        # Popular labels table.  enumerate() yields the rank directly, which
        # avoids a linear sortedLabels.index() scan for every label.
        self.label2ShortLabel = {
            label: shortLabel
            for shortLabel, label in enumerate(sortedLabels[:63], 1)
        }

        logging.debug({
            chr(label): shortLabel
            for label, shortLabel in self.label2ShortLabel.items()
        })

        # Write the whole table: one byte per possible label value, with 0
        # for labels that did not get a short label.
        for label in range(256):
            res.append(self.label2ShortLabel.get(label, 0))

        # Write a magic char before the initial state.
        res.append(ord('^'))

        return res
示例#3
0
    def serializeQualifiersMap(self):
        """Serialize ``self.qualifiersMap`` through ``self._serializeTags``.

        Each key of ``self.qualifiersMap`` (an iterable of strings) is
        joined with ``u'|'`` into a single label; labels are inserted in
        ascending order of their mapped number.  Returns whatever
        ``self._serializeTags`` returns for the resulting label -> number
        mapping.
        """
        # NOTE(review): _a is defined elsewhere; it appears to adapt the
        # two-argument lambda to the (qualifiers, n) pairs -- confirm.
        entriesById = sorted(self.qualifiersMap.items(),
                             key=_a(lambda qs, n: n))

        label2labelId = {}
        for qualifiers, labelId in entriesById:
            label2labelId[u'|'.join(qualifiers)] = labelId

        return self._serializeTags(label2labelId)
示例#4
0
 def _serializeTags(self, tagsMap):
     """Serialize a tag -> number mapping into a bytearray.

     Layout: ``htons(len(tagsMap))`` first, then for every entry in
     ascending order of its number: ``htons(number)``, the tag as encoded
     by ``self.fsa.encodeWord`` and a terminating zero byte.

     tagsMap -- mapping from tag to its number.
     Returns the assembled bytearray.
     """
     # NOTE(review): htons and _a are defined elsewhere; htons presumably
     # yields the big-endian 16-bit encoding of its argument -- confirm.
     buf = bytearray()
     buf.extend(htons(len(tagsMap)))

     entriesByNumber = sorted(tagsMap.items(),
                              key=_a(lambda tag, tagnum: tagnum))
     for tag, tagnum in entriesByNumber:
         buf.extend(htons(tagnum))
         buf.extend(self.fsa.encodeWord(tag))
         buf.append(0)  # terminator after each encoded tag
     return buf
示例#5
0
 def getSortedTransitions(self, state):
     """Return ``state``'s transitions as a sorted list of (label, nextState).

     Transitions whose label has the higher count in ``state.label2Freq``
     come first; ties are broken by the higher count in
     ``self.fsa.label2Freq``.  A label missing from either mapping counts
     as 0.
     """
     def byFrequency(label, nextState):
         # Negated so that the ascending sort puts frequent labels first.
         stateFreq = state.label2Freq.get(label, 0)
         overallFreq = self.fsa.label2Freq.get(label, 0)
         return (-stateFreq, -overallFreq)

     transitions = state.transitionsMap.items()
     # NOTE(review): _a is defined elsewhere; it appears to adapt the
     # two-argument key function to the (label, nextState) pairs -- confirm.
     return sorted(transitions, key=_a(byFrequency))