Пример #1
0
    def info(self):
        """
        Print the feature statistics for the given model. (Assumes MaxEnt)

        Runs the ``classifier2info`` binary found under the configured MALLET
        directory on ``self._model`` and parses its standard output. Lines
        matching ``FEATURES FOR CLASS <name>`` start a new class section;
        every other non-blank line is expected to hold exactly two
        whitespace-separated fields, a feature name and a numeric weight.

        For each class seen, one tab-separated line is printed: the class
        name, its ``<default>`` weight, and the entries returned by
        ``feats.top_n(cur_class, n=10, key2_re='^nom')``.

        Raises:
            ValueError: if a non-blank, non-header line does not consist of
                exactly two fields, or its second field is not a float.
        """
        mallet = c['mallet']
        env = set_env_lang_utf8()
        info_bin = os.path.join(mallet, 'bin', 'classifier2info')

        cur_class = None
        feats = TwoLevelCountDict()

        # Only stdout is consumed.  An unread stderr pipe can fill up and
        # block the child forever, so stderr (and the unused stdin) are sent
        # to DEVNULL instead.  The context manager closes the stdout pipe and
        # waits for the child, so no descriptors or zombie processes leak.
        with sub.Popen([info_bin, '--classifier', self._model],
                       stdout=sub.PIPE, stdin=sub.DEVNULL,
                       stderr=sub.DEVNULL, env=env) as info_p:

            # Go through and pick out what the features are for
            for line in info_p.stdout:
                content = line.decode(encoding='utf-8')

                # Blank separator lines carry no data; unpacking them below
                # would raise ValueError.
                if not content.strip():
                    continue

                class_change = re.search(r'FEATURES FOR CLASS (.*)', content)
                # Set the current class if the section changes
                if class_change:
                    cur_class = class_change.group(1).strip()
                    continue

                # Otherwise, let's catalog the features.
                word, prob = content.split()
                feats.add(cur_class, word, float(prob))

        # Now, print some info
        for cur_class in feats.keys():
            print(cur_class, end='\t')
            print('%s:%.4f' % ('<default>', feats[cur_class]['<default>']), end='\t')
            top_10 = feats.top_n(cur_class, n=10, key2_re='^nom')
            print('\t'.join(['%s:%.4f' % (w, p) for w, p in top_10]))
Пример #2
0
    def __init__(self, model=None):
        """
        Launch a MALLET ``classify-file`` subprocess for the given model.

        :param model: value passed to ``--classifier``; when ``None`` the
            name ``classifier`` from the enclosing scope is used instead.

        The child process is stored on ``self.c`` with pipes attached to its
        stdin and stdout; ``-`` is passed for both ``--input`` and
        ``--output`` (presumably selecting the standard streams -- confirm
        against the MALLET docs).  ``self._first`` starts out ``True``.
        """
        # Fall back to the default classifier only when no model was supplied.
        self._model = classifier if model is None else model

        mallet_bin = os.path.join(mallet, 'bin', 'mallet')
        env = set_env_lang_utf8()

        command = [
            mallet_bin,
            'classify-file',
            '--classifier', self._model,
            '--input', '-',
            '--output', '-',
        ]
        self.c = sub.Popen(command, stdout=sub.PIPE, stdin=sub.PIPE, env=env)
        self._first = True