Пример #1
0
def main(train_dir, dev_dir, test_dir):
    """Collect corpus statistics and write the vocabulary and KB-word files.

    All three inputs are read into one shared ``Counter`` (the project's
    statistics collector, not ``collections.Counter``). A short summary is
    printed, the collected ``kb_words`` are dumped to
    ``embedding/kb_words.json`` and the index mappings together with the
    maximum sentence/word lengths are saved to ``embedding/vocabs.json``.

    Args:
        train_dir: Passed to ``read_file`` with ``update_kb=True``.
        dev_dir: Passed to ``read_file`` with default options.
        test_dir: Passed to ``read_file`` with default options.
    """
    stats = Counter()

    # Only the training input is read with update_kb=True.
    read_file(train_dir, stats, update_kb=True)
    for other_dir in (dev_dir, test_dir):
        read_file(other_dir, stats)

    print(stats)
    print("longest sentence: %s" % str(stats.longest_sen))
    print("longest word: %s" % stats.longest_word())

    # Each value is turned into a list before dumping
    # (presumably they are sets, which JSON cannot encode -- confirm).
    dumpable_kb = {}
    for tag, members in stats.kb_words.items():
        dumpable_kb[tag] = list(members)
    json_dump(dumpable_kb, "embedding/kb_words.json")

    word2idx = construct_word_embeddings(stats.word_vocab)
    char2idx = construct_char_embeddings(stats.char_vocab)
    pos2idx = construct_pos_embeddings(stats.pos_tags)
    ner2idx = construct_ner_embeddings(stats.ner_tags)

    # NOTE(review): the four local names and this single-line call are kept
    # exactly as written in case make_dict derives its keys from the
    # argument names -- confirm against make_dict before renaming.
    index_maps = make_dict(word2idx, char2idx, ner2idx, pos2idx)
    vocabs = ObjectDict(
        index_maps,
        max_sen_len=stats.max_sen_len,
        max_word_len=stats.max_word_len,
    )
    vocabs.save("embedding/vocabs.json")
Пример #2
0
 def __init__(self):
     """Start with empty statistics: zero lengths, empty vocabularies."""
     # Length statistics; no longest sentence has been recorded yet.
     self.max_sen_len = self.max_word_len = 0
     self.longest_sen = None

     # Vocabularies and tag inventories, all initially empty sets.
     self.word_vocab, self.char_vocab = set(), set()
     self.pos_tags, self.ner_tags = set(), set()

     # Empty ObjectDict for the collected kb_words entries.
     self.kb_words = ObjectDict()
Пример #3
0
 def __init__(self, words):
     """Wrap raw words and derive per-sentence vocabularies and statistics.

     Args:
         words: Iterable of raw items, each passed to ``Word(...)``.

     Attributes set:
         words: List of ``Word`` objects.
         max_word_len: Largest ``len(word)`` in the sentence, or 0 when the
             sentence is empty.
         word_vocab: Set of ``word.normalized`` values.
         char_vocab: Union of every word's ``char_vocab``.
         pos_tags / ner_tags: Sets of ``word.pos`` / ``word.ner`` values.
         kb_words: Maps the last three characters of an NER tag to the set
             of normalized words that directly precede a tagged word.
     """
     self.words = [Word(w) for w in words]

     # max() raises ValueError on an empty sequence; fall back to 0 so an
     # empty sentence can still be constructed.
     self.max_word_len = max([len(w) for w in self.words] or [0])

     self.word_vocab = set(w.normalized for w in self.words)
     self.char_vocab = set(
         itertools.chain.from_iterable(w.char_vocab for w in self.words))
     self.pos_tags = set(w.pos for w in self.words)
     self.ner_tags = set(w.ner for w in self.words)

     self.kb_words = ObjectDict()
     # Walk (current, previous) pairs and record the previous word whenever
     # a tagged word follows an untagged ('O') one.
     for w, pre_w in zip(self.words[1:], self.words[:-1]):
         if w.ner == 'O' or pre_w.ner != 'O':
             continue
         if pre_w.should_ignore():
             continue
         # Only the last three characters of the tag are used as the key.
         ner = w.ner[-3:]
         if ner not in self.kb_words:
             self.kb_words[ner] = set()
         self.kb_words[ner].add(pre_w.normalized)
Пример #4
0
    def testBuildChoices(self):
        # Exercises thb.common.build_choices against a stub game object in
        # three configurations: shared choices without items, shared choices
        # with an 'imperial-choice' item, and per-player (non-shared) choices.
        import logging
        from thb.common import build_choices
        import random
        from thb.characters.baseclasses import get_characters
        from thb import characters
        from game import autoenv
        # Signature under test, for reference:
        # def build_choices(g, items, candidates, players, num, akaris, shared):

        log = logging.getLogger('test')

        # Stub game: 8 players (userid 0..7) whose `reveal` only logs, a
        # seeded RNG, and the server/client side flags.
        g = ObjectDict({
            'players':
            BatchList([
                ObjectDict({
                    'account':
                    ObjectDict({'userid': i}),
                    'reveal':
                    # i=i binds the current loop value into each lambda.
                    lambda o, i=i: log.info('Reveal to %s: %s', i, o),
                }) for i in xrange(8)
            ]),
            'random':
            random.Random(12341234),
            'SERVER_SIDE':
            True,
            'CLIENT_SIDE':
            False,
        })
        # Make Game.getgame() return the stub.
        # NOTE(review): this patch is not reverted within this test.
        autoenv.Game.getgame = staticmethod(lambda: g)
        chars = get_characters('common', '3v3')
        assert chars

        # Case 1: no items, num=10, akaris=3, shared=True.
        # Expect one entry per player, every value being the same object,
        # and no imperial choices.
        choices, imperial = build_choices(g, {}, chars, g.players, 10, 3, True)
        eq_(len(choices.items()), len(g.players))
        eq_(len(set([id(i) for i in choices.values()])), 1)
        eq_(set(choices.keys()), set(g.players))
        eq_(imperial, [])

        # Case 2: same as case 1, but items keyed by 0 contain an
        # 'imperial-choice:SpAya' entry (plus an unrelated 'foo').
        choices, imperial = build_choices(
            g, {0: ['imperial-choice:SpAya', 'foo']}, chars, g.players, 10, 3,
            True)
        eq_(len(choices.items()), len(g.players))
        eq_(len(set([id(i) for i in choices.values()])), 1)
        eq_(set(choices.keys()), set(g.players))
        # The first imperial entry must pair players[0] with a SpAya choice
        # that is also present in that player's choice list.
        p, c = imperial[0]
        eq_((p, c.char_cls), (g.players[0], characters.sp_aya.SpAya))
        assert c in choices[p]
        # Drop the binding; `c` is reused as the comprehension variable below.
        del c
        # The akari flags in the player's list must sum to 3.
        eq_(sum([c.akari for c in choices[p]]), 3)

        # Case 3: per-player num ([4] * 8) and akaris ([1] * 8), shared=False.
        # Expect 8 distinct lists, each of length 4 with exactly one akari.
        choices, imperial = build_choices(
            g, {0: ['imperial-choice:SpAya', 'foo']}, chars, g.players,
            [4] * 8, [1] * 8, False)
        eq_(len(choices.items()), len(g.players))
        eq_(len(set([id(i) for i in choices.values()])), 8)
        eq_(set(choices.keys()), set(g.players))
        eq_([len(i) for i in choices.values()], [4] * 8)
        eq_([len([j for j in i if j.akari]) for i in choices.values()],
            [1] * 8)

        # The imperial choice must still be honoured in non-shared mode.
        p, c = imperial[0]
        eq_((p, c.char_cls), (g.players[0], characters.sp_aya.SpAya))
        assert c in choices[p]
Пример #5
0
def badge_metafunc(clsname, bases, _dict):
    """Register a class-style namespace as badge data.

    Takes a ``(name, bases, namespace)`` triple (presumably called as a
    metaclass hook -- confirm at the use site), removes the ``__module__``
    entry from the namespace, parses the remainder with
    ``ObjectDict.parse`` and stores the result in ``badges`` under
    ``clsname``. ``bases`` is unused and nothing is returned.

    Raises:
        KeyError: If ``_dict`` has no ``'__module__'`` key.
    """
    del _dict['__module__']
    badges[clsname] = ObjectDict.parse(_dict)
Пример #6
0
def tag_metafunc(clsname, bases, _dict):
    """Register a class-style namespace as tag data.

    Takes a ``(name, bases, namespace)`` triple (presumably called as a
    metaclass hook -- confirm at the use site), removes the ``__module__``
    entry from the namespace, parses the remainder with
    ``ObjectDict.parse`` and stores the result in ``tags`` under
    ``clsname``. ``bases`` is unused and nothing is returned.

    Raises:
        KeyError: If ``_dict`` has no ``'__module__'`` key.
    """
    del _dict['__module__']
    tags[clsname] = ObjectDict.parse(_dict)
Пример #7
0
def tag_metafunc(clsname, bases, _dict):
    """Store the parsed namespace of a tag definition in ``tags``.

    The ``__module__`` entry is dropped from ``_dict`` first; what is left
    is converted with ``ObjectDict.parse`` and saved under ``clsname``.
    ``bases`` is accepted but not used, and the function returns nothing.
    (The argument triple matches a metaclass call -- presumably that is how
    it is invoked; confirm at the use site.)

    Raises:
        KeyError: If ``_dict`` has no ``'__module__'`` key.
    """
    del _dict['__module__']
    tags[clsname] = ObjectDict.parse(_dict)
Пример #8
0
 def __init__(self, *a):
     """Create empty ``uniform`` and ``attrib`` containers.

     Positional arguments are accepted but ignored.
     """
     # Imported at call time, inside the method rather than at module level.
     from utils import ObjectDict
     self.uniform, self.attrib = ObjectDict(), ObjectDict()
Пример #9
0
def badge_metafunc(clsname, bases, _dict):
    """Store the parsed namespace of a badge definition in ``badges``.

    The ``__module__`` entry is dropped from ``_dict`` first; what is left
    is converted with ``ObjectDict.parse`` and saved under ``clsname``.
    ``bases`` is accepted but not used, and the function returns nothing.
    (The argument triple matches a metaclass call -- presumably that is how
    it is invoked; confirm at the use site.)

    Raises:
        KeyError: If ``_dict`` has no ``'__module__'`` key.
    """
    del _dict['__module__']
    badges[clsname] = ObjectDict.parse(_dict)