Example #1
0
 def test_count_vocab_items_uses_pos_tags(self):
     """Tally dep labels via the indexer and check the expected counts.

     Wraps the tokenized sentence in <S>/</S> sentinel tokens (which have
     no dep label, hence the "NONE" entries) before counting.
     """
     sentence_tokens = self.tokenizer.split_words("This is a sentence.")
     wrapped = [Token("<S>")] + list(sentence_tokens) + [Token("</S>")]
     indexer = DepLabelIndexer()
     vocab_counter = defaultdict(lambda: defaultdict(int))
     for tok in wrapped:
         indexer.count_vocab_items(tok, vocab_counter)
     expected = {"ROOT": 1, "nsubj": 1, "advmod": 3, "NONE": 2}
     assert vocab_counter["dep_labels"] == expected
Example #2
0
    def test_count_vocab_items_uses_pos_tags(self):
        """Tally dep labels via the indexer and check the expected counts.

        Wraps the tokenized sentence in <S>/</S> sentinel tokens (which
        have no dep label, hence the "NONE" entries) before counting.
        """
        sentence_tokens = self.tokenizer.split_words("This is a sentence.")
        wrapped = [Token("<S>")] + list(sentence_tokens) + [Token("</S>")]
        indexer = DepLabelIndexer()
        vocab_counter = defaultdict(lambda: defaultdict(int))
        for tok in wrapped:
            indexer.count_vocab_items(tok, vocab_counter)
        expected = {"ROOT": 1, "nsubj": 1, "det": 1,
                    "NONE": 2, "attr": 1, "punct": 1}
        assert vocab_counter["dep_labels"] == expected
    def test_count_vocab_items_uses_pos_tags(self):
        """Tally dep labels via the indexer and check the expected counts.

        Wraps the tokenized sentence in <S>/</S> sentinel tokens (which
        have no dep label, hence the "NONE" entries) before counting.
        """
        sentence_tokens = self.tokenizer.split_words(u"This is a sentence.")
        wrapped = [Token(u"<S>")] + list(sentence_tokens) + [Token(u"</S>")]
        indexer = DepLabelIndexer()
        vocab_counter = defaultdict(lambda: defaultdict(int))
        for tok in wrapped:
            indexer.count_vocab_items(tok, vocab_counter)
        expected = {
            u"ROOT": 1,
            u"nsubj": 1,
            u"det": 1,
            u"NONE": 2,
            u"attr": 1,
            u"punct": 1
        }
        assert vocab_counter[u"dep_labels"] == expected