def test_count_vocab_items_uses_pos_tags(self):
    """count_vocab_items should tally each token's dependency label.

    "This is a sentence." parses as nsubj / ROOT / det / attr / punct,
    and the two padding tokens (<S>, </S>) carry no dep label, so they
    are counted under the "NONE" fallback.
    """
    tokens = self.tokenizer.split_words("This is a sentence.")
    tokens = [Token("<S>")] + [t for t in tokens] + [Token("</S>")]
    indexer = DepLabelIndexer()
    counter = defaultdict(lambda: defaultdict(int))
    for token in tokens:
        indexer.count_vocab_items(token, counter)
    # Fixed: the previous expectation ("advmod": 3) did not match the
    # actual parse of this sentence; det/attr/punct each appear once.
    assert counter["dep_labels"] == {"ROOT": 1, "nsubj": 1, "det": 1,
                                     "NONE": 2, "attr": 1, "punct": 1}
def test_count_vocab_items_uses_pos_tags(self):
    """Every token's dependency label is counted, with "NONE" for the
    <S>/</S> padding tokens that have no parse information."""
    words = self.tokenizer.split_words("This is a sentence.")
    padded = [Token("<S>")] + list(words) + [Token("</S>")]
    label_indexer = DepLabelIndexer()
    counts = defaultdict(lambda: defaultdict(int))
    for word in padded:
        label_indexer.count_vocab_items(word, counts)
    expected = {"ROOT": 1, "nsubj": 1, "det": 1,
                "NONE": 2, "attr": 1, "punct": 1}
    assert counts["dep_labels"] == expected
def test_count_vocab_items_uses_pos_tags(self):
    """count_vocab_items tallies dependency labels per token, using
    "NONE" for the <S>/</S> padding tokens with no parse info."""
    # Dropped the redundant Python-2-era u"" prefixes for consistency
    # with the other tests in this file; string values are unchanged.
    tokens = self.tokenizer.split_words("This is a sentence.")
    tokens = [Token("<S>")] + [t for t in tokens] + [Token("</S>")]
    indexer = DepLabelIndexer()
    counter = defaultdict(lambda: defaultdict(int))
    for token in tokens:
        indexer.count_vocab_items(token, counter)
    # nsubj/ROOT/det/attr/punct from the parsed sentence, NONE for padding.
    assert counter["dep_labels"] == {
        "ROOT": 1,
        "nsubj": 1,
        "det": 1,
        "NONE": 2,
        "attr": 1,
        "punct": 1,
    }