def test_should_find_lemma_concordance(self):
    """Look up lemma roots in the concordance index and check the offsets.

    An index is built over ``self.word_list`` (a fixture prepared outside
    this method) and queried by lemma root alone, by lemma root plus
    syntactic category, and by lemma root plus syntactic and secondary
    syntactic category. Each query must return exactly the listed offsets.
    """
    idx = DictionaryItemConcordanceIndex(self.word_list)

    pronoun = SyntacticCategory.PRONOUN

    # (positional arguments for idx.offsets, expected offsets), checked in
    # the order listed.
    expectations = [
        ((u'something',), []),
        ((u"o",), [0, 1, 2, 3, 4]),
        ((u"o", pronoun), [0, 1, 3, 4]),
        ((u"o", SyntacticCategory.DETERMINER), [2]),
        ((u"o", pronoun, SecondarySyntacticCategory.PERSONAL), [0, 3]),
        ((u"o", pronoun, SecondarySyntacticCategory.DEMONSTRATIVE), [1, 4]),
        ((u"git",), [6, 7, 8, 9, 10, 11]),
        ((u"gid",), []),
        ((u"git", SyntacticCategory.VERB), [6, 7, 8, 9, 10, 11]),
    ]

    for query, expected_offsets in expectations:
        assert_that(idx.offsets(*query), equal_to(expected_offsets))
def test_should_find_lemma_concordance(self):
    """Verify the offsets the concordance index reports per lemma root.

    Queries an index built from ``self.word_list`` with one, two and three
    key parts (lemma root, syntactic category, secondary syntactic
    category) and compares every result with its expected offset list.
    """
    idx = DictionaryItemConcordanceIndex(self.word_list)
    lookup = idx.offsets

    pronoun = SyntacticCategory.PRONOUN
    determiner = SyntacticCategory.DETERMINER
    verb = SyntacticCategory.VERB
    personal = SecondarySyntacticCategory.PERSONAL
    demonstrative = SecondarySyntacticCategory.DEMONSTRATIVE

    # A lemma root the test expects to have no occurrences.
    assert_that(lookup(u'something'), equal_to([]))

    # "o": all occurrences, then narrowed by syntactic category.
    assert_that(lookup(u"o"), equal_to([0, 1, 2, 3, 4]))
    assert_that(lookup(u"o", pronoun), equal_to([0, 1, 3, 4]))
    assert_that(lookup(u"o", determiner), equal_to([2]))

    # "o" as a pronoun, narrowed further by secondary syntactic category.
    assert_that(lookup(u"o", pronoun, personal), equal_to([0, 3]))
    assert_that(lookup(u"o", pronoun, demonstrative), equal_to([1, 4]))

    # "git" has offsets; "gid" is expected to have none.
    assert_that(lookup(u"git"), equal_to([6, 7, 8, 9, 10, 11]))
    assert_that(lookup(u"gid"), equal_to([]))
    assert_that(lookup(u"git", verb), equal_to([6, 7, 8, 9, 10, 11]))
def _validate_lemma_concordance_indexes(self, word_list):
    """Check that every indexed offset points at a word matching its key.

    Builds a ``DictionaryItemConcordanceIndex`` over ``word_list`` and walks
    the index's internal ``_offsets._indices`` mapping in three separate
    passes, one per nesting level (lemma root, then syntactic category,
    then secondary syntactic category). For each key combination the
    offsets returned by ``idx.offsets`` are resolved against ``word_list``
    and every resulting word's ``root`` must carry the same values as the
    key.

    :param word_list: indexable sequence of words, each exposing ``root``
    """
    idx = DictionaryItemConcordanceIndex(word_list)

    def words_for(*key):
        # Resolve the offsets reported for ``key`` back to indexed words.
        return [word_list[position] for position in idx.offsets(*key)]

    # NOTE(review): ``iterkeys()`` is the Python 2 dict API; presumably
    # ``_indices`` is a nested dict -- confirm before porting.

    # Pass 1: lemma root only.
    for lemma_root in idx._offsets._indices.iterkeys():
        root_matches = [
            word.root.lemma_root == lemma_root
            for word in words_for(lemma_root)
        ]
        assert_that(all(root_matches))

    # Pass 2: lemma root + syntactic category.
    for lemma_root in idx._offsets._indices.iterkeys():
        by_category = idx._offsets._indices[lemma_root]
        for syntactic_category in by_category.iterkeys():
            category_matches = [
                word.root.lemma_root == lemma_root
                and word.root.syntactic_category == syntactic_category
                for word in words_for(lemma_root, syntactic_category)
            ]
            assert_that(all(category_matches))

    # Pass 3: lemma root + syntactic category + secondary category.
    for lemma_root in idx._offsets._indices.iterkeys():
        by_category = idx._offsets._indices[lemma_root]
        for syntactic_category in by_category.iterkeys():
            by_secondary = idx._offsets._indices[lemma_root][
                syntactic_category]
            for secondary_syntactic_category in by_secondary.iterkeys():
                secondary_matches = [
                    word.root.lemma_root == lemma_root
                    and word.root.syntactic_category == syntactic_category
                    and word.root.secondary_syntactic_category
                    == secondary_syntactic_category
                    for word in words_for(lemma_root, syntactic_category,
                                          secondary_syntactic_category)
                ]
                assert_that(all(secondary_matches))
def _validate_lemma_concordance_indexes(self, word_list):
    """Assert that the concordance index only points at matching words.

    A ``DictionaryItemConcordanceIndex`` is created from ``word_list``.
    Its ``_offsets._indices`` mapping is then traversed three times: by
    lemma root, by lemma root and syntactic category, and by lemma root,
    syntactic category and secondary syntactic category. On each traversal
    the words found at the reported offsets must have a ``root`` whose
    attributes equal the key used for the lookup.

    :param word_list: indexable sequence of words, each exposing ``root``
    """
    idx = DictionaryItemConcordanceIndex(word_list)

    def expect_all(key, is_expected):
        # Fetch the offsets for ``key``, resolve them to words, then
        # require ``is_expected`` to hold for every one of those words.
        offsets = idx.offsets(*key)
        words = [word_list[offset] for offset in offsets]
        assert_that(all([is_expected(word) for word in words]))

    # NOTE(review): ``iterkeys()`` is Python 2 only; presumably ``_indices``
    # is a nested dict -- confirm before porting.

    # First traversal: key is the lemma root alone.
    for lemma_root in idx._offsets._indices.iterkeys():
        expect_all(
            (lemma_root,),
            lambda word: word.root.lemma_root == lemma_root)

    # Second traversal: key is (lemma root, syntactic category).
    for lemma_root in idx._offsets._indices.iterkeys():
        for syntactic_category in idx._offsets._indices[
                lemma_root].iterkeys():
            expect_all(
                (lemma_root, syntactic_category),
                lambda word: (
                    word.root.lemma_root == lemma_root
                    and word.root.syntactic_category == syntactic_category))

    # Third traversal: key additionally has the secondary category.
    for lemma_root in idx._offsets._indices.iterkeys():
        for syntactic_category in idx._offsets._indices[
                lemma_root].iterkeys():
            for secondary_syntactic_category in idx._offsets._indices[
                    lemma_root][syntactic_category].iterkeys():
                expect_all(
                    (lemma_root, syntactic_category,
                     secondary_syntactic_category),
                    lambda word: (
                        word.root.lemma_root == lemma_root
                        and word.root.syntactic_category
                        == syntactic_category
                        and word.root.secondary_syntactic_category
                        == secondary_syntactic_category))