Пример #1
0
    def test_should_generate_converting_circumflexes(self):
        """Check the roots generated for lexemes written with a circumflex.

        For every sample lexeme the generator must return exactly two roots:
        one spelled without the circumflex and one keeping the original
        spelling. Both expected roots carry the same attribute collection.
        """
        # (lemma as written, lemma without circumflex, category,
        #  members of the set passed as the fourth Root argument)
        samples = (
            (u"rüzgâr", u"rüzgar", SyntacticCategory.NOUN,
             (LLCont, LVB, LLC, LLNotVless, LVU)),
            (u"alenî", u"aleni", SyntacticCategory.ADJECTIVE,
             (LLNotCont, LVF, LLV, LLNotVless, LVU)),
            (u"cülûs", u"cülus", SyntacticCategory.NOUN,
             (LLCont, LVB, LLC, LLVless, LVR)),
            (u"Âdem", u"Adem", SyntacticCategory.NOUN,
             (LLCont, LVF, LLC, LLNotVless, LVU)),
        )

        for written, stripped, category, attributes in samples:
            lexeme = Lexeme(written, written, category, None, None)
            generated_roots = CircumflexConvertingRootGenerator.generate(lexeme)
            assert_that(generated_roots, has_length(2))

            # Stripped spelling first, then the original one, each compared
            # against a freshly built expected Root with its own set.
            for spelling in (stripped, written):
                expected_root = Root(spelling, lexeme, None, set(attributes))
                assert_that(generated_roots, has_item(expected_root))
Пример #2
0
def initialize():
    """Build the parser and publish it as the ``contextless_parser`` global.

    Steps, in order: load lexemes from ``../resources/master_dictionary.txt``
    (resolved relative to this file's directory), generate roots for each
    lexeme, build the root map, initialize the stacked suffix graph, create
    the predefined paths, and finally construct the parser with the five
    root finders.
    """
    global contextless_parser

    dictionary_path = os.path.join(os.path.dirname(__file__), '../resources/master_dictionary.txt')

    # Flatten the per-lexeme root collections into a single list.
    roots = [
        root
        for lexeme in LexiconLoader.load_from_file(dictionary_path)
        for root in CircumflexConvertingRootGenerator.generate(lexeme)
    ]
    root_map = RootMapGenerator().generate(roots)

    # Each graph wraps the previous one; the outermost graph is initialized.
    graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    graph.initialize()

    paths = PredefinedPaths(root_map, graph)
    paths.create_predefined_paths()

    # Finder order is kept exactly as the parser originally received it.
    root_finders = [
        WordRootFinder(root_map),
        TextNumeralRootFinder(root_map),
        DigitNumeralRootFinder(),
        ProperNounFromApostropheRootFinder(),
        ProperNounWithoutApostropheRootFinder(),
    ]
    contextless_parser = ContextlessMorphologicalParser(graph, paths, root_finders)
Пример #3
0
def initialize():
    """Create the contextless morphological parser and store it globally.

    The master dictionary located at ``../resources/master_dictionary.txt``
    (relative to this file's directory) is loaded, roots are generated for
    every lexeme and turned into a root map, the suffix graph and predefined
    paths are prepared, and the resulting parser is bound to the module-level
    name ``contextless_parser``.
    """
    here = os.path.dirname(__file__)
    master_dictionary = os.path.join(here,
                                     '../resources/master_dictionary.txt')

    # Gather the roots of every lexeme into one flat list.
    collected_roots = []
    for entry in LexiconLoader.load_from_file(master_dictionary):
        collected_roots += CircumflexConvertingRootGenerator.generate(entry)

    root_map = RootMapGenerator().generate(collected_roots)

    # Suffix graphs are layered by wrapping, innermost first.
    basic_graph = BasicSuffixGraph()
    proper_noun_graph = ProperNounSuffixGraph(basic_graph)
    numeral_graph = NumeralSuffixGraph(proper_noun_graph)
    suffix_graph = CopulaSuffixGraph(numeral_graph)
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    # Root finders are created in the order they are handed to the parser.
    finders = []
    finders.append(WordRootFinder(root_map))
    finders.append(TextNumeralRootFinder(root_map))
    finders.append(DigitNumeralRootFinder())
    finders.append(ProperNounFromApostropheRootFinder())
    finders.append(ProperNounWithoutApostropheRootFinder())

    global contextless_parser
    contextless_parser = ContextlessMorphologicalParser(
        suffix_graph, predefined_paths, finders)
Пример #4
0
    def test_should_generate_converting_circumflexes(self):
        """Verify that a circumflexed lexeme yields two roots.

        One expected root uses the spelling without the circumflex, the other
        keeps the lexeme's own spelling; both are built with the same set as
        their fourth argument.
        """
        def check(written, without_circumflex, category, attributes):
            # Run one scenario: generate the roots, then look for both spellings.
            lexeme = Lexeme(written, written, category, None, None)
            generated_roots = CircumflexConvertingRootGenerator.generate(lexeme)
            assert_that(generated_roots, has_length(2))
            assert_that(
                generated_roots,
                has_item(Root(without_circumflex, lexeme, None, set(attributes))))
            assert_that(
                generated_roots,
                has_item(Root(written, lexeme, None, set(attributes))))

        check(u"rüzgâr", u"rüzgar", SyntacticCategory.NOUN,
              [LLCont, LVB, LLC, LLNotVless, LVU])
        check(u"alenî", u"aleni", SyntacticCategory.ADJECTIVE,
              [LLNotCont, LVF, LLV, LLNotVless, LVU])
        check(u"cülûs", u"cülus", SyntacticCategory.NOUN,
              [LLCont, LVB, LLC, LLVless, LVR])
        check(u"Âdem", u"Adem", SyntacticCategory.NOUN,
              [LLCont, LVF, LLC, LLNotVless, LVU])
    @classmethod
    def setUpClass(cls):
        """Build the shared root map and parser once for the whole test class.

        unittest invokes this hook on the class itself (``cls.setUpClass()``),
        so it has to be a classmethod; without the decorator the call fails
        before any test runs.

        Sets:
            cls.root_map: root map generated from the roots of every lexeme in
                the master dictionary.
            cls.parser: an ``UpperCaseSupportingContextlessMorphologicalParser``
                wired with the suffix graph, predefined paths and root finders.
        """
        super(ParserTestWithSimpleParseSets, cls).setUpClass()

        # The dictionary path is resolved relative to this test file.
        dictionary_path = os.path.join(
            os.path.dirname(__file__), "../../../../resources/master_dictionary.txt"
        )

        all_roots = []
        for lexeme in LexiconLoader.load_from_file(dictionary_path):
            all_roots.extend(CircumflexConvertingRootGenerator.generate(lexeme))

        cls.root_map = RootMapGenerator().generate(all_roots)

        # Each graph wraps the previous one; the outermost graph is initialized.
        suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
        suffix_graph.initialize()

        predefined_paths = PredefinedPaths(cls.root_map, suffix_graph)
        predefined_paths.create_predefined_paths()

        # Root finders, in the order they are handed to the parser.
        root_finders = [
            WordRootFinder(cls.root_map),
            TextNumeralRootFinder(cls.root_map),
            DigitNumeralRootFinder(),
            ProperNounFromApostropheRootFinder(),
            ProperNounWithoutApostropheRootFinder(),
        ]

        cls.parser = UpperCaseSupportingContextlessMorphologicalParser(
            suffix_graph,
            predefined_paths,
            root_finders,
        )
from trnltk.morphology.model import formatter
from trnltk.morphology.morphotactics.basicsuffixgraph import BasicSuffixGraph
from trnltk.morphology.morphotactics.copulasuffixgraph import CopulaSuffixGraph
from trnltk.morphology.contextless.parser.parser import  logger as parser_logger, UpperCaseSupportingContextlessMorphologicalParser
from trnltk.morphology.contextless.parser.rootfinder import WordRootFinder, DigitNumeralRootFinder, TextNumeralRootFinder, ProperNounFromApostropheRootFinder, ProperNounWithoutApostropheRootFinder
from trnltk.morphology.contextless.parser.suffixapplier import logger as suffix_applier_logger
from trnltk.morphology.morphotactics.numeralsuffixgraph import NumeralSuffixGraph
from trnltk.morphology.morphotactics.predefinedpaths import PredefinedPaths

from trnltk.morphology.morphotactics.propernounsuffixgraph import ProperNounSuffixGraph

# Module-level setup: build the roots, root map, suffix graph, predefined
# paths and root finders used by the rest of this script.
all_roots = []

# The dictionary path is relative, so it presumably resolves against the
# current working directory -- verify how LexiconLoader opens it.
lexemes = LexiconLoader.load_from_file('trnltk/trnltk/resources/master_dictionary.txt')
for di in lexemes:
	all_roots.extend(CircumflexConvertingRootGenerator.generate(di))

# Turn the flat list of roots into a root map.
root_map_generator = RootMapGenerator()
root_map = root_map_generator.generate(all_roots)

# Suffix graphs are layered by wrapping; the outermost graph is initialized.
suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
suffix_graph.initialize()

# Predefined paths are created from the root map and the suffix graph.
predefined_paths = PredefinedPaths(root_map, suffix_graph)
predefined_paths.create_predefined_paths()

# Root finders; only the word and text-numeral finders receive the root map.
word_root_finder = WordRootFinder(root_map)
text_numeral_root_finder = TextNumeralRootFinder(root_map)
digit_numeral_root_finder = DigitNumeralRootFinder()
proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()