def setUp(self):
    self.parseset_creator = ParseSetCreator()

    all_roots = []
    lexemes = LexiconLoader.load_from_file(
        os.path.join(os.path.dirname(__file__), '../../resources/master_dictionary.txt'))
    for di in lexemes:
        all_roots.extend(RootGenerator.generate(di))

    root_map = RootMapGenerator().generate(all_roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(root_map)
    digit_numeral_root_finder = DigitNumeralRootFinder()
    text_numeral_root_finder = TextNumeralRootFinder(root_map)
    proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
    proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

    self.parser = UpperCaseSupportingContextlessMorphologicalParser(
        suffix_graph, predefined_paths,
        [word_root_finder, digit_numeral_root_finder, text_numeral_root_finder,
         proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])
@classmethod
def setUpClass(cls):
    super(_LikelihoodCalculatorTest, cls).setUpClass()

    all_roots = []
    lexemes = LexiconLoader.load_from_file(
        os.path.join(os.path.dirname(__file__), '../../../../../resources/master_dictionary.txt'))
    for di in lexemes:
        all_roots.extend(RootGenerator.generate(di))

    root_map_generator = RootMapGenerator()
    cls.root_map = root_map_generator.generate(all_roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(cls.root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(cls.root_map)
    digit_numeral_root_finder = DigitNumeralRootFinder()
    text_numeral_root_finder = TextNumeralRootFinder(cls.root_map)
    proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
    proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

    cls.contextless_parser = ContextlessMorphologicalParser(
        suffix_graph, predefined_paths,
        [word_root_finder, digit_numeral_root_finder, text_numeral_root_finder,
         proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])

    cls.mongodb_connection = pymongo.Connection(host='127.0.0.1')
    cls.collection_map = {
        1: cls.mongodb_connection['trnltk']['wordUnigrams999'],
        2: cls.mongodb_connection['trnltk']['wordBigrams999'],
        3: cls.mongodb_connection['trnltk']['wordTrigrams999']
    }

    cls.generator = None
@classmethod
def setUpClass(cls):
    super(TransitionGeneratorTest, cls).setUpClass()

    all_roots = []
    lexemes = LexiconLoader.load_from_file(
        os.path.join(os.path.dirname(__file__), '../../resources/master_dictionary.txt'))
    for di in lexemes:
        all_roots.extend(RootGenerator.generate(di))

    root_map_generator = RootMapGenerator()
    cls.root_map = root_map_generator.generate(all_roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(cls.root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(cls.root_map)
    digit_numeral_root_finder = DigitNumeralRootFinder()
    text_numeral_root_finder = TextNumeralRootFinder(cls.root_map)
    proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
    proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

    cls.parser = ContextlessMorphologicalParser(
        suffix_graph, predefined_paths,
        [word_root_finder, digit_numeral_root_finder, text_numeral_root_finder,
         proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])

    cls.transition_generator = TransitionGenerator(cls.parser)
@classmethod
def setUpClass(cls):
    super(InterpolatingLikelihoodCalculatorCalculationContextTest, cls).setUpClass()

    all_roots = []
    lexemes = LexiconLoader.load_from_file(
        os.path.join(os.path.dirname(__file__), '../../../../../resources/master_dictionary.txt'))
    for di in lexemes:
        all_roots.extend(RootGenerator.generate(di))

    root_map_generator = RootMapGenerator()
    cls.root_map = root_map_generator.generate(all_roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(cls.root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(cls.root_map)
    digit_numeral_root_finder = DigitNumeralRootFinder()
    text_numeral_root_finder = TextNumeralRootFinder(cls.root_map)
    proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
    proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

    cls.contextless_parser = UpperCaseSupportingContextlessMorphologicalParser(
        suffix_graph, predefined_paths,
        [word_root_finder, digit_numeral_root_finder, text_numeral_root_finder,
         proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])

    mongodb_connection = pymongo.Connection(host='127.0.0.1')
    cls.collection_map = {
        1: mongodb_connection['trnltk']['wordUnigrams999'],
        2: mongodb_connection['trnltk']['wordBigrams999'],
        3: mongodb_connection['trnltk']['wordTrigrams999']
    }

    database_index_builder = DatabaseIndexBuilder(cls.collection_map)
    target_form_given_context_counter = InMemoryCachingTargetFormGivenContextCounter(cls.collection_map)
    ngram_frequency_smoother = CachedSimpleGoodTuringNGramFrequencySmoother()
    sequence_likelihood_calculator = UniformSequenceLikelihoodCalculator()

    wrapped_generator = ContextParsingLikelihoodCalculator(
        database_index_builder, target_form_given_context_counter,
        ngram_frequency_smoother, sequence_likelihood_calculator)

    cls.generator = InterpolatingLikelihoodCalculator(wrapped_generator)
@classmethod
def create(cls, master_dictionary_path, ngram_collection_map):
    """
    @type master_dictionary_path: str or unicode
    @type ngram_collection_map: dict of int to Collection
    @rtype: ContextfulMorphologicalParser
    """
    all_roots = []
    lexemes = LexiconLoader.load_from_file(master_dictionary_path)
    for di in lexemes:
        all_roots.extend(RootGenerator.generate(di))

    root_map_generator = RootMapGenerator()
    root_map = root_map_generator.generate(all_roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(root_map)
    digit_numeral_root_finder = DigitNumeralRootFinder()
    text_numeral_root_finder = TextNumeralRootFinder(root_map)
    proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
    proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

    contextless_parser = UpperCaseSupportingContextlessMorphologicalParser(
        suffix_graph, predefined_paths,
        [word_root_finder, digit_numeral_root_finder, text_numeral_root_finder,
         proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])

    database_index_builder = DatabaseIndexBuilder(ngram_collection_map)
    target_form_given_context_counter = InMemoryCachingTargetFormGivenContextCounter(ngram_collection_map)
    ngram_frequency_smoother = CachedSimpleGoodTuringNGramFrequencySmoother()
    sequence_likelihood_calculator = SequenceLikelihoodCalculator(None)

    collocation_metric_calculator = ContextParsingLikelihoodCalculator(
        database_index_builder, target_form_given_context_counter,
        ngram_frequency_smoother, sequence_likelihood_calculator)
    interpolating_collocation_metric_calculator = InterpolatingLikelihoodCalculator(collocation_metric_calculator)

    cached_contextless_distribution_smoother = CachedContextlessDistributionSmoother()
    contextless_distribution_metric_calculator = ContextlessDistributionCalculator(
        database_index_builder, target_form_given_context_counter, cached_contextless_distribution_smoother)

    contextful_likelihood_calculator = ContextfulLikelihoodCalculator(
        interpolating_collocation_metric_calculator, contextless_distribution_metric_calculator)

    # close the circular dependency: the sequence likelihood calculator was
    # constructed with None and needs the contextful calculator it is part of
    sequence_likelihood_calculator._contextful_likelihood_calculator = contextful_likelihood_calculator

    contextful_morphological_parser = ContextfulMorphologicalParser(contextless_parser, contextful_likelihood_calculator)

    return contextful_morphological_parser
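# A minimal construction sketch for the factory method above; this is an
# assumption, not taken from the source. The enclosing class name
# `ContextfulMorphologicalParserFactory` and the dictionary path are
# hypothetical placeholders; the create() signature, the pymongo.Connection
# call, and the 'trnltk' n-gram collection names come from the surrounding
# snippets.
import pymongo

connection = pymongo.Connection(host='127.0.0.1')
ngram_collection_map = {
    1: connection['trnltk']['wordUnigrams999'],
    2: connection['trnltk']['wordBigrams999'],
    3: connection['trnltk']['wordTrigrams999'],
}
contextful_parser = ContextfulMorphologicalParserFactory.create(
    'path/to/master_dictionary.txt', ngram_collection_map)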
@classmethod
def setUpClass(cls):
    super(StatisticalParserTest, cls).setUpClass()

    all_roots = []
    lexemes = LexiconLoader.load_from_file(
        os.path.join(os.path.dirname(__file__), '../../resources/master_dictionary.txt'))
    for di in lexemes:
        all_roots.extend(RootGenerator.generate(di))

    root_map_generator = RootMapGenerator()
    cls.root_map = root_map_generator.generate(all_roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(BasicSuffixGraph()))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(cls.root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(cls.root_map)
    digit_numeral_root_finder = DigitNumeralRootFinder()
    text_numeral_root_finder = TextNumeralRootFinder(cls.root_map)
    proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
    proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

    contextless_parser = ContextlessMorphologicalParser(
        suffix_graph, predefined_paths,
        [word_root_finder, digit_numeral_root_finder, text_numeral_root_finder,
         proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])

    parseset_index = "001"
    dom = parse(os.path.join(
        os.path.dirname(__file__),
        '../../testresources/parsesets/parseset{}.xml'.format(parseset_index)))
    parseset = ParseSetBinding.build(dom.getElementsByTagName("parseset")[0])

    parse_set_word_list = []
    for sentence in parseset.sentences:
        parse_set_word_list.extend(sentence.words)

    complete_word_concordance_index = CompleteWordConcordanceIndex(parse_set_word_list)

    cls.parser = StatisticalParser(contextless_parser, complete_word_concordance_index)
@classmethod
def create_calculator(cls, parseset_index):
    all_roots = []
    lexemes = LexiconLoader.load_from_file(
        os.path.join(os.path.dirname(__file__), '../../../../resources/master_dictionary.txt'))
    for di in lexemes:
        all_roots.extend(RootGenerator.generate(di))

    root_map_generator = RootMapGenerator()
    cls.root_map = root_map_generator.generate(all_roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(cls.root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(cls.root_map)
    digit_numeral_root_finder = DigitNumeralRootFinder()
    text_numeral_root_finder = TextNumeralRootFinder(cls.root_map)
    proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
    proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

    cls.contextless_parser = UpperCaseSupportingContextlessMorphologicalParser(
        suffix_graph, predefined_paths,
        [word_root_finder, digit_numeral_root_finder, text_numeral_root_finder,
         proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])

    mongodb_connection = pymongo.Connection(host='127.0.0.1')
    collection_map = {
        1: mongodb_connection['trnltk']['wordUnigrams{}'.format(parseset_index)],
        2: mongodb_connection['trnltk']['wordBigrams{}'.format(parseset_index)],
        3: mongodb_connection['trnltk']['wordTrigrams{}'.format(parseset_index)]
    }

    database_index_builder = DatabaseIndexBuilder(collection_map)
    target_form_given_context_counter = InMemoryCachingTargetFormGivenContextCounter(collection_map)
    ngram_frequency_smoother = CachedSimpleGoodTuringNGramFrequencySmoother()
    sequence_likelihood_calculator = SequenceLikelihoodCalculator(None)

    collocation_metric_calculator = ContextParsingLikelihoodCalculator(
        database_index_builder, target_form_given_context_counter,
        ngram_frequency_smoother, sequence_likelihood_calculator)
    interpolating_collocation_metric_calculator = InterpolatingLikelihoodCalculator(collocation_metric_calculator)

    contextless_distribution_metric_calculator = ContextlessDistributionCalculator(
        database_index_builder, target_form_given_context_counter)

    contextful_likelihood_calculator = ContextfulLikelihoodCalculator(
        interpolating_collocation_metric_calculator, contextless_distribution_metric_calculator)

    # close the circular dependency, as in the factory method above
    sequence_likelihood_calculator._contextful_likelihood_calculator = contextful_likelihood_calculator

    return contextful_likelihood_calculator
def setUp(self):
    logging.basicConfig(level=logging.INFO)
    parser_logger.setLevel(logging.INFO)
    suffix_applier_logger.setLevel(logging.INFO)

    self.cloned_root_map = copy(self._org_root_map)

    suffix_graph = CopulaSuffixGraph(BasicSuffixGraph())
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(self.cloned_root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(self.cloned_root_map)

    self.parser = ContextlessMorphologicalParser(suffix_graph, predefined_paths, [word_root_finder])
@classmethod
def setUpClass(cls):
    all_roots = []
    lexemes = LexiconLoader.load_from_file(
        os.path.join(os.path.dirname(__file__), '../../../../../resources/master_dictionary.txt'))
    for di in lexemes:
        all_roots.extend(RootGenerator.generate(di))

    root_map_generator = RootMapGenerator()
    cls.root_map = root_map_generator.generate(all_roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(cls.root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    word_root_finder = WordRootFinder(cls.root_map)
    digit_numeral_root_finder = DigitNumeralRootFinder()
    text_numeral_root_finder = TextNumeralRootFinder(cls.root_map)
    proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
    proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

    cls.contextless_parser = ContextlessMorphologicalParser(
        suffix_graph, predefined_paths,
        [word_root_finder, digit_numeral_root_finder, text_numeral_root_finder,
         proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])

    mongodb_connection = pymongo.Connection(host='127.0.0.1')
    collection_map = {
        1: mongodb_connection['trnltk']['wordUnigrams{}'.format(cls.parseset_index)]
    }

    database_index_builder = DatabaseIndexBuilder(collection_map)
    target_form_given_context_counter = TargetFormGivenContextCounter(collection_map)
    smoother = CachedContextlessDistributionSmoother()
    smoother.initialize()

    cls.calculator = ContextlessDistributionCalculator(
        database_index_builder, target_form_given_context_counter, smoother)
    cls.calculator.build_indexes()
from trnltk.morphology.morphotactics.propernounsuffixgraph import ProperNounSuffixGraph

all_roots = []
lexemes = LexiconLoader.load_from_file('trnltk/trnltk/resources/master_dictionary.txt')
for di in lexemes:
    all_roots.extend(CircumflexConvertingRootGenerator.generate(di))

root_map_generator = RootMapGenerator()
root_map = root_map_generator.generate(all_roots)

suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
suffix_graph.initialize()

predefined_paths = PredefinedPaths(root_map, suffix_graph)
predefined_paths.create_predefined_paths()

word_root_finder = WordRootFinder(root_map)
text_numeral_root_finder = TextNumeralRootFinder(root_map)
digit_numeral_root_finder = DigitNumeralRootFinder()
proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()

parser = UpperCaseSupportingContextlessMorphologicalParser(
    suffix_graph, predefined_paths,
    [word_root_finder, text_numeral_root_finder, digit_numeral_root_finder,
     proper_noun_from_apostrophe_root_finder, proper_noun_without_apostrophe_root_finder])

sentence = sys.argv[1].decode('utf-8')
for word in sentence.split():
    # assumed completion (the original snippet ends at the line above):
    # parse each token and print its candidate parses with the formatter
    # used elsewhere in these tests
    for morpheme_container in parser.parse(word):
        print formatter.format_morpheme_container_for_tests(morpheme_container)
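# Example invocation of the script above (the file name is hypothetical; the
# sys.argv[1].decode('utf-8') call implies Python 2):
#
#   $ python parse_sentence.py "ali topu tut"
#
# Each whitespace-separated token of the argument sentence is fed in turn to
# the contextless parser built above.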
class PredefinedPathsTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        super(PredefinedPathsTest, cls).setUpClass()

        all_roots = []
        lexemes = LexiconLoader.load_from_file(
            os.path.join(os.path.dirname(__file__), '../../../resources/master_dictionary.txt'))
        for di in lexemes:
            all_roots.extend(RootGenerator.generate(di))

        root_map_generator = RootMapGenerator()
        cls.root_map = root_map_generator.generate(all_roots)

        cls.morpheme_container_map = {}

        cls.suffix_graph = BasicSuffixGraph()
        cls.suffix_graph.initialize()

    def setUp(self):
        super(PredefinedPathsTest, self).setUp()
        logging.basicConfig(level=logging.INFO)
        parser_logger.setLevel(logging.INFO)
        suffix_applier_logger.setLevel(logging.INFO)

        self.predefined_paths = PredefinedPaths(self.root_map, self.suffix_graph)

    def tearDown(self):
        self.predefined_paths = None
        self.morpheme_container_map = {}

    def test_should_have_paths_for_personal_pronouns(self):
        self.predefined_paths._create_predefined_path_of_ben()
        self.predefined_paths._create_predefined_path_of_sen()

        self.morpheme_container_map = self.predefined_paths._morpheme_container_map

        PRON = SyntacticCategory.PRONOUN
        PERS = SecondarySyntacticCategory.PERSONAL

        # last one ends with transition to derivation state
        self.assert_defined_path(u'ben', PRON, PERS,
            u'ben(ben)+Pron+Pers+A1sg+Pnon+Nom',
            u'ben(ben)+Pron+Pers+A1sg+Pnon+Acc(i[i])',
            u'ben(ben)+Pron+Pers+A1sg+Pnon+Loc(de[de])',
            u'ben(ben)+Pron+Pers+A1sg+Pnon+Abl(den[den])',
            u'ben(ben)+Pron+Pers+A1sg+Pnon+Ins(le[le])',
            u'ben(ben)+Pron+Pers+A1sg+Pnon+Ins(imle[imle])',
            u'ben(ben)+Pron+Pers+A1sg+Pnon+Gen(im[im])',
            u'ben(ben)+Pron+Pers+A1sg+Pnon+AccordingTo(ce[ce])',
            u'ben(ben)+Pron+Pers+A1sg+Pnon+Nom')

        self.assert_defined_path(u'ban', PRON, PERS,
            u'ban(ben)+Pron+Pers+A1sg+Pnon+Dat(a[a])')

        # last one ends with transition to derivation state
        self.assert_defined_path(u'sen', PRON, PERS,
            u'sen(sen)+Pron+Pers+A2sg+Pnon+Nom',
            u'sen(sen)+Pron+Pers+A2sg+Pnon+Acc(i[i])',
            u'sen(sen)+Pron+Pers+A2sg+Pnon+Loc(de[de])',
            u'sen(sen)+Pron+Pers+A2sg+Pnon+Abl(den[den])',
            u'sen(sen)+Pron+Pers+A2sg+Pnon+Ins(le[le])',
            u'sen(sen)+Pron+Pers+A2sg+Pnon+Ins(inle[inle])',
            u'sen(sen)+Pron+Pers+A2sg+Pnon+Gen(in[in])',
            u'sen(sen)+Pron+Pers+A2sg+Pnon+AccordingTo(ce[ce])',
            u'sen(sen)+Pron+Pers+A2sg+Pnon+Nom')

        self.assert_defined_path(u'san', PRON, PERS,
            u'san(sen)+Pron+Pers+A2sg+Pnon+Dat(a[a])')

    def test_should_have_paths_for_hepsi(self):
        parser_logger.setLevel(logging.DEBUG)
        suffix_applier_logger.setLevel(logging.DEBUG)

        self.predefined_paths._create_predefined_path_of_hepsi()

        self.morpheme_container_map = self.predefined_paths._morpheme_container_map

        PRON = SyntacticCategory.PRONOUN

        # last one ends with transition to derivation state
        self.assert_defined_path(u'hepsi', PRON, None,
            u'hepsi(hepsi)+Pron+A3pl+P3pl+Nom',
            u'hepsi(hepsi)+Pron+A3pl+P3pl+Acc(ni[ni])',
            u'hepsi(hepsi)+Pron+A3pl+P3pl+Dat(ne[ne])',
            u'hepsi(hepsi)+Pron+A3pl+P3pl+Loc(nde[nde])',
            u'hepsi(hepsi)+Pron+A3pl+P3pl+Abl(nden[nden])',
            u'hepsi(hepsi)+Pron+A3pl+P3pl+Ins(yle[yle])',
            u'hepsi(hepsi)+Pron+A3pl+P3pl+Gen(nin[nin])',
            u'hepsi(hepsi)+Pron+A3pl+P3pl+AccordingTo(nce[nce])',
            u'hepsi(hepsi)+Pron+A3pl+P3pl+Nom')

        # last one ends with transition to derivation state
        self.assert_defined_path(u'hep', PRON, None,
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+Nom',
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+Acc(i[i])',
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+Dat(e[e])',
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+Loc(de[de])',
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+Abl(den[den])',
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+Ins(le[le])',
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+Gen(in[in])',
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+AccordingTo(ce[ce])',
            u'hep(hepsi)+Pron+A1pl+P1pl(imiz[imiz])+Nom',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+Nom',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+Acc(i[i])',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+Dat(e[e])',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+Loc(de[de])',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+Abl(den[den])',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+Ins(le[le])',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+Gen(in[in])',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+AccordingTo(ce[ce])',
            u'hep(hepsi)+Pron+A2pl+P2pl(iniz[iniz])+Nom')

    def test_should_have_paths_for_ques(self):
        parser_logger.setLevel(logging.DEBUG)
        suffix_applier_logger.setLevel(logging.DEBUG)

        self.predefined_paths._create_predefined_path_of_question_particles()

        self.morpheme_container_map = self.predefined_paths._morpheme_container_map

        QUES = SyntacticCategory.QUESTION

        # last one ends with transition to derivation state
        self.assert_defined_path(u'mı', QUES, None,
            u'mı(mı)+Ques+Pres+A1sg(yım[yım])',
            u'mı(mı)+Ques+Pres+A2sg(sın[sın])',
            u'mı(mı)+Ques+Pres+A3sg',
            u'mı(mı)+Ques+Pres+A1pl(yız[yız])',
            u'mı(mı)+Ques+Pres+A2pl(sınız[sınız])',
            u'mı(mı)+Ques+Pres+A3pl(lar[lar])',
            u'mı(mı)+Ques+Past(ydı[ydı])+A1sg(m[m])',
            u'mı(mı)+Ques+Past(ydı[ydı])+A2sg(n[n])',
            u'mı(mı)+Ques+Past(ydı[ydı])+A3sg',
            u'mı(mı)+Ques+Past(ydı[ydı])+A1pl(k[k])',
            u'mı(mı)+Ques+Past(ydı[ydı])+A2pl(nız[nız])',
            u'mı(mı)+Ques+Past(ydı[ydı])+A3pl(lar[lar])',
            u'mı(mı)+Ques+Narr(ymış[ymış])+A1sg(ım[ım])',
            u'mı(mı)+Ques+Narr(ymış[ymış])+A2sg(sın[sın])',
            u'mı(mı)+Ques+Narr(ymış[ymış])+A3sg',
            u'mı(mı)+Ques+Narr(ymış[ymış])+A1pl(ız[ız])',
            u'mı(mı)+Ques+Narr(ymış[ymış])+A2pl(sınız[sınız])',
            u'mı(mı)+Ques+Narr(ymış[ymış])+A3pl(lar[lar])')

    def test_should_have_paths_for_pronouns_with_implicit_possession(self):
        parser_logger.setLevel(logging.DEBUG)
        suffix_applier_logger.setLevel(logging.DEBUG)

        self.predefined_paths._create_predefined_path_of_bazilari_bazisi()
        self.predefined_paths._create_predefined_path_of_kimileri_kimisi_kimi()
        self.predefined_paths._create_predefined_path_of_birileri_birisi_biri()
        self.predefined_paths._create_predefined_path_of_hicbirisi_hicbiri()
        self.predefined_paths._create_predefined_path_of_birbiri()
        self.predefined_paths._create_predefined_path_of_cogu_bircogu_coklari_bircoklari()
        self.predefined_paths._create_predefined_path_of_birkaci()
        self.predefined_paths._create_predefined_path_of_cumlesi()
        self.predefined_paths._create_predefined_path_of_digeri_digerleri()

        self.morpheme_container_map = self.predefined_paths._morpheme_container_map

        PRON = SyntacticCategory.PRONOUN

        self.assert_defined_path(u'bazıları', PRON, None,
            u'bazıları(bazıları)+Pron+A3sg+P3sg',
            u'bazıları(bazıları)+Pron+A3sg+P1pl(mız[mız])',
            u'bazıları(bazıları)+Pron+A3sg+P2pl(nız[nız])')
        self.assert_defined_path(u'bazısı', PRON, None,
            u'bazısı(bazısı)+Pron+A3sg+P3sg')

        self.assert_defined_path(u'kimileri', PRON, None,
            u'kimileri(kimileri)+Pron+A3sg+P3sg',
            u'kimileri(kimileri)+Pron+A3sg+P1pl(miz[miz])',
            u'kimileri(kimileri)+Pron+A3sg+P2pl(niz[niz])')
        self.assert_defined_path(u'kimisi', PRON, None,
            u'kimisi(kimisi)+Pron+A3sg+P3sg')
        self.assert_defined_path(u'kimi', PRON, None,
            u'kimi(kimi)+Pron+A3sg+P3sg',
            u'kimi(kimi)+Pron+A3sg+P1pl(miz[miz])',
            u'kimi(kimi)+Pron+A3sg+P2pl(niz[niz])')

        self.assert_defined_path(u'birileri', PRON, None,
            u'birileri(birileri)+Pron+A3sg+P3sg',
            u'birileri(birileri)+Pron+A3sg+P1pl(miz[miz])',
            u'birileri(birileri)+Pron+A3sg+P2pl(niz[niz])')
        self.assert_defined_path(u'birisi', PRON, None,
            u'birisi(birisi)+Pron+A3sg+P3sg')
        self.assert_defined_path(u'biri', PRON, None,
            u'biri(biri)+Pron+A3sg+P3sg',
            u'biri(biri)+Pron+A3sg+P1pl(miz[miz])',
            u'biri(biri)+Pron+A3sg+P2pl(niz[niz])')

        self.assert_defined_path(u'hiçbirisi', PRON, None,
            u'hiçbirisi(hiçbirisi)+Pron+A3sg+P3sg')
        self.assert_defined_path(u'hiçbiri', PRON, None,
            u'hiçbiri(hiçbiri)+Pron+A3sg+P3sg',
            u'hiçbiri(hiçbiri)+Pron+A3sg+P1pl(miz[miz])',
            u'hiçbiri(hiçbiri)+Pron+A3sg+P2pl(niz[niz])')

        self.assert_defined_path(u'birbiri', PRON, None,
            u'birbiri(birbiri)+Pron+A3sg+P3sg',
            u'birbiri(birbiri)+Pron+A1pl+P1pl(miz[miz])',
            u'birbiri(birbiri)+Pron+A2pl+P2pl(niz[niz])')
        self.assert_defined_path(u'birbir', PRON, None,
            u'birbir(birbiri)+Pron+A3pl+P3pl(leri[leri])')

        self.assert_defined_path(u'çoğu', PRON, None,
            u'çoğu(çoğu)+Pron+A3sg+P3sg',
            u'çoğu(çoğu)+Pron+A3sg+P1pl(muz[muz])',
            u'çoğu(çoğu)+Pron+A3sg+P2pl(nuz[nuz])')
        self.assert_defined_path(u'birçoğu', PRON, None,
            u'birçoğu(birçoğu)+Pron+A3sg+P3sg',
            u'birçoğu(birçoğu)+Pron+A3sg+P1pl(muz[muz])',
            u'birçoğu(birçoğu)+Pron+A3sg+P2pl(nuz[nuz])')
        self.assert_defined_path(u'çokları', PRON, None,
            u'çokları(çokları)+Pron+A3sg+P3pl')
        self.assert_defined_path(u'birçokları', PRON, None,
            u'birçokları(birçokları)+Pron+A3sg+P3pl')

        self.assert_defined_path(u'birkaçı', PRON, None,
            u'birkaçı(birkaçı)+Pron+A3sg+P3sg',
            u'birkaçı(birkaçı)+Pron+A3sg+P1pl(mız[mız])',
            u'birkaçı(birkaçı)+Pron+A3sg+P2pl(nız[nız])')

        self.assert_defined_path(u'cümlesi', PRON, None,
            u'cümlesi(cümlesi)+Pron+A3sg+P3sg')

        self.assert_defined_path(u'diğeri', PRON, None,
            u'diğeri(diğeri)+Pron+A3sg+P3sg',
            u'diğeri(diğeri)+Pron+A3sg+P1pl(miz[miz])',
            u'diğeri(diğeri)+Pron+A3sg+P2pl(niz[niz])')
        self.assert_defined_path(u'diğerleri', PRON, None,
            u'diğerleri(diğerleri)+Pron+A3sg+P3pl',
            u'diğerleri(diğerleri)+Pron+A3sg+P1pl(miz[miz])',
            u'diğerleri(diğerleri)+Pron+A3sg+P2pl(niz[niz])')

    def test_should_have_paths_for_irregular_pronouns(self):
        parser_logger.setLevel(logging.DEBUG)
        suffix_applier_logger.setLevel(logging.DEBUG)

        self.predefined_paths._create_predefined_path_of_herkes()

        self.morpheme_container_map = self.predefined_paths._morpheme_container_map

        PRON = SyntacticCategory.PRONOUN

        self.assert_defined_path(u'herkes', PRON, None,
            u'herkes(herkes)+Pron+A3sg+Pnon')

    def test_should_have_paths_for_pronouns_bura_sura_ora(self):
        parser_logger.setLevel(logging.DEBUG)
        suffix_applier_logger.setLevel(logging.DEBUG)

        self.predefined_paths._create_predefined_path_of_ora_bura_sura_nere()

        self.morpheme_container_map = self.predefined_paths._morpheme_container_map

        PRON = SyntacticCategory.PRONOUN

        self.assert_defined_path(u'or', PRON, None,
            u'or(ora)+Pron+A3sg+Pnon+Loc(da[da])',
            u'or(ora)+Pron+A3sg+Pnon+Abl(dan[dan])')
        self.assert_defined_path(u'bur', PRON, None,
            u'bur(bura)+Pron+A3sg+Pnon+Loc(da[da])',
            u'bur(bura)+Pron+A3sg+Pnon+Abl(dan[dan])')
        self.assert_defined_path(u'şur', PRON, None,
            u'şur(şura)+Pron+A3sg+Pnon+Loc(da[da])',
            u'şur(şura)+Pron+A3sg+Pnon+Abl(dan[dan])')
        self.assert_defined_path(u'ner', PRON, SecondarySyntacticCategory.QUESTION,
            u'ner(nere)+Pron+Ques+A3sg+Pnon+Loc(de[de])',
            u'ner(nere)+Pron+Ques+A3sg+Pnon+Abl(den[den])')

    def test_should_have_paths_for_iceri_disari(self):
        parser_logger.setLevel(logging.DEBUG)
        suffix_applier_logger.setLevel(logging.DEBUG)

        self.predefined_paths._create_predefined_path_of_iceri_disari()

        self.morpheme_container_map = self.predefined_paths._morpheme_container_map

        NOUN = SyntacticCategory.NOUN

        self.assert_defined_path(u'içer', NOUN, None,
            u'içer(içeri)+Noun+A3sg+Pnon+Loc(de[de])',
            u'içer(içeri)+Noun+A3sg+Pnon+Abl(den[den])',
            u'içer(içeri)+Noun+A3sg+P3sg(si[si])')
        self.assert_defined_path(u'dışar', NOUN, None,
            u'dışar(dışarı)+Noun+A3sg+Pnon+Loc(da[da])',
            u'dışar(dışarı)+Noun+A3sg+Pnon+Abl(dan[dan])',
            u'dışar(dışarı)+Noun+A3sg+P3sg(sı[sı])')

    def assert_defined_path(self, root, syntactic_category, secondary_syntactic_category, *args):
        assert_that(self.predefined_morpheme_containers(root, syntactic_category, secondary_syntactic_category),
                    AreMorphemeContainersMatch([a for a in args]))

    def predefined_morpheme_containers(self, root_str, syntactic_category, secondary_syntactic_category):
        predefined_morpheme_containers = []
        for root in self.morpheme_container_map.keys():
            if root.str == root_str \
                    and root.lexeme.syntactic_category == syntactic_category \
                    and root.lexeme.secondary_syntactic_category == secondary_syntactic_category:
                predefined_morpheme_containers.extend(self.morpheme_container_map[root])

        return [formatter.format_morpheme_container_for_tests(r) for r in predefined_morpheme_containers]