def test_generate_likelihood_of_one_word_given_two_context_words(self):
    # Scenario: surface u'erkek' with two leading and two following context
    # positions, delegated to self._test_generate_likelihood.
    #
    # Debug switches, left disabled:
    # query_logger.setLevel(logging.DEBUG)
    # collocation_likelihood_calculator_logger.setLevel(logging.DEBUG)
    new_container = MockMorphemeContainerBuilder.builder

    # Containers are built in the same order as they appear in the sentence.
    gencten = (new_container(None, u"gençten", "Noun")
               .stem(u"genç", "Noun")
               .lexeme(u"genç", "Adj")
               .build())
    bir = new_container(None, u"bir", "Det").build()
    girdi = (new_container(None, u"girdi", "Verb")
             .stem(u"gir", "Verb")
             .lexeme(u"gir", "Verb")
             .build())
    full_stop = new_container(None, u".", "Punc").build()

    # Each context position is wrapped in its own single-element list.
    self._test_generate_likelihood(
        surface=u'erkek',
        leading_context=[[gencten], [bir]],
        following_context=[[girdi], [full_stop]])
def test_generate_likelihood_of_one_word_given_two_context_words(self):
    # Scenario: surface u'erkek' with two leading and two following context
    # positions. An empty dict is handed over as calculation_context and
    # pretty-printed once self._test_generate_likelihood returns.
    # NOTE(review): presumably the helper fills that dict in - confirm.
    leading_first = MockMorphemeContainerBuilder.builder(
        None, u"gençten", "Noun"
    ).stem(u"genç", "Noun").lexeme(u"genç", "Adj").build()
    leading_second = MockMorphemeContainerBuilder.builder(
        None, u"bir", "Det"
    ).build()

    following_first = MockMorphemeContainerBuilder.builder(
        None, u"girdi", "Verb"
    ).stem(u"gir", "Verb").lexeme(u"gir", "Verb").build()
    following_second = MockMorphemeContainerBuilder.builder(
        None, u".", "Punc"
    ).build()

    collected = {}
    self._test_generate_likelihood(
        surface=u'erkek',
        leading_context=[[leading_first], [leading_second]],
        following_context=[[following_first], [following_second]],
        calculation_context=collected)
    pprint.pprint(collected)
def test_generate_likelihood_of_one_word_given_one_following_context_word_sc1(self):
    # Scenario: surface u'erkek' with no leading context and a single
    # following context position, delegated to self._test_generate_likelihood.
    #
    # Debug switches, left disabled:
    # query_logger.setLevel(logging.DEBUG)
    # collocation_likelihood_calculator_logger.setLevel(logging.DEBUG)

    # NOTE(review): the surface form is tagged "Noun" while its stem and
    # lexeme are tagged "Verb" - confirm this is intentional for this scenario.
    girdi = (MockMorphemeContainerBuilder.builder(None, u"girdi", "Noun")
             .stem(u"gir", "Verb")
             .lexeme(u"gir", "Verb")
             .build())

    self._test_generate_likelihood(
        surface=u'erkek',
        leading_context=None,
        following_context=[[girdi]])
def test_generate_likelihood_of_one_word_given_one_leading_context_word_sc4(self):
    # Scenario: surface u'ter' preceded by a single context position and no
    # following context, delegated to self._test_generate_likelihood.
    #
    # Debug switches, left disabled:
    # query_logger.setLevel(logging.DEBUG)
    # collocation_likelihood_calculator_logger.setLevel(logging.DEBUG)
    kerem = MockMorphemeContainerBuilder.builder(
        None, u"Kerem", "Noun", "Prop").build()

    self._test_generate_likelihood(
        surface=u'ter',
        leading_context=[[kerem]],
        following_context=None)
def test_generate_likelihood_of_one_word_given_one_leading_context_word(self):
    # Scenario: surface u'erkek' preceded by a single context position.
    # No following_context argument is passed. An empty dict is handed over
    # as calculation_context and pretty-printed after the helper returns.
    bir = MockMorphemeContainerBuilder.builder(None, u"bir", "Det").build()

    collected = {}
    self._test_generate_likelihood(
        surface=u'erkek',
        leading_context=[[bir]],
        calculation_context=collected)
    pprint.pprint(collected)
def test_generate_likelihood_of_one_word_given_two_context_words(self):
    # Scenario: surface u'erkek' surrounded by two leading and two following
    # context positions. The dict passed as calculation_context starts empty
    # and is pretty-printed after self._test_generate_likelihood returns.
    make = MockMorphemeContainerBuilder.builder

    # Leading side, built first.
    before = [
        [make(None, u"gençten", "Noun")
         .stem(u"genç", "Noun")
         .lexeme(u"genç", "Adj")
         .build()],
        [make(None, u"bir", "Det").build()],
    ]
    word = u'erkek'
    # Following side, built second.
    after = [
        [make(None, u"girdi", "Verb")
         .stem(u"gir", "Verb")
         .lexeme(u"gir", "Verb")
         .build()],
        [make(None, u".", "Punc").build()],
    ]

    details = {}
    self._test_generate_likelihood(
        surface=word,
        leading_context=before,
        following_context=after,
        calculation_context=details)
    pprint.pprint(details)
def test_generate_likelihood_of_one_word_given_one_leading_context_word(self):
    # Scenario: surface u'erkek' with one leading context position only.
    # The following_context argument is deliberately not supplied here, as in
    # the original call. The calculation_context dict starts empty and is
    # pretty-printed once the helper has returned.
    determiner_builder = MockMorphemeContainerBuilder.builder(
        None, u"bir", "Det")
    before = [[determiner_builder.build()]]

    details = {}
    self._test_generate_likelihood(
        surface=u'erkek',
        leading_context=before,
        calculation_context=details)
    pprint.pprint(details)
def _create_mock_container(self, word):
    """Build a mock morpheme container describing ``word``.

    Returns ``None`` (after printing a notice) when ``word`` is an
    ``UnparsableWordBinding``. Otherwise returns the object produced by
    ``MockMorphemeContainerBuilder`` from the word's surface, stem and lemma
    root. Each syntactic category gets ``u'_'`` plus its secondary category
    appended when that secondary category is truthy.
    """
    if isinstance(word, UnparsableWordBinding):
        # Single parenthesised argument: prints the same text as the
        # statement form under Python 2.
        print(u'Previous word is unparsable, skipped : {}'.format(word.str))
        return None

    def with_secondary(primary, secondary):
        # Append "_<secondary>" only when a secondary category is present.
        return primary + (u'_' + secondary) if secondary else primary

    surface_text = word.str
    surface_category = word.syntactic_category
    stem_text, stem_category, stem_secondary = WordNGramGenerator._get_stem(word)
    lemma_text = word.root.lemma_root
    lemma_category = word.root.syntactic_category

    surface_category = with_secondary(
        surface_category, word.secondary_syntactic_category)
    stem_category = with_secondary(stem_category, stem_secondary)
    lemma_category = with_secondary(
        lemma_category, word.root.secondary_syntactic_category)

    builder = MockMorphemeContainerBuilder.builder(
        word.format(), surface_text, surface_category)
    builder = builder.stem(stem_text, stem_category)
    builder = builder.lexeme(lemma_text, lemma_category)
    return builder.build()
def _create_mock_container(self, word):
    """Return a mock morpheme container for ``word``, or ``None``.

    An ``UnparsableWordBinding`` is reported on stdout and skipped by
    returning ``None``. For any other word the surface, stem (taken from
    ``WordNGramGenerator._get_stem``) and lemma root are fed to
    ``MockMorphemeContainerBuilder``. A truthy secondary syntactic category
    is joined to its primary category with ``u'_'``.
    """
    unparsable = isinstance(word, UnparsableWordBinding)
    if unparsable:
        # Parenthesised single argument keeps the Python 2 output unchanged.
        print(u'Previous word is unparsable, skipped : {}'.format(word.str))
        return None

    surface = word.str
    surface_tag = word.syntactic_category
    stem, stem_tag, stem_secondary_tag = WordNGramGenerator._get_stem(word)
    lemma_root = word.root.lemma_root
    lemma_root_tag = word.root.syntactic_category

    # Secondary categories are optional; append them only when present.
    if word.secondary_syntactic_category:
        surface_tag = surface_tag + (u'_' + word.secondary_syntactic_category)
    if stem_secondary_tag:
        stem_tag = stem_tag + (u'_' + stem_secondary_tag)
    if word.root.secondary_syntactic_category:
        lemma_root_tag = lemma_root_tag + (
            u'_' + word.root.secondary_syntactic_category)

    return (
        MockMorphemeContainerBuilder.builder(word.format(), surface, surface_tag)
        .stem(stem, stem_tag)
        .lexeme(lemma_root, lemma_root_tag)
        .build()
    )
def _build_parse_context_item_from_word(self, parse_context_word):
    """Turn one parse-context word mapping into a parse-context item.

    When ``parse_context_word['parsed']`` is falsy, the word's ``'surface'``
    is handed to ``self.morphological_parser.parse`` and that result is
    returned as is. Otherwise a one-element list is returned, holding the
    object built by ``MockMorphemeContainerBuilder`` from the mapping's
    surface, stem and lemma-root entries. ``'parse_result'`` is read with
    ``.get`` and so may be absent; every other key is required.
    """
    entry = parse_context_word

    if not entry['parsed']:
        return self.morphological_parser.parse(entry['surface'])

    # Keys are read immediately before the call that consumes them, so a
    # missing key surfaces at the same point as in the chained form.
    builder = MockMorphemeContainerBuilder(
        entry.get('parse_result'),
        entry['surface'],
        entry['surface_syntactic_category'],
        entry['surface_secondary_syntactic_category'])
    builder = builder.stem(
        entry['stem'],
        entry['stem_syntactic_category'],
        entry['stem_secondary_syntactic_category'])
    builder = builder.lexeme(
        entry['lemma_root'],
        entry['lemma_root_syntactic_category'],
        entry['lemma_root_secondary_syntactic_category'])
    return [builder.build()]