Exemplo n.º 1
0
    def test_it_fill_dictionaries_with_treetagger(self):
        """
        Check KeywordExtractor._fill_dictionaries_with_treetagger on Italian text.

        A single Italian sentence is handed to the method together with the
        value 1.0 and a tuple of lowercase words; afterwards every dictionary
        and set the method is expected to populate is compared with its
        expected content.
        """
        test_dir = os.path.join(self.script_folder, "test")
        extractor = KeywordExtractor(
            test_dir, "small-mixed2.txt", self.output_folder)

        sentence = (
            'Ieri il primario di Casa Basaglia Lorenzo Toresini è andato in pensione.'
        )
        # Presumably stopwords -- verify against KeywordExtractor. None of
        # them shows up in the expected noun/lemma dictionary below.
        words = ('il', 'di', 'essere', 'casa', 'in')
        extractor._fill_dictionaries_with_treetagger(sentence, 1.0, words)

        # Every expected lemma carries the same value that was passed in.
        expected_noun_lemmas = dict.fromkeys(
            ('primario', 'Basaglia', 'Lorenzo', 'Toresini', 'pensione',
             'andare'),
            1.0,
        )
        self.assertEqual(extractor.noun_lemma_dict, expected_noun_lemmas)

        # Original-case token -> lowercase lemma.
        expected_token_to_lemma = {
            'Basaglia': 'basaglia',
            'Casa': 'casa',
            'Ieri': 'ieri',
            'Lorenzo': 'lorenzo',
            'Toresini': 'toresini',
            'andato': 'andare',
            'di': 'di',
            'il': 'il',
            'in': 'in',
            'pensione': 'pensione',
            'primario': 'primario',
            'è': 'essere',
        }
        self.assertEqual(
            extractor.token_to_lemma_dict_original_case,
            expected_token_to_lemma)

        # Tokens and lemmas alike map to the set of POS tags seen for them.
        expected_pos_tags = {
            'Basaglia': {'NOM'},
            'Casa': {'NPR'},
            'Ieri': {'ADV'},
            'Lorenzo': {'NPR'},
            'Toresini': {'NOM'},
            'andare': {'VER:pper'},
            'andato': {'VER:pper'},
            'di': {'PRE'},
            'essere': {'VER:pres'},
            'il': {'DET:def'},
            'in': {'PRE'},
            'pensione': {'NOM'},
            'primario': {'NOM'},
            'è': {'VER:pres'},
        }
        self.assertEqual(extractor.lemma_token_to_POS, expected_pos_tags)

        self.assertEqual(extractor.title_noun_lemmas_dict, {})

        expected_proper_nouns = {'Basaglia', 'Lorenzo', 'Toresini'}
        self.assertEqual(
            extractor.tree_taggers_proper_nouns, expected_proper_nouns)
Exemplo n.º 2
0
    def test_de_fill_dictionaries_with_treetagger(self):
        """
        Check KeywordExtractor._fill_dictionaries_with_treetagger on German text.

        A single German sentence is handed to the method together with the
        value 1.0 and a tuple of lowercase words; afterwards every dictionary
        and set the method is expected to populate is compared with its
        expected content.
        """
        test_dir = os.path.join(self.script_folder, "test")
        extractor = KeywordExtractor(
            test_dir, "small-mixed.txt", self.output_folder)

        sentence = (
            'Antworten auf diese Fragen gab es aus dem Passeiertal bereits einige.'
        )
        # Presumably stopwords -- verify against KeywordExtractor. None of
        # them shows up in the expected noun/lemma dictionary below.
        words = ('auf', 'dem', 'es', 'aus')
        extractor._fill_dictionaries_with_treetagger(sentence, 1.0, words)

        # Every expected lemma carries the same value that was passed in.
        expected_noun_lemmas = dict.fromkeys(
            ('Antwort', 'Frage', 'Passeiertal', 'geben'), 1.0)
        self.assertEqual(extractor.noun_lemma_dict, expected_noun_lemmas)

        # Original-case token -> lowercase lemma.
        expected_token_to_lemma = {
            'Antworten': 'antwort',
            'Fragen': 'frage',
            'Passeiertal': 'passeiertal',
            'auf': 'auf',
            'aus': 'aus',
            'bereits': 'bereits',
            'dem': 'die',
            'diese': 'dies',
            'einige': 'einige',
            'es': 'es',
            'gab': 'geben',
        }
        self.assertEqual(
            extractor.token_to_lemma_dict_original_case,
            expected_token_to_lemma)

        # Tokens and lemmas alike map to the set of POS tags seen for them.
        expected_pos_tags = {
            'Antwort': {'NN'},
            'Antworten': {'NN'},
            'Frage': {'NN'},
            'Fragen': {'NN'},
            'Passeiertal': {'NN'},
            'auf': {'APPR'},
            'aus': {'APPR'},
            'bereits': {'ADV'},
            'dem': {'ART'},
            'die': {'ART'},
            'dies': {'PDAT'},
            'diese': {'PDAT'},
            'einige': {'PIS'},
            'es': {'PPER'},
            'gab': {'VVFIN'},
            'geben': {'VVFIN'},
        }
        self.assertEqual(extractor.lemma_token_to_POS, expected_pos_tags)

        self.assertEqual(extractor.title_noun_lemmas_dict, {})

        # No proper nouns are expected for this sentence.
        self.assertEqual(extractor.tree_taggers_proper_nouns, set())