示例#1
0
 def test_check_calculate_texts_none(self):
     """Check calculate_tf when the whole corpus is None.

     The calculator is built with None instead of a list of texts and
     tf_values is expected to hold no elements afterwards.
     """
     calculator = TfIdfCalculator(None)
     calculator.calculate_tf()

     self.assertCountEqual(calculator.tf_values, [])
示例#2
0
 def test_check_calculate_all_none(self):
     """Check calculate_idf when every text in the corpus is None.

     idf_values is expected to be an empty dict afterwards.
     """
     calculator = TfIdfCalculator([None, None])
     calculator.calculate_idf()

     self.assertEqual(calculator.idf_values, {})
    def test_check_calculate_tf_idf_none_tf(self):
        """Check calculate() when tf_values has been set to None.

        tf_idf_values is expected to hold no elements afterwards.
        """
        calculator = TfIdfCalculator([])
        # Overwrite the term frequencies by hand before running calculate().
        calculator.tf_values = None

        calculator.calculate()

        self.assertCountEqual(calculator.tf_idf_values, [])
    def test_check_calculate_tf_idf_no_tf(self):
        """Check calculate() when tf_values is empty but idf_values is filled.

        tf_idf_values is expected to hold no elements afterwards.
        """
        # Each idf value is math.log(3 / n); this maps word -> n.
        denominators = {
            'this': 2, 'is': 3, 'an': 1, 'example': 1, 'of': 1,
            'test': 2, 'text': 2, 'contains': 1, 'two': 1, 'sentences': 1,
            'written': 1, 'on': 1, 'english': 1, 'simple': 1, 'third': 1,
            'one': 1, 'there': 1, 'no': 1, 'much': 1, 'sense': 1,
        }

        calculator = TfIdfCalculator([])
        calculator.tf_values = []
        calculator.idf_values = {
            word: math.log(3 / n) for word, n in denominators.items()
        }

        calculator.calculate()

        self.assertCountEqual(calculator.tf_idf_values, [])
示例#5
0
 def test_check_calculate_one_none(self):
     """Check calculate_idf when one of the two texts is None."""
     real_text = [
         'this', 'is', 'test', 'text', 'text', 'is', 'written', 'on',
         'english', 'text', 'is', 'simple',
     ]
     calculator = TfIdfCalculator([None, real_text])
     calculator.calculate_idf()

     # Every distinct word of the non-None text is expected to score
     # math.log(1 / 1).
     distinct_words = (
         'this', 'is', 'test', 'text', 'written', 'on', 'english', 'simple',
     )
     expected_idf = dict.fromkeys(distinct_words, math.log(1 / 1))

     self.assertEqual(calculator.idf_values, expected_idf)
示例#6
0
 def test_check_calculate_tf_elements_not_str(self):
     """Check calculate_tf when the texts also contain non-string elements.

     The expected frequencies cover only the string tokens.
     """
     first_text = [
         'this', 'is', 'an', 'example', 'of', 'test', 'text', 'text',
         'contains', 'two', 'sentences', 123, (), [1, 2, 3], 2 * 4,
     ]
     second_text = [
         'this', 'is', 'test', 'text', 'text', 'is', 'written', 'on',
         'english', 123, 'text', 'is', 'simple',
     ]
     calculator = TfIdfCalculator([first_text, second_text])
     calculator.calculate_tf()

     # Occurrence counts of the string tokens; the expected frequency is
     # count / 11 for the first text and count / 12 for the second.
     first_counts = {
         'this': 1, 'is': 1, 'an': 1, 'example': 1, 'of': 1,
         'test': 1, 'text': 2, 'contains': 1, 'two': 1, 'sentences': 1,
     }
     second_counts = {
         'this': 1, 'is': 3, 'test': 1, 'text': 3,
         'written': 1, 'on': 1, 'english': 1, 'simple': 1,
     }
     expected_tf = [
         {word: count / 11 for word, count in first_counts.items()},
         {word: count / 12 for word, count in second_counts.items()},
     ]

     self.assertCountEqual(calculator.tf_values, expected_tf)
示例#7
0
 def test_check_calculate_tf_ideal(self):
     """Check calculate_tf on two well-formed texts made only of strings."""
     first_text = [
         'this', 'is', 'an', 'example', 'of', 'test', 'text', 'text',
         'contains', 'two', 'sentences',
     ]
     second_text = [
         'this', 'is', 'test', 'text', 'text', 'is', 'written', 'on',
         'english', 'text', 'is', 'simple',
     ]
     calculator = TfIdfCalculator([first_text, second_text])
     calculator.calculate_tf()

     # Occurrence counts per word; the expected frequency is count / 11
     # for the first text and count / 12 for the second.
     first_counts = {
         'this': 1, 'is': 1, 'an': 1, 'example': 1, 'of': 1,
         'test': 1, 'text': 2, 'contains': 1, 'two': 1, 'sentences': 1,
     }
     second_counts = {
         'this': 1, 'is': 3, 'test': 1, 'text': 3,
         'written': 1, 'on': 1, 'english': 1, 'simple': 1,
     }
     expected_tf = [
         {word: count / 11 for word, count in first_counts.items()},
         {word: count / 12 for word, count in second_counts.items()},
     ]

     self.assertCountEqual(calculator.tf_values, expected_tf)
示例#8
0
    def test_check_calculate_idf_ideal(self):
        """Check calculate_idf on three well-formed texts made only of strings."""
        corpus = [
            [
                'this', 'is', 'an', 'example', 'of', 'test', 'text', 'text',
                'contains', 'two', 'sentences',
            ],
            [
                'this', 'is', 'test', 'text', 'text', 'is', 'written', 'on',
                'english', 'text', 'is', 'simple',
            ],
            ['third', 'one', 'there', 'is', 'no', 'much', 'sense'],
        ]
        calculator = TfIdfCalculator(corpus)
        calculator.calculate_idf()

        # Each expected idf value is math.log(3 / n); this maps word -> n.
        denominators = {
            'this': 2, 'is': 3, 'an': 1, 'example': 1, 'of': 1,
            'test': 2, 'text': 2, 'contains': 1, 'two': 1, 'sentences': 1,
            'written': 1, 'on': 1, 'english': 1, 'simple': 1, 'third': 1,
            'one': 1, 'there': 1, 'no': 1, 'much': 1, 'sense': 1,
        }
        expected_idf = {
            word: math.log(3 / n) for word, n in denominators.items()
        }

        self.assertEqual(calculator.idf_values, expected_idf)
示例#9
0
 def test_check_calculate_one_none(self):
     """Check calculate_tf when one of the two texts is None.

     Only one frequency dict, for the non-None text, is expected.
     """
     real_text = [
         'this', 'is', 'test', 'text', 'text', 'is', 'written', 'on',
         'english', 'text', 'is', 'simple',
     ]
     calculator = TfIdfCalculator([None, real_text])
     calculator.calculate_tf()

     # Occurrence counts per word; the expected frequency is count / 12.
     word_counts = {
         'this': 1, 'is': 3, 'test': 1, 'text': 3,
         'written': 1, 'on': 1, 'english': 1, 'simple': 1,
     }
     expected_tf = [
         {word: count / 12 for word, count in word_counts.items()},
     ]

     self.assertCountEqual(calculator.tf_values, expected_tf)
示例#10
0
 def test_check_initialization(self):
     """Check the attributes of a freshly constructed TfIdfCalculator.

     corpus must equal the constructor argument, while tf_values,
     idf_values and tf_idf_values must start out empty.
     """
     english_text = [
         'this', 'is', 'an', 'example', 'of', 'test', 'text', 'it',
         'contains', 'two', 'sentences',
     ]
     german_text = [
         'das', 'ist', 'ein', 'testtext', 'es', 'ist', 'auf',
         'deutsch', 'geschrieben',
     ]
     corpus = [english_text, german_text]

     calculator = TfIdfCalculator(corpus)

     self.assertEqual(calculator.corpus, corpus)
     self.assertEqual(calculator.tf_values, [])
     self.assertEqual(calculator.idf_values, {})
     self.assertEqual(calculator.tf_idf_values, [])
示例#11
0
 def test_check_calculate_idf_elements_not_str(self):
     """Check calculate_idf when the texts also contain non-string elements.

     The expected idf mapping covers only the string tokens of the three
     texts; each value is math.log(3 / n).
     """
     clean_texts = [[
         'this', 'is', 'an', 'example', 'of', 'test', 'text', 'text',
         'contains', 'two', 'sentences', 123, (), [1, 2, 3], 2 * 4
     ],
                    [
                        'this', 'is', 'test', 'text', 'text', 'is',
                        'written', 'on', 'english', 123, 'text', 'is',
                        'simple'
                    ],
                    ['third', 'one', 'there', 'is', 'no', 'much', 'sense']]
     tf_instance = TfIdfCalculator(clean_texts)
     tf_instance.calculate_idf()
     expected_res = {
         'this': math.log(3 / 2),
         'is': math.log(3 / 3),
         'an': math.log(3 / 1),
         'example': math.log(3 / 1),
         'of': math.log(3 / 1),
         'test': math.log(3 / 2),
         'text': math.log(3 / 2),
         'contains': math.log(3 / 1),
         'two': math.log(3 / 1),
         'sentences': math.log(3 / 1),
         'written': math.log(3 / 1),
         'on': math.log(3 / 1),
         'english': math.log(3 / 1),
         'simple': math.log(3 / 1),
         'third': math.log(3 / 1),
         'one': math.log(3 / 1),
         'there': math.log(3 / 1),
         'no': math.log(3 / 1),
         'much': math.log(3 / 1),
         'sense': math.log(3 / 1),
     }
     # Without this assertion the test passes no matter what calculate_idf
     # produces; compare the result against the expected mapping.
     self.assertEqual(tf_instance.idf_values, expected_res)
示例#12
0
    def test_report_on_word_not_in_tfidf(self):
        """Check report_on returns an empty tuple for a word absent from the texts."""
        english_text = [
            'this', 'is', 'an', 'example', 'of', 'test', 'text', 'it',
            'contains', 'two', 'sentences',
        ]
        german_text = [
            'das', 'ist', 'ein', 'testtext', 'es', 'ist', 'auf',
            'deutsch', 'geschrieben',
        ]
        calculator = TfIdfCalculator([english_text, german_text])
        # tf_idf_values is reset by hand before calculate() is run.
        calculator.tf_idf_values = None

        calculator.calculate()

        # 'wtf' occurs in neither text.
        self.assertEqual(calculator.report_on('wtf', 0), ())
示例#13
0
    def test_report_on_index_bigger(self):
        """Check report_on returns an empty tuple for an out-of-range text index."""
        english_text = [
            'this', 'is', 'an', 'example', 'of', 'test', 'text', 'it',
            'contains', 'two', 'sentences',
        ]
        german_text = [
            'das', 'ist', 'ein', 'testtext', 'es', 'ist', 'auf',
            'deutsch', 'geschrieben',
        ]
        calculator = TfIdfCalculator([english_text, german_text])
        # Seed tf_idf_values by hand before calculate() is run.
        calculator.tf_idf_values = [{'this': 10, 'that': 9, 'another': 5}]

        calculator.calculate()

        # The corpus has two texts, so index 2 is past the last one.
        self.assertEqual(calculator.report_on('this', 2), ())
    def test_check_calculate_tf_idf_no_idf(self):
        """Check calculate() when tf_values is filled but idf_values is empty.

        tf_idf_values is expected to hold no elements afterwards.
        """
        # (denominator, word -> count) pairs; each tf value is
        # count / denominator.
        tf_spec = [
            (11, {
                'this': 1, 'is': 1, 'an': 1, 'example': 1, 'of': 1,
                'test': 1, 'text': 2, 'contains': 1, 'two': 1,
                'sentences': 1,
            }),
            (12, {
                'this': 1, 'is': 3, 'test': 1, 'text': 3,
                'written': 1, 'on': 1, 'english': 1, 'simple': 1,
            }),
            (5, {'there': 1, 'is': 1, 'no': 1, 'much': 1, 'sense': 1}),
        ]

        calculator = TfIdfCalculator([])
        calculator.tf_values = [
            {word: count / total for word, count in counts.items()}
            for total, counts in tf_spec
        ]
        calculator.idf_values = {}

        calculator.calculate()

        self.assertCountEqual(calculator.tf_idf_values, [])
    def test_check_calculate_tf_idf_ideal(self):
        """Check calculate() when both tf_values and idf_values are filled.

        Every expected tf-idf entry is the word's tf value multiplied by
        its idf value.
        """
        # (denominator, word -> count) pairs; each tf value is
        # count / denominator.
        tf_spec = [
            (11, {
                'this': 1, 'is': 1, 'an': 1, 'example': 1, 'of': 1,
                'test': 1, 'text': 2, 'contains': 1, 'two': 1,
                'sentences': 1,
            }),
            (12, {
                'this': 1, 'is': 3, 'test': 1, 'text': 3,
                'written': 1, 'on': 1, 'english': 1, 'simple': 1,
            }),
            (5, {'there': 1, 'is': 1, 'no': 1, 'much': 1, 'sense': 1}),
        ]
        # Each idf value is math.log(3 / n); this maps word -> n.
        idf_denominators = {
            'this': 2, 'is': 3, 'an': 1, 'example': 1, 'of': 1,
            'test': 2, 'text': 2, 'contains': 1, 'two': 1, 'sentences': 1,
            'written': 1, 'on': 1, 'english': 1, 'simple': 1, 'third': 1,
            'one': 1, 'there': 1, 'no': 1, 'much': 1, 'sense': 1,
        }

        tf_table = [
            {word: count / total for word, count in counts.items()}
            for total, counts in tf_spec
        ]
        idf_table = {
            word: math.log(3 / n) for word, n in idf_denominators.items()
        }
        # Fresh dicts of plain floats, built before calculate() runs, so the
        # expectation shares no mutable state with the calculator.
        expected_tf_idf = [
            {word: tf * idf_table[word] for word, tf in doc_tf.items()}
            for doc_tf in tf_table
        ]

        calculator = TfIdfCalculator([])
        calculator.tf_values = tf_table
        calculator.idf_values = idf_table

        calculator.calculate()

        self.assertCountEqual(calculator.tf_idf_values, expected_tf_idf)
示例#16
0
from lab_4.main import TfIdfCalculator

# Small demo: build a calculator over two token lists, seed tf_idf_values
# by hand, run calculate(), and print the report for 'this' in text 0.
clean_texts = [
    [
        'this', 'is', 'an', 'example', 'of', 'test', 'text', 'it',
        'contains', 'two', 'sentences',
    ],
    [
        'das', 'ist', 'ein', 'testtext', 'es', 'ist', 'auf', 'deutsch',
        'geschrieben',
    ],
]

tf_instance = TfIdfCalculator(clean_texts)
tf_instance.tf_idf_values = [
    {
        'this': 10,
        'that': 9,
        'another': 5,
    },
]
tf_instance.calculate()

res = tf_instance.report_on('this', 0)
print(res)