def test_calculate_frequences_not_string(self):
    """A non-string argument (the int 1) is expected to give an empty dict."""
    actual = main.calculate_frequences(1)
    self.assertEqual({}, actual)
def test_calculate_frequences_none(self):
    """Passing None instead of a text is expected to give an empty dict."""
    actual = main.calculate_frequences(None)
    self.assertEqual({}, actual)
def test_calculate_frequences_empty(self):
    """An empty string is expected to give an empty dict."""
    actual = main.calculate_frequences('')
    self.assertEqual({}, actual)
def test_calculate_frequences_one_word(self):
    """A text made of a single word is expected to count that word once."""
    text = 'hi'
    wanted = {'hi': 1}
    self.assertEqual(wanted, main.calculate_frequences(text))
def test_calculate_frequences_right_count(self):
    """Repeated words are expected to be tallied with their exact counts."""
    text = "a a a a b b b c c d"
    wanted = {
        'a': 4,
        'b': 3,
        'c': 2,
        'd': 1,
    }
    self.assertEqual(wanted, main.calculate_frequences(text))
def test_calculate_frequences_digits(self):
    """Numeric tokens are expected to be left out; words come back lowercased."""
    text = '4 8 15 16 23 42 Dharma Initiative'
    wanted = {
        'dharma': 1,
        'initiative': 1,
    }
    self.assertEqual(wanted, main.calculate_frequences(text))
def test_calculate_frequences_inapropriate_symbols_only(self):
    """A text holding only symbols and a digit is expected to give an empty dict."""
    text = """ @ 3 $ % \n * & ^%$ """
    actual = main.calculate_frequences(text)
    self.assertEqual({}, actual)
def test_calculate_frequences_punctuation(self):
    """Punctuation around words is expected to be ignored when counting."""
    text = "The: quick brown fox, 'jumps' over, the *lazy dog ~"
    wanted = {
        'the': 2,  # 'The' and 'the' are expected to fall under one key
        'quick': 1,
        'brown': 1,
        'fox': 1,
        'jumps': 1,
        'over': 1,
        'lazy': 1,
        'dog': 1,
    }
    self.assertEqual(wanted, main.calculate_frequences(text))
def test_calculate_frequences_ideal(self):
    """The module-level SAMPLE_TEXT is expected to produce the counts below."""
    wanted = {
        'the': 2,
        'quick': 1,
        'brown': 1,
        'fox': 1,
        'jumps': 1,
        'over': 1,
        'lazy': 1,
        'dog': 1,
    }
    actual = main.calculate_frequences(SAMPLE_TEXT)
    self.assertEqual(wanted, actual)
def test_calculate_frequences_multilines_punctuation(self):
    """A text with line breaks and punctuation is expected to count words only."""
    text = """The quick* brown \n "fox" jumps \n over~ the, lazy \n dog ^ """
    wanted = {
        'the': 2,  # 'The' and 'the' are expected to fall under one key
        'quick': 1,
        'brown': 1,
        'fox': 1,
        'jumps': 1,
        'over': 1,
        'lazy': 1,
        'dog': 1,
    }
    self.assertEqual(wanted, main.calculate_frequences(text))
break else: correct_word = spell_check_word(frequencies, as_is_words, element) new_list_correct.append(correct_word) # Step 3. Making new string out of correct list. new_str_text = '' for element in new_list_correct: new_str_text = new_str_text + element + ' ' # Step 4. Making identical text with corrected words. new_str_text_final = '' for element_letter in range(len(new_str_text)): if new_str_text[element_letter] in symbols_to_save: new_str_text_final = new_str_text_final[:-1] + new_str_text[ element_letter] continue new_str_text_final += new_str_text[element_letter] return new_str_text_final # The first word will be 'has', not 'this', # because program deletes symbols (first one) and finds 'has' in dictionary, # and obviously 'has' is more frequent than 'this'. # Other mistakes have been found correctly. string = 'Thas is My Tezt, to Chekc Punctyation. Tahank yuo for watching? Bapital Leters included...' big_dict = calculate_frequences(REFERENCE_TEXT) str_final = spell_check_text(big_dict, (), string) # print(str_final)