def test_find_lcs_length_calls_required_function(self, mock):
    """
    Checks that calling find_lcs_length triggers the injected mock

    The mock is presumably the patched fill_lcs_matrix function; the patch
    itself is applied outside this method - confirm against the decorator.
    """
    sentence = ('the', 'dog', 'is', 'running')
    threshold = 0.3

    # The same sentence is passed twice: only the fact of the call matters.
    find_lcs_length(sentence, sentence, threshold)

    self.assertTrue(mock.called)
def test_find_lcs_length_reversed_behaviour(self):
    """
    Checks that find_lcs_length returns the same length when the two
    sentences are passed in the opposite order
    """
    dog_sentence = ('the', 'dog', 'is', 'running', 'inside', 'the', 'house')
    cat_sentence = ('the', 'cat', 'is', 'sleeping', 'inside', 'the', 'house')
    threshold = 0.3
    expected = 5

    # Both calls are made before any assertion runs.
    direct = find_lcs_length(dog_sentence, cat_sentence, threshold)
    swapped = find_lcs_length(cat_sentence, dog_sentence, threshold)

    for result in (direct, swapped):
        self.assertEqual(expected, result)
def test_find_lcs_length_empty_input(self):
    """
    Checks that find_lcs_length returns 0 when either sentence is an
    empty tuple
    """
    nothing = ()
    sentence = ('a', 'boy', 'plays', 'with', 'ball')
    threshold = 0.3
    expected = 0

    # The empty tuple is tried in the first and then in the second position.
    empty_first = find_lcs_length(nothing, sentence, threshold)
    empty_second = find_lcs_length(sentence, nothing, threshold)

    for result in (empty_first, empty_second):
        self.assertEqual(expected, result)
def test_find_lcs_length_different_sized_inputs(self):
    """
    Checks find_lcs_length on sentences of unequal length (5 and 7 tokens),
    in both argument orders
    """
    shorter = ('the', 'dog', 'is', 'running', 'inside')
    longer = ('the', 'cat', 'is', 'sleeping', 'inside', 'the', 'house')
    threshold = 0.3
    expected = 3

    # Both calls are made before any assertion runs.
    short_first = find_lcs_length(shorter, longer, threshold)
    long_first = find_lcs_length(longer, shorter, threshold)

    for result in (short_first, long_first):
        self.assertEqual(expected, result)
def test_find_lcs_length_incorrect_inputs(self):
    """
    Tests that find_lcs_length function can handle incorrect inputs

    Every bad value is tried in both sentence positions and the function is
    expected to return -1 each time. Each value runs in its own subTest so
    that a failure names the offending input and the remaining values are
    still checked.
    """
    expected = -1
    bad_inputs = [[], {}, '', 9.22, -1, 0, -6, None, True, (None, None)]
    patches_sentence = ('the', 'dog', 'is', 'running')
    plagiarism_threshold = 0.3

    for bad_input in bad_inputs:
        with self.subTest(bad_input=bad_input):
            actual = find_lcs_length(bad_input, patches_sentence, plagiarism_threshold)
            actual_reversed = find_lcs_length(patches_sentence, bad_input, plagiarism_threshold)
            self.assertEqual(expected, actual)
            self.assertEqual(expected, actual_reversed)
def test_find_lcs_length_output_check(self):
    """
    Tests that find_lcs_length function returns an int for valid params

    assertIsInstance is used rather than assertTrue(isinstance(...)) so that
    a failure reports the value that was actually returned.
    """
    sentence_first = ('the', 'dog', 'is', 'running', 'here')
    sentence_second = ('a', 'boy', 'plays', 'with', 'ball')
    plagiarism_threshold = 0.3

    actual = find_lcs_length(sentence_first, sentence_second, plagiarism_threshold)

    self.assertIsInstance(actual, int)
def test_find_lcs_length_threshold_behaviour(self):
    """
    Checks that find_lcs_length does not return the raw 2/3 ratio for two
    three-word sentences that share two words
    """
    dog_sentence = ('the', 'dog', 'is')
    cat_sentence = ('the', 'cat', 'is')
    threshold = 0.3

    # Two shared words out of three words per sentence.
    # NOTE(review): 2/3 is above the 0.3 threshold, so the earlier remark
    # "2/3 < 0.3 = 0" does not hold numerically - confirm the intent.
    ratio = 2/3

    result = find_lcs_length(dog_sentence, cat_sentence, threshold)

    self.assertNotEqual(ratio, result)
def test_find_lcs_length_incorrect_threshold(self):
    """
    Tests that find_lcs_length function can handle incorrect threshold input

    The function is expected to return -1 for every bad threshold value.
    Each value runs in its own subTest so that a failure names the offending
    threshold and the remaining values are still checked.
    """
    expected = -1
    bad_inputs = [[], {}, '', -1, -6.34, -6, 1.2, None, True, (None, None)]
    patches_sentence = ('the', 'dog', 'is', 'running')

    for bad_input in bad_inputs:
        with self.subTest(bad_input=bad_input):
            actual = find_lcs_length(patches_sentence, patches_sentence, bad_input)
            self.assertEqual(expected, actual)
def test_find_lcs_length_optimized_works_faster(self):
    """
    Tests that find_lcs_length_optimized function works faster than
    find_lcs_length function

    A single wall-clock sample of such a short call is dominated by noise
    and warm-up cost, so each function is timed in several batches and the
    fastest batch is compared.
    """
    sentence_first = ('the', 'dog', 'is', 'running', 'here')
    sentence_second = ('a', 'boy', 'plays', 'with', 'ball')
    plagiarism_threshold = 0.3

    def best_time(function):
        """Return the fastest of several timed batches of calls to function."""
        batches = timeit.repeat(
            lambda: function(sentence_first, sentence_second, plagiarism_threshold),
            repeat=5,
            number=200,
        )
        # The minimum is the sample least disturbed by other system activity.
        return min(batches)

    not_optimized = best_time(find_lcs_length)
    optimized = best_time(find_lcs_length_optimized)

    self.assertGreater(not_optimized, optimized)
def test_find_lcs_length_no_diff(self):
    """
    Checks that find_lcs_length returns 0 for two sentences that have no
    word in common
    """
    dog_sentence = ('the', 'dog', 'is', 'running', 'here')
    boy_sentence = ('a', 'boy', 'plays', 'with', 'ball')
    threshold = 0.3

    lcs_length = find_lcs_length(dog_sentence, boy_sentence, threshold)

    self.assertEqual(0, lcs_length)
def test_find_lcs_length_complex(self):
    """
    Checks find_lcs_length on two seven-word sentences that differ in two
    positions
    """
    dog_sentence = ('the', 'dog', 'is', 'running', 'inside', 'the', 'house')
    cat_sentence = ('the', 'cat', 'is', 'sleeping', 'inside', 'the', 'house')
    threshold = 0.3

    lcs_length = find_lcs_length(dog_sentence, cat_sentence, threshold)

    self.assertEqual(5, lcs_length)
def test_find_lcs_length_ideal(self):
    """
    Checks find_lcs_length on a simple pair of four-word sentences that
    share two words
    """
    dog_sentence = ('the', 'dog', 'is', 'running')
    cat_sentence = ('the', 'cat', 'is', 'sleeping')
    threshold = 0.3

    lcs_length = find_lcs_length(dog_sentence, cat_sentence, threshold)

    self.assertEqual(2, lcs_length)
"""
Starter script for the longest common subsequence implementation
"""

from lab_2 import main

if __name__ == "__main__":
    plagiarism_threshold = 0.3
    reference_text = '''The horse is running. It is fast.'''
    candidate_text = '''The cow is eating. It is slow.'''

    reference_tokens = main.tokenize_by_lines(reference_text)
    print('Original tokens: ', reference_tokens)

    candidate_tokens = main.tokenize_by_lines(candidate_text)
    print('Second tokens: ', candidate_tokens)

    # Sized by the number of tokenized entries in each text; the result is
    # not used by anything below.
    zero_matrix = main.create_zero_matrix(len(reference_tokens), len(candidate_tokens))

    # Only the first entry of each tokenized text is compared.
    reference_sentence = reference_tokens[0]
    candidate_sentence = candidate_tokens[0]

    lcs_matrix = main.fill_lcs_matrix(reference_sentence, candidate_sentence)
    print('LCS matrix: ', lcs_matrix)

    lcs_length = main.find_lcs_length(reference_sentence, candidate_sentence, plagiarism_threshold)
    print('LCS is', lcs_length)

    RESULT = lcs_length
    # NOTE(review): RESULT is compared with the value it was just assigned
    # from, so this assertion cannot fail - consider an explicit expected value.
    assert RESULT == lcs_length, "Not working"