def test_find_lcs_length_calls_required_function(self, mock):
        """
        Verifies that running find_lcs_length
            marks the supplied mock object as called

        :param mock: stand-in object; presumably injected by a patch
            decorator for fill_lcs_matrix (the decorator is outside
            this view - confirm against the full file)
        """
        threshold = 0.3
        tokens = ('the', 'dog', 'is', 'running')

        # The same sentence is passed on both sides; only the call matters
        find_lcs_length(tokens, tokens, threshold)

        was_called = mock.called
        self.assertTrue(was_called)
    def test_find_lcs_length_reversed_behaviour(self):
        """
        Verifies that find_lcs_length function
            returns the same length when the two sentences are swapped
        """
        dog_sentence = ('the', 'dog', 'is', 'running', 'inside', 'the', 'house')
        cat_sentence = ('the', 'cat', 'is', 'sleeping', 'inside', 'the', 'house')
        threshold = 0.3
        lcs_expected = 5

        forward = find_lcs_length(dog_sentence, cat_sentence, threshold)
        backward = find_lcs_length(cat_sentence, dog_sentence, threshold)

        # Both argument orders must give the same answer
        for lcs_actual in (forward, backward):
            self.assertEqual(lcs_expected, lcs_actual)
    def test_find_lcs_length_empty_input(self):
        """
        Verifies that find_lcs_length function
            returns 0 when either sentence is an empty tuple
        """
        no_tokens = ()
        some_tokens = ('a', 'boy', 'plays', 'with', 'ball')
        threshold = 0.3

        empty_first = find_lcs_length(no_tokens, some_tokens, threshold)
        empty_second = find_lcs_length(some_tokens, no_tokens, threshold)

        self.assertEqual(0, empty_first)
        self.assertEqual(0, empty_second)
    def test_find_lcs_length_different_sized_inputs(self):
        """
        Verifies that find_lcs_length function
            copes with sentences of unequal token counts (5 vs 7)
        """
        short_sentence = ('the', 'dog', 'is', 'running', 'inside')
        long_sentence = ('the', 'cat', 'is', 'sleeping', 'inside', 'the', 'house')
        threshold = 0.3
        lcs_expected = 3

        short_then_long = find_lcs_length(short_sentence, long_sentence, threshold)
        long_then_short = find_lcs_length(long_sentence, short_sentence, threshold)

        # The result must not depend on which sentence is the longer one
        for lcs_actual in (short_then_long, long_then_short):
            self.assertEqual(lcs_expected, lcs_actual)
    def test_find_lcs_length_incorrect_inputs(self):
        """
        Tests that find_lcs_length function
            returns -1 when either sentence argument is not valid

        Every bad value is tried in both argument positions. Each value
        runs in its own subTest, so one failure names the offending
        input and does not stop the remaining values from being checked.
        """
        expected = -1
        bad_inputs = [[], {}, '', 9.22, -1, 0, -6, None, True, (None, None)]
        patches_sentence = ('the', 'dog', 'is', 'running')
        plagiarism_threshold = 0.3

        for bad_input in bad_inputs:
            with self.subTest(bad_input=bad_input):
                actual = find_lcs_length(bad_input, patches_sentence, plagiarism_threshold)
                actual_reversed = find_lcs_length(patches_sentence, bad_input, plagiarism_threshold)
                self.assertEqual(expected, actual)
                self.assertEqual(expected, actual_reversed)
    def test_find_lcs_length_output_check(self):
        """
        Tests that find_lcs_length function
            returns an int for valid sentence and threshold params
        """
        sentence_first = ('the', 'dog', 'is', 'running', 'here')
        sentence_second = ('a', 'boy', 'plays', 'with', 'ball')
        plagiarism_threshold = 0.3

        actual = find_lcs_length(sentence_first, sentence_second, plagiarism_threshold)
        # assertIsInstance reports the actual value and type on failure,
        # unlike assertTrue(isinstance(...)) which only says "False is not true"
        self.assertIsInstance(actual, int)
    def test_find_lcs_length_threshold_behaviour(self):
        """
        Verifies that find_lcs_length function
            does not return the fraction 2/3 for two three-token sentences
        """
        tokens_dog = ('the', 'dog', 'is')
        tokens_cat = ('the', 'cat', 'is')
        threshold = 0.3

        # 2/3 is presumably the share of common tokens ('the', 'is') in a
        # three-token sentence; the test only requires that this fraction
        # itself is not what the function returns
        raw_ratio = 2 / 3

        result = find_lcs_length(tokens_dog, tokens_cat, threshold)
        self.assertNotEqual(raw_ratio, result)
    def test_find_lcs_length_incorrect_threshold(self):
        """
        Tests that find_lcs_length function
            returns -1 when the threshold argument is not valid

        Each bad threshold runs in its own subTest, so one failure names
        the offending value and the remaining values are still checked.
        """
        expected = -1
        bad_inputs = [[], {}, '', -1, -6.34, -6, 1.2, None, True, (None, None)]
        patches_sentence = ('the', 'dog', 'is', 'running')

        for bad_input in bad_inputs:
            with self.subTest(bad_threshold=bad_input):
                actual = find_lcs_length(patches_sentence, patches_sentence, bad_input)
                self.assertEqual(expected, actual)
    def test_find_lcs_length_optimized_works_faster(self):
        """
        Tests find_lcs_length_optimized function
            can work faster than find_lcs_length function

        Each function is timed as the fastest of several batches of calls.
        A single wall-clock sample of such a short call is dominated by
        scheduler and warm-up noise, which makes the comparison flaky.
        """
        sentence_first = ('the', 'dog', 'is', 'running', 'here')
        sentence_second = ('a', 'boy', 'plays', 'with', 'ball')
        plagiarism_threshold = 0.3

        batches = 5
        calls_per_batch = 200

        # min() over the batches keeps the least-disturbed measurement
        not_optimized = min(timeit.repeat(
            lambda: find_lcs_length(sentence_first, sentence_second,
                                    plagiarism_threshold),
            repeat=batches, number=calls_per_batch))

        optimized = min(timeit.repeat(
            lambda: find_lcs_length_optimized(sentence_first, sentence_second,
                                              plagiarism_threshold),
            repeat=batches, number=calls_per_batch))

        self.assertGreater(not_optimized, optimized)
    def test_find_lcs_length_no_diff(self):
        """
        Verifies that find_lcs_length function
            returns 0 for two sentences that share no tokens
        """
        dog_sentence = ('the', 'dog', 'is', 'running', 'here')
        boy_sentence = ('a', 'boy', 'plays', 'with', 'ball')
        threshold = 0.3

        lcs_actual = find_lcs_length(dog_sentence, boy_sentence, threshold)

        self.assertEqual(0, lcs_actual)
    def test_find_lcs_length_complex(self):
        """
        Verifies that find_lcs_length function
            returns 5 for two seven-token sentences
            that differ in two positions
        """
        dog_sentence = ('the', 'dog', 'is', 'running', 'inside', 'the', 'house')
        cat_sentence = ('the', 'cat', 'is', 'sleeping', 'inside', 'the', 'house')
        threshold = 0.3

        lcs_actual = find_lcs_length(dog_sentence, cat_sentence, threshold)

        self.assertEqual(5, lcs_actual)
    def test_find_lcs_length_ideal(self):
        """
        Verifies that find_lcs_length function
            returns 2 for two four-token sentences
            that share 'the' and 'is'
        """
        dog_sentence = ('the', 'dog', 'is', 'running')
        cat_sentence = ('the', 'cat', 'is', 'sleeping')
        threshold = 0.3

        lcs_actual = find_lcs_length(dog_sentence, cat_sentence, threshold)

        self.assertEqual(2, lcs_actual)
Пример #13
0
"""
Longest common subsequence implementation starter
"""
from lab_2 import main

if __name__ == "__main__":
    # Demo run of the lab_2 pipeline on two short two-line texts:
    # tokenize, build the LCS matrix for the first lines, then get the LCS length.
    original_text = '''The horse is running.
      It is fast.'''
    second_text = '''The cow is eating. 
     It is slow.'''

    # The results are indexed with [0] below, i.e. one entry per line of text
    original_tokens = main.tokenize_by_lines(original_text)
    print('Original tokens: ', original_tokens)

    second_tokens = main.tokenize_by_lines(second_text)
    print('Second tokens: ', second_tokens)

    plagiarism_threshold = 0.3
    # Kept from the starter as a demonstration call; the matrix is not used below
    zero_matrix = main.create_zero_matrix(len(original_tokens),
                                          len(second_tokens))

    lcs_matrix = main.fill_lcs_matrix(original_tokens[0], second_tokens[0])
    print('LCS matrix: ', lcs_matrix)

    lcs_length = main.find_lcs_length(original_tokens[0], second_tokens[0],
                                      plagiarism_threshold)
    print('LCS is', lcs_length)

    RESULT = lcs_length
    # Compare against a fixed expectation rather than against RESULT itself,
    # otherwise the check can never fail. The first lines share two words
    # ("The", "is") out of four, so the expected length is 2 - assuming
    # tokenize_by_lines splits each line into words (TODO confirm).
    assert RESULT == 2, "Not working"