def test_candidates_are_merged_into_the_bigger_range_if_they_share_the_start_index(self):
        # Ranges handed to mergeCandidates that begin at the same index are
        # expected to collapse into the widest range for that start index.
        word_tree = Tree(1)
        word_tree.build(['alpha', 'blue', 'charlie', 'delta'])

        # Each scenario: (ranges passed in, candidates expected afterwards).
        scenarios = (
            ([(0, 1), (0, 2)], [(0, 2)]),
            ([(1, 1), (1, 4), (2, 3), (2, 5), (1, 5)], [(1, 5), (2, 5)]),
            ([(1, 1), (0, 1), (1, 2), (2, 2), (0, 0)], [(0, 1), (1, 2), (2, 2)]),
        )
        for incoming, merged in scenarios:
            word_tree.mergeCandidates(incoming, 0)
            self.assertEqual(merged, word_tree.candidates)
    def init_Tree(self, entailData: str):
        """
        Build ``self.Tree`` from the whitespace-separated tokens of a text file.

        A new ``Tree`` is created with ``self.treeScanwindow``; every line of
        ``entailData`` is stripped and split on whitespace; the tokens are
        passed to ``Tree.build`` in file order; ``self.save_Tree()`` is called
        last.

        :param entailData: location of the text resource, opened in ``'r'``
            mode through ``smart_open``.
        """
        self.Tree = Tree(self.treeScanwindow)
        with smart_open.smart_open(entailData, 'r') as source:
            # Flatten the file into one token list, preserving reading order.
            tokens = [token for row in source for token in row.strip().split()]

        self.Tree.build(tokens)
        self.save_Tree()
    def test_scanWindow_1(self):
        # With a scan window of 1, is_contain is expected to be True for the
        # built words with or without their own trailing punctuation, and
        # False for a mismatching punctuation mark or an unknown word.
        word_tree = Tree(1)
        word_tree.build([
            'alpha.', 'bids,', 'blue,', 'carves',
            'charlie', 'dances', 'delta', 'eats',
        ])

        for present in ('alpha', 'alpha.', 'bids', 'blue'):
            self.assertEqual(True, word_tree.is_contain(present))

        for absent in ('bids.', 'back'):
            self.assertEqual(False, word_tree.is_contain(absent))
    def test_scanWindow_1_repeated(self):
        # calc_frequency expectations for a window of 1 when the input
        # repeats words with and without trailing punctuation.
        word_tree = Tree(1)
        word_tree.build([
            'alpha.', 'alpha', 'alpha,', 'bids,', 'blue,', 'carves',
            'charlie', 'dances', 'delta', 'eats', 'blue', 'blue',
        ])

        # (query, expected frequency); the last two are expected to miss.
        expectations = (
            ('alpha', 3),
            ('alpha.', 1),
            ('bids', 1),
            ('blue', 3),
            ('bids.', 0),
            ('back', 0),
        )
        for query, count in expectations:
            self.assertEqual(count, word_tree.calc_frequency(query))
    def test_scanWindow_1_non_repeated_negative(self):
        # With a window of 1, calc_most_frequent_next_word is expected to
        # return the empty result ('', 0) for known and unknown words alike.
        word_tree = Tree(1)
        word_tree.build([
            'alpha.', 'bids,', 'blue,', 'carves',
            'charlie', 'dances', 'delta', 'eats',
        ])

        no_next_word = ('', 0)
        for query in ('alpha', 'alpha.', 'back', 'something'):
            self.assertEqual(no_next_word, word_tree.calc_most_frequent_next_word(query))
    def test_words_are_sorted(self):
        # The records of a window-2 tree are expected to be sorted whether the
        # input arrives already ordered or shuffled.
        corpora = (
            ['alpha', 'blue', 'charlie', 'delta'],
            ['alpha', 'zulu', 'blue', 'whisky', 'tango', 'foxtrot', 'charlie', 'delta'],
        )
        for corpus in corpora:
            word_tree = Tree(2)
            word_tree.build(corpus)
            self.assertListIsSorted(word_tree.records)
    def test_candidates_are_sorted(self):
        # Candidates passed out of order are expected to come back in
        # ascending order after mergeCandidates.
        word_tree = Tree(1)
        word_tree.build(['alpha', 'blue', 'charlie', 'delta'])

        # Each scenario: (ranges passed in, candidates expected afterwards).
        scenarios = (
            ([(1, 1), (0, 0), (2, 2)], [(0, 0), (1, 1), (2, 2)]),
            ([(0, 0), (2, 0), (1, 0)], [(0, 0), (1, 0), (2, 0)]),
        )
        for shuffled, ordered in scenarios:
            word_tree.mergeCandidates(shuffled, 0)
            self.assertEqual(ordered, word_tree.candidates)
    def test_keeps_track_of_the_amount_of_records(self):
        # lenRecords is expected to equal the number of input words (4) for
        # both window sizes tried here.
        source_words = ['alpha', 'blue', 'charlie', 'delta']

        for window in (1, 2):
            word_tree = Tree(window)
            word_tree.build(source_words)
            self.assertEqual(4, word_tree.lenRecords)
    def test_ignores_punctuation_at_the_end_of_a_word(self):
        # Words built with a trailing '.' or ',' are expected to come back
        # from get_words without that punctuation.
        punctuated = [
            'alpha.', 'bids,', 'blue,', 'carves',
            'charlie', 'dances', 'delta', 'eats',
        ]
        word_tree = Tree(1)
        word_tree.build(punctuated)

        word_tree.mergeCandidates([(0, 3)], 0)

        self.assertEqual([(0, 3)], word_tree.candidates)
        bare_words = [['alpha'], ['bids'], ['blue']]
        self.assertEqual(bare_words, word_tree.get_words(0))
    def test_scanWindow_3_repeated_positive(self):
        # With a window of 3, calc_most_frequent_next_word is expected to
        # return (next word, count) for one- and two-word prefixes.
        word_tree = Tree(3)
        word_tree.build([
            'down', 'the', 'street',
            'down', 'the', 'path',
            'down', 'the', 'well',
        ])

        # (prefix, expected (next word, count))
        expectations = (
            ('down', ('the', 3)),
            ('down the', ('path', 1)),
            ('the', ('path', 1)),
        )
        for prefix, prediction in expectations:
            self.assertEqual(prediction, word_tree.calc_most_frequent_next_word(prefix))
    def test_extends_the_interval_for_repeated_words(self):
        # find_interval is expected to span every occurrence of a repeated
        # word: two records for 'alpha' and 'delta', one for 'blue'.
        word_tree = Tree(1)
        word_tree.build(['alpha', 'alpha', 'blue', 'charlie', 'delta', 'delta'])

        # (word, expected interval)
        expectations = (
            ('alpha', (0, 2)),
            ('blue', (2, 3)),
            ('delta', (4, 6)),
        )
        for word, interval in expectations:
            self.assertEqual(interval, word_tree.find_interval(word))
    def test_returns_word_when_single_word_records(self):
        # A one-record candidate range on a window-1 tree is expected to
        # yield exactly that record's word from get_words.
        vocabulary = ['alpha', 'blue', 'charlie', 'delta']
        word_tree = Tree(1)
        word_tree.build(vocabulary)

        word_tree.mergeCandidates([(0, 1)], 0)

        self.assertEqual([(0, 1)], word_tree.candidates)
        self.assertEqual([['alpha']], word_tree.get_words(0))
    def test_restricts_candidates_with_further_values_of_word_position(self):
        # Supplying 'charlie' at successive word positions is expected to
        # narrow the candidate ranges step by step until none remain.
        word_tree = Tree(3)
        word_tree.build(['alpha', 'blue', 'charlie', 'charlie', 'delta'])

        # records = [
        #   0  'alpha blue charlie',
        #   1  'blue charlie charlie',
        #   2  'charlie charlie delta'
        #   3  'charlie delta',
        #   4  'delta'
        # ]

        # (word position, candidates expected after the update)
        steps = (
            (0, [(2, 4)]),
            (1, [(2, 3)]),
            (2, []),
        )
        for position, remaining in steps:
            word_tree.update_words(position, ['charlie'])
            self.assertEqual(remaining, word_tree.candidates)
    def test_adds_candidates_for_words_between_others(self):
        # Updating position 0 with a different word each time is expected to
        # replace the candidates with the range of records starting with it.
        word_tree = Tree(2)
        word_tree.build(['alpha', 'blue', 'charlie', 'charlie', 'delta'])

        # records = [
        #   0  'alpha blue',
        #   1  'blue charlie',
        #   2  'charlie charlie'
        #   3  'charlie delta',
        #   4  'delta'
        # ]

        # (first word, candidates expected after the update)
        steps = (
            ('alpha', [(0, 1)]),
            ('blue', [(1, 2)]),
            ('charlie', [(2, 4)]),
        )
        for first_word, matching in steps:
            word_tree.update_words(0, [first_word])
            self.assertEqual(matching, word_tree.candidates)
    def test_clears_candidates_if_words_do_not_extend_candidates(self):
        # Phrases that are not among the records listed below are expected
        # to leave the candidate list empty, at either word position.
        word_tree = Tree(2)
        word_tree.build(['alpha', 'blue', 'charlie', 'charlie', 'delta'])

        # records = [
        #   0  'alpha blue',
        #   1  'blue charlie',
        #   2  'charlie charlie'
        #   3  'charlie delta',
        #   4  'delta'
        # ]

        # (word position, phrase passed to update_words)
        attempts = (
            (0, 'alpha back'),
            (0, 'alpha alpha'),
            (1, 'alpha back'),
        )
        for position, phrase in attempts:
            word_tree.update_words(position, [phrase])
            self.assertEqual([], word_tree.candidates)
    def test_does_nothing_if_no_candidates_are_passed(self):
        # Merging an empty candidate list is expected to leave the records
        # exactly as they were after build.
        word_tree = Tree(1)
        word_tree.build(['alpha'])
        untouched = ['alpha']

        self.assertEqual(untouched, word_tree.records)

        word_tree.mergeCandidates([], 0)

        self.assertEqual(untouched, word_tree.records)
    def test_candidates_are_merged_if_they_contain_the_same_word(self):
        # The same two overlapping ranges are merged with a different second
        # argument each time; the expected candidates differ between the two.
        word_tree = Tree(2)
        word_tree.build(['alpha', 'blue', 'charlie', 'charlie', 'delta'])

        # records = [
        #   0  'alpha blue',
        #   1  'blue charlie',
        #   2  'charlie charlie',
        #   3  'charlie delta',
        #   4  'delta'
        # ]

        # (2, 2) = ['charlie charlie']
        # (2, 3) = ['charlie charlie', 'charlie delta']
        # (second argument to mergeCandidates, candidates expected afterwards)
        scenarios = (
            (1, [(2, 3)]),
            (2, [(2, 2), (2, 2), (2, 3)]),
        )
        for position, outcome in scenarios:
            word_tree.mergeCandidates([(2, 2), (2, 3)], position)
            self.assertEqual(outcome, word_tree.candidates)
    def test_returns_words_from_avaialble_candidates(self):
        # For a window-1 tree, get_words(0) is expected to return one
        # single-word list per record covered by the current candidate range.
        word_tree = Tree(1)
        word_tree.build([
            'alpha', 'bids', 'blue', 'carves',
            'charlie', 'dances', 'delta', 'eats',
        ])

        # (candidate range, words expected from get_words(0))
        scenarios = (
            ((0, 2), [['alpha'], ['bids']]),
            ((0, 3), [['alpha'], ['bids'], ['blue']]),
            ((0, 4), [['alpha'], ['bids'], ['blue'], ['carves']]),
            ((1, 4), [['bids'], ['blue'], ['carves']]),
            ((2, 4), [['blue'], ['carves']]),
            ((3, 4), [['carves']]),
        )
        for span, covered in scenarios:
            word_tree.mergeCandidates([span], 0)
            self.assertEqual([span], word_tree.candidates)
            self.assertEqual(covered, word_tree.get_words(0))
    def test_records_and_length_are_correct_when_no_words_are_accepted(self):
        # Every input token has a symbol or digit inside the word (or a
        # trailing '@'); the tree is expected to end up with no records.
        rejected_tokens = ['alph!a', 'bl.ue', 'charl1e', 'delt@']
        word_tree = Tree(1)
        word_tree.build(rejected_tokens)

        self.assertEqual([], word_tree.records)
        self.assertEqual(0, word_tree.lenRecords)
    def test_commas_are_allowed_only_at_end_of_word(self):
        # Only the word with a trailing comma and the plain word are expected
        # in the records; a comma inside or in front of a word drops it.
        tokens = ['alpha,', 'bl,ue', ',charlie', 'delta']
        word_tree = Tree(1)
        word_tree.build(tokens)

        accepted = ['alpha,', 'delta']
        self.assertCountEqual(accepted, word_tree.records)
    def test_exclamation_marks_are_allowed_only_at_end_of_word(self):
        # Only the word with a trailing '!' and the plain word are expected
        # in the records; a '!' inside or in front of a word drops it.
        tokens = ['alpha!', 'bl!ue', '!charlie', 'delta']
        word_tree = Tree(1)
        word_tree.build(tokens)

        accepted = ['alpha!', 'delta']
        self.assertCountEqual(accepted, word_tree.records)
    def test_returns_word_when_multiple_words_records_scanWindow_3(self):
        # For a window-3 tree, get_words(2) is expected to return the full
        # three-word list of every record in the current candidate range.
        word_tree = Tree(3)
        word_tree.build([
            'alpha', 'bids', 'blue', 'carves',
            'charlie', 'dances', 'delta', 'eats',
        ])

        # The three-word lists expected for records 0 to 3.
        first = ['alpha', 'bids', 'blue']
        second = ['bids', 'blue', 'carves']
        third = ['blue', 'carves', 'charlie']
        fourth = ['carves', 'charlie', 'dances']

        # (candidate range, word lists expected from get_words(2))
        scenarios = (
            ((0, 2), [first, second]),
            ((0, 3), [first, second, third]),
            ((0, 4), [first, second, third, fourth]),
            ((1, 4), [second, third, fourth]),
            ((2, 4), [third, fourth]),
            ((3, 4), [fourth]),
        )
        for span, covered in scenarios:
            word_tree.mergeCandidates([span], 0)
            self.assertEqual([span], word_tree.candidates)
            self.assertEqual(covered, word_tree.get_words(2))
    def test_periods_are_allowed_only_at_end_of_word(self):
        # Only the word with a trailing period and the plain word are
        # expected in the records; a period inside or in front drops the word.
        tokens = ['alpha.', 'bl.ue', '.charlie', 'delta']
        word_tree = Tree(1)
        word_tree.build(tokens)

        accepted = ['alpha.', 'delta']
        self.assertCountEqual(accepted, word_tree.records)
    def test_scanWindow_2_repeated_negative(self):
        # With a window of 2, a two-word prefix is expected to give the empty
        # result ('', 0) from calc_most_frequent_next_word.
        word_tree = Tree(2)
        word_tree.build([
            'down', 'the', 'street',
            'down', 'the', 'path',
            'down', 'the', 'well',
        ])

        prediction = word_tree.calc_most_frequent_next_word('down the')
        self.assertEqual(('', 0), prediction)
    def test_candidates_are_compared_against_the_last(self):
        # Three disjoint ranges passed out of order are all expected to be
        # kept and to come back in ascending order.
        word_tree = Tree(1)
        word_tree.build(['alpha', 'blue', 'charlie', 'delta'])

        unordered = [(3, 3), (4, 4), (2, 2)]
        word_tree.mergeCandidates(unordered, 0)

        self.assertEqual([(2, 2), (3, 3), (4, 4)], word_tree.candidates)
    def test_does_nothing_for_not_found_words(self):
        # Each update_words call below is expected to leave the candidates
        # empty and the records unchanged.
        word_tree = Tree(2)
        word_tree.build(['alpha', 'blue', 'charlie', 'charlie', 'delta'])

        unchanged_records = [
            'alpha blue',
            'blue charlie',
            'charlie charlie',
            'charlie delta',
            'delta',
        ]

        # Baseline straight after build.
        self.assertEqual([], word_tree.candidates)
        self.assertEqual(unchanged_records, word_tree.records)

        # Same order as before: 'back' at positions 0 and 1, then
        # 'alpha blue' at positions 0 and 1.
        for phrase in ('back', 'alpha blue'):
            for position in (0, 1):
                word_tree.update_words(position, [phrase])
                self.assertEqual([], word_tree.candidates)
                self.assertEqual(unchanged_records, word_tree.records)
    def test_window_size_specifies_amount_of_words_collected_in_each_record(self):
        # Each record is expected to hold up to `window` consecutive words,
        # with shorter records near the end of the input. A window of 5 over
        # 4 words is expected to give the same records as a window of 4.
        source_words = ['alpha', 'blue', 'charlie', 'delta']

        # (window size, records expected in any order)
        expected_by_window = (
            (1, ['alpha', 'blue', 'charlie', 'delta']),
            (2, ['alpha blue', 'blue charlie', 'charlie delta', 'delta']),
            (3, ['alpha blue charlie', 'blue charlie delta', 'charlie delta', 'delta']),
            (4, ['alpha blue charlie delta', 'blue charlie delta', 'charlie delta', 'delta']),
            (5, ['alpha blue charlie delta', 'blue charlie delta', 'charlie delta', 'delta']),
        )
        for window, expected_records in expected_by_window:
            word_tree = Tree(window)
            word_tree.build(source_words)
            self.assertCountEqual(word_tree.records, expected_records)
    def test_scanWindow_2_non_repeated(self):
        # calc_frequency expectations for a window of 2 over non-repeating
        # input, part of which carries trailing punctuation.
        word_tree = Tree(2)
        word_tree.build([
            'alpha.', 'bids,', 'blue,', 'carves',
            'charlie', 'dances', 'delta', 'eats',
        ])

        # (query, expected frequency); the last three are expected to miss.
        expectations = (
            ('alpha. bids', 1),
            ('alpha. bids,', 1),
            ('bids', 1),
            ('bids, blue', 1),
            ('blue', 1),
            ('blue carves', 0),
            ('bids blue', 0),
            ('bids charlie', 0),
        )
        for query, count in expectations:
            self.assertEqual(count, word_tree.calc_frequency(query))
    def test_finds_the_interval_for_a_particular_entry(self):
        # For words that were not built, find_interval is expected to return
        # an empty interval (start == end) at the slot where the word would
        # fall among the built ones.
        word_tree = Tree(1)
        word_tree.build(['alpha', 'blue', 'charlie', 'delta'])

        # Probes are in alphabetical order; the markers show where each
        # built word sits between them.
        probes = (
            ('abacus', (0, 0)),
            # alpha
            ('amaranth', (1, 1)),
            ('basic', (1, 1)),
            # blue
            ('buzz', (2, 2)),
            ('cat', (2, 2)),
            # charlie
            ('cut', (3, 3)),
            ('dam', (3, 3)),
            # delta
            ('during', (4, 4)),
        )
        for probe, interval in probes:
            self.assertEqual(interval, word_tree.find_interval(probe))
 def test_words_are_lowercased(self):
     # Mixed-case input is expected to be stored in lowercase: every record
     # must equal its own lowercased form.
     mixed_case = ['Alpha', 'Blue', 'ChArLiE', 'DeltA']
     word_tree = Tree(2)
     word_tree.build(mixed_case)

     for record in word_tree.records:
         lowered = record.lower()
         self.assertEqual(lowered, record)