Пример #1
0
    def test_insert_patternTree(self):
        """Check how ``insert_patternTree`` arranges nodes below the root.

        Covers inserting a new pattern, re-inserting the same pattern,
        inserting a second distinct pattern, inserting a more general pattern
        that is expected to take over the existing children, and inserting
        three patterns that are all expected to stay direct root children.
        """
        pt = pattern_tree.get_root()
        pt.insert_patternTree(PatternTree('wwww'))
        self.assertEqual(len(pt.children), 1, pt)
        self.assertEqual(pt.children[0].pattern, 'wwww')

        # Inserting an identical pattern must not add a second child.
        pt.insert_patternTree(PatternTree('wwww'))
        self.assertEqual(len(pt.children), 1, pt)
        self.assertEqual(pt.children[0].pattern, 'wwww')

        # Child order is not part of the contract, so compare the sorted
        # patterns. Unlike checking each child against "either value", this
        # also fails if the same pattern shows up twice.
        pt.insert_patternTree(PatternTree('wwwb'))
        self.assertEqual(len(pt.children), 2)
        self.assertEqual(sorted(c.pattern for c in pt.children),
                         ['wwwb', 'wwww'])

        pt.insert_patternTree(PatternTree('www?'))
        self.assertEqual(
            len(pt.children), 1,
            'children should be sorted into more specific patterns, if one is added.'
        )
        self.assertEqual(pt.children[0].pattern, 'www?')
        child = pt.children[0]
        self.assertEqual(len(child.children), 2)
        self.assertEqual(sorted(c.pattern for c in child.children),
                         ['wwwb', 'wwww'])

        # A root obtained anew is expected to start without these children.
        pt = pattern_tree.get_root()
        pt.insert_patternTree(PatternTree('wb??'))
        pt.insert_patternTree(PatternTree('bw??'))
        pt.insert_patternTree(PatternTree('b??w'))
        self.assertEqual(len(pt.children), 3)
Пример #2
0
    def test_split_not_executed_if_sequences_divided_over_different_strengths(
            self):
        """Check when ``balance_tree`` splits a node fed by two strengths.

        Two sequences are inserted under 'ww??', each with a count of 300 and
        each under a different strength. After the first round the node is
        expected to stay unsplit; after a second identical round it is
        expected to have children.

        NOTE(review): judging by the test name, the split decision is made
        per strength rather than on the node total -- confirm against
        ``balance_tree``.
        """
        strength1 = '1k'
        strength2 = '18k'
        batch = 300

        # Preconditions on the externally defined ``max_seq_per_node``: one
        # batch stays below it, two batches exceed it. unittest assertions
        # are used instead of bare ``assert`` so the checks are not stripped
        # under ``python -O`` and report the offending values.
        self.assertGreater(max_seq_per_node, batch)
        self.assertLess(max_seq_per_node, batch * 2)

        pt = pattern_tree.get_root()
        pt.insert_patternTree(PatternTree('ww??'))

        pt.insert_sequence_counted(('wwbw', batch), strength1)
        pt.insert_sequence_counted(('wwbb', batch), strength2)
        self.assertEqual(len(pt.children[0].children), 0)

        # First round: balancing must leave the node without children.
        pt.balance_tree()
        self.assertEqual(len(pt.children[0].children), 0)

        pt.insert_sequence_counted(('wwbw', batch), strength1)
        pt.insert_sequence_counted(('wwbb', batch), strength2)
        self.assertEqual(len(pt.children[0].children), 0)

        # Second round: balancing must now have created children.
        pt.balance_tree()
        self.assertNotEqual(len(pt.children[0].children), 0)
Пример #3
0
    def test_init(self):
        """A new PatternTree keeps its pattern; the root's pattern is '????'."""
        given = 'bwbw'
        node = PatternTree(given)
        self.assertEqual(node.pattern, given)
        self.assertNotEqual(node.pattern, '????')

        self.assertEqual(pattern_tree.get_root().pattern, '????')
Пример #4
0
    def test_sequence_counted(self):
        """Check the frequencies reported after ``insert_sequence_counted``.

        Three scenarios, each on a root obtained anew from ``get_root``:
        nested patterns ('www?' above 'wwww'/'wwwb'), three sibling patterns,
        and a pattern that is added only after a matching sequence was
        inserted.
        """
        strength = '1k'
        pt = pattern_tree.get_root()
        for p in ['wwww', 'wwwb', 'www?']:
            pt.insert_patternTree(PatternTree(p))

        # Use ``strength`` throughout rather than repeating the literal, so
        # inserts and queries cannot drift apart if the value is changed.
        self.assertEqual(pt.get_frequency_of_pattern('wwww', strength), 0)
        pt.insert_sequence_counted(('wwww', 10), strength)
        self.assertEqual(pt.get_frequency_of_pattern('wwww', strength), 10)

        self.assertEqual(pt.get_frequency_of_pattern('www?', strength), 10)
        pt.insert_sequence_counted(('wwwb', 20), strength)
        self.assertEqual(pt.get_frequency_of_pattern('wwww', strength), 10)
        self.assertEqual(pt.get_frequency_of_pattern('wwwb', strength), 20)
        self.assertEqual(pt.get_frequency_of_pattern('www?', strength), 30)
        self.assertEqual(pt.get_frequency_of_pattern('????', strength), 30)

        # Inserting the same sequence again adds to the existing count.
        pt.insert_sequence_counted(('wwwb', 20), strength)
        self.assertEqual(pt.get_frequency_of_pattern('www?', strength), 50)

        pt = pattern_tree.get_root()
        pt.insert_patternTree(PatternTree('wb??'))
        pt.insert_patternTree(PatternTree('bw??'))
        pt.insert_patternTree(PatternTree('b??w'))
        self.assertEqual(len(pt.children), 3)
        pt.insert_sequence_counted(('wbwb', 10), strength)
        self.assertEqual(pt.get_frequency_of_pattern('wb??', strength), 10)
        self.assertEqual(pt.get_frequency_of_pattern('w???', strength), 10)
        self.assertEqual(pt.get_frequency_of_pattern('????', strength), 10)
        self.assertEqual(pt.get_frequency_of_pattern('b??w', strength), 0)

        pt.insert_sequence_counted(('wbww', 20), strength)
        self.assertEqual(pt.get_frequency_of_pattern('wb??', strength), 30)
        self.assertEqual(pt.get_frequency_of_pattern('w???', strength), 30)
        self.assertEqual(pt.get_frequency_of_pattern('????', strength), 30)

        pt = pattern_tree.get_root()
        pt.insert_patternTree(PatternTree('www?'))
        pt.insert_sequence_counted(('wwbb', 10), strength)
        self.assertEqual(pt.get_frequency_of_pattern('ww??', strength), 0,
                         'Can count pattern only if it is in pattern tree')
        pt.insert_patternTree(PatternTree('ww??'))
        self.assertEqual(
            pt.get_frequency_of_pattern('ww??', strength), 10,
            'Gets sorted into the most specific pattern, even if pattern is added after the sequence'
        )
Пример #5
0
    def test_balance_tree_merge(self):
        """After ``balance_tree`` the first root child must have no children.

        The tree starts with 'www?' holding two children; two sequences with
        a count of 5 each are inserted before balancing.
        NOTE(review): presumably the children are merged away because they
        hold too few sequences -- confirm against ``balance_tree``.
        """
        strength = '1k'
        root = pattern_tree.get_root()
        for pattern in ('wwww', 'wwwb', 'www?'):
            root.insert_patternTree(PatternTree(pattern))

        for sequence in ('wwbw', 'wwbb'):
            root.insert_sequence_counted((sequence, 5), strength)
        children_before = len(root.children[0].children)
        self.assertEqual(children_before, 2)

        root.balance_tree()
        # Look the child up again: balancing may have replaced the node.
        children_after = len(root.children[0].children)
        self.assertEqual(children_after, 0)
Пример #6
0
    def test_balance_tree_split(self):
        """Check that ``balance_tree`` splits an overfull 'ww??' node.

        Two sequences with a count of 1000 each are inserted under 'ww??'.
        After balancing, the node is expected to have a single chain of two
        descendants, the deepest one being a childless node whose pattern is
        one of the inserted sequences.
        """
        strength = '1k'
        pt = pattern_tree.get_root()
        pt.insert_patternTree(PatternTree('ww??'))

        pt.insert_sequence_counted(('wwbw', 1000), strength)
        pt.insert_sequence_counted(('wwbb', 1000), strength)
        self.assertEqual(len(pt.children[0].children), 0)

        pt.balance_tree()
        self.assertEqual(len(pt.children[0].children), 1)
        self.assertEqual(len(pt.children[0].children[0].children), 1)
        leaf = pt.children[0].children[0].children[0]
        self.assertEqual(len(leaf.children), 0)

        # assertIn reports the actual pattern on failure, which
        # assertTrue(a == x or a == y) does not.
        self.assertIn(leaf.pattern, ('wwbb', 'wwbw'))
Пример #7
0
    def test_sequence_counted_multiple_strengths(self):
        """Check that frequencies are counted separately for each strength.

        Sequences are inserted under '1k' and '18k'; a count inserted for one
        strength must not show up when querying the other.
        """
        strength1 = '1k'
        strength2 = '18k'

        # Preconditions: both strengths occur exactly once in the externally
        # defined ``strengths``. unittest assertions are used instead of bare
        # ``assert`` so the checks are not stripped under ``python -O`` and
        # report the actual counts on failure.
        self.assertEqual(strengths.count(strength1), 1)
        self.assertEqual(strengths.count(strength2), 1)

        pt = pattern_tree.get_root()
        for p in ['wwww', 'wwwb', 'www?']:
            pt.insert_patternTree(PatternTree(p))

        self.assertEqual(pt.get_frequency_of_pattern('wwww', strength1), 0)
        pt.insert_sequence_counted(('wwww', 10), strength1)
        self.assertEqual(pt.get_frequency_of_pattern('wwww', strength1), 10)
        self.assertEqual(pt.get_frequency_of_pattern('wwww', strength2), 0)

        self.assertEqual(pt.get_frequency_of_pattern('www?', strength1), 10)
        pt.insert_sequence_counted(('wwwb', 20), strength1)
        pt.insert_sequence_counted(('wwwb', 22), strength2)
        # Counts for 'wwwb' must leave the 'wwww' counts untouched.
        self.assertEqual(pt.get_frequency_of_pattern('wwww', strength1), 10)
        self.assertEqual(pt.get_frequency_of_pattern('wwww', strength2), 0)

        self.assertEqual(pt.get_frequency_of_pattern('wwwb', strength1), 20)
        self.assertEqual(pt.get_frequency_of_pattern('wwwb', strength2), 22)
Пример #8
0
def create_pattern_tree(patterns):
    """Return the root from ``pattern_tree.get_root()`` filled with *patterns*.

    Each item of *patterns* is wrapped in a ``PatternTree`` and inserted into
    the root via ``insert_patternTree``, in iteration order. With an empty
    iterable the root is returned as obtained.
    """
    tree = pattern_tree.get_root()
    # The generator is lazy, so each node is built right before it is
    # inserted.
    for node in (PatternTree(p) for p in patterns):
        tree.insert_patternTree(node)
    return tree