Exemplo n.º 1
0
    def test_get_alpha_num(self):
        """get_alpha_num()"""
        # Regression check on a small automaton with three symbols.
        nfaData = nfa_data()
        for state_id in range(3):
            nfaData.states[state_id] = b_State(state_id, set())
        nfaData.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            nfaData.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        nfaData.start = 0
        for transition in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
            nfaData.transitions.add(transition)
        nfaData.final.add(3)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        # All three symbols are reported after compute().
        self.assertEqual(aut.get_alpha_num(), 3)

        # Dropping one symbol directly from _automaton1 must be reflected
        # by get_alpha_num().
        del aut._automaton1.alphabet[2]
        self.assertEqual(aut.get_alpha_num(), 2)
Exemplo n.º 2
0
 def test_get_table_parameters(self):
     """get_table_parameters"""
     # Assign state_bits and symbol_bits directly, then verify that
     # get_table_parameters reports the same pair of values.
     expected_state_bits = 24
     expected_symbol_bits = 20
     aut = PHF_DFA()
     aut.state_bits = expected_state_bits
     aut.symbol_bits = expected_symbol_bits
     reported = aut.get_table_parameters()
     self.assertEqual(reported[0], expected_state_bits)
     self.assertEqual(reported[1], expected_symbol_bits)
Exemplo n.º 3
0
 def test___init__(self):
     """__init__()"""
     # A freshly constructed PHF_DFA must expose the expected initial
     # values of its internal variables.
     aut = PHF_DFA()
     self.assertFalse(aut.get_compute())
     self.assertEqual(aut._automaton, aut._automaton1)

     # Table related attributes.
     table_defaults = (
         ("state_bits", 10),
         ("symbol_bits", 12),
         ("hash_function", None),
         ("trans_table", None),
         ("ran", 0),
     )
     for attr_name, expected in table_defaults:
         self.assertEqual(getattr(aut, attr_name), expected)

     # Fallback state is off.
     self.assertFalse(aut.fallback)
     self.assertEqual(aut.fallback_state, -1)

     # Faulty transitions are off and nothing has been collected yet.
     self.assertFalse(aut.faulty)
     faulty_defaults = (
         ("compress_hash", None),
         ("compress_bits", 0),
         ("bad_transitions", 0),
         ("collisions", dict()),
     )
     for attr_name, expected in faulty_defaults:
         self.assertEqual(getattr(aut, attr_name), expected)
Exemplo n.º 4
0
 def test_disable_faulty_transitions(self):
     """disable_faulty_transitions()"""
     # Enable faulty transitions, mark the automaton as computed and then
     # disable faulty transitions again. Afterwards compress_bits and
     # compress_hash still hold what enable_faulty_transitions configured,
     # while faulty and _compute are both cleared.
     configured_bits = 13
     dfa = PHF_DFA()
     dfa.enable_faulty_transitions(configured_bits)
     dfa._compute = True
     dfa.disable_faulty_transitions()
     self.assertEqual(dfa.compress_bits, configured_bits)
     self.assertNotEqual(dfa.compress_hash, None)
     self.assertFalse(dfa.get_compute())
     self.assertFalse(dfa.faulty)
Exemplo n.º 5
0
def get_phf(ruleset):
    """
        Build a Perfect hashing DFA from a rule file and report its size.

        :param ruleset: File handed to the default parser's load_file().
        :returns: List of five items: the label "Perfect Hashing DFA",
                  the number of states, the number of transitions and the
                  value of report_memory_real() twice.
    """
    # Default parser loaded with the input file.
    rule_parser = parser.parser()
    rule_parser.load_file(ruleset)
    # Automaton built from the RE(s) found in the input file.
    dfa = PHF_DFA()
    dfa.create_by_parser(rule_parser)
    # Replace the default PHF class by bdz with ratio 2.0.
    hash_class = bdz()
    hash_class.set_ratio(2.0)
    dfa.set_PHF_class(hash_class)
    # Compute the DFA and its PHF table.
    dfa.compute()
    # NOTE(review): both memory columns come from report_memory_real() -
    # presumably intentional to fill two result columns; confirm.
    label = "Perfect Hashing DFA"
    return [
        label,
        dfa.get_state_num(),
        dfa.get_trans_num(),
        dfa.report_memory_real(),
        dfa.report_memory_real(),
    ]
Exemplo n.º 6
0
 def test_decode_symbol(self):
     """decode_symbol()"""
     # Each kind of symbol must be decoded to its id and must be stripped
     # from the beginning of the input string.
     aut = PHF_DFA()
     alphabet = aut._automaton.alphabet
     alphabet[0] = b_Sym_char_class("ch0", set(['a', 'b']), 0)
     alphabet[1] = b_Sym_char_class("ch1", set(['c', 'd']), 1)
     alphabet[2] = b_Sym_char_class("ch2", set(['e', 'f']), 2)
     alphabet[3] = b_Sym_char("ch3", "g", 3)
     alphabet[4] = b_Sym_kchar("ch4", (frozenset(['1', '2']), frozenset(['1', '2'])), 4)

     # (input string, expected (rest of the string, symbol id))
     cases = [
         ("abeg112", ("beg112", 0)),
         ("beg112", ("eg112", 0)),
         ("eg112", ("g112", 2)),
         ("g112", ("112", 3)),
         ("112", ("2", 4)),
         # An unknown symbol is still consumed and -1 is returned.
         ("2", ("", -1)),
     ]
     for text, expected in cases:
         self.assertEqual(aut.decode_symbol(text), expected)
Exemplo n.º 7
0
 def test_set_table_parameters(self):
     """set_table_parameters()"""
     # Push a (state_bits, symbol_bits) pair through set_table_parameters
     # and verify that the internal variables took the values over and
     # that the computed flag was cleared.
     state_bits, symbol_bits = 24, 20
     dfa = PHF_DFA()
     dfa._compute = True
     dfa.set_table_parameters((state_bits, symbol_bits))
     self.assertEqual(dfa.state_bits, state_bits)
     self.assertEqual(dfa.symbol_bits, symbol_bits)
     self.assertFalse(dfa.get_compute())
Exemplo n.º 8
0
 def test_set_PHF_class(self):
     """set_PHF_class()"""
     # Create a PHF class and assign it to PHF_DFA through set_PHF_class.
     # hash_function must then refer to it and _compute must be False.
     dfa = PHF_DFA()
     dfa._compute = True
     hash_class = bdz()
     hash_class.set_limit(1024)
     hash_class.set_iteration_limit(8)
     dfa.set_PHF_class(hash_class)
     self.assertEqual(dfa.hash_function, hash_class)
     self.assertFalse(dfa.get_compute())
Exemplo n.º 9
0
    def test_remove_fallback_transitions(self):
        """remove_fallback_transitions()"""
        def build_abc_nfa(transitions):
            # Four states over symbols a, b, c; state 3 is the only final
            # state. Only the transition list differs between scenarios.
            data = nfa_data()
            for state_id in range(3):
                data.states[state_id] = b_State(state_id, set())
            data.states[3] = b_State(3, set([0]))
            for sym_id, char in enumerate("abc"):
                data.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
            data.start = 0
            for transition in transitions:
                data.transitions.add(transition)
            data.final.add(3)
            return data

        # 1. /abc/, state -1 (automatically chosen 0) - 4 transitions removed
        nfaData = build_abc_nfa([
            (0, 0, 1), (0, 1, 0), (0, 2, 0),
            (1, 1, 2), (1, 0, 1), (1, 2, 0),
            (2, 2, 3), (2, 0, 1), (2, 1, 0),
            (3, 0, 3), (3, 1, 3), (3, 2, 3),
        ])
        result = copy.deepcopy(nfaData)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.enable_fallback_state(warning=False)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        # 4 of the 12 transitions were removed
        self.assertEqual(len(cp.transitions), 8)
        # nothing may lead to the fallback state any more
        for transition in cp.transitions:
            self.assertNotEqual(transition[2], aut.fallback_state)
        self.assertEqual(len(cp.final), len(result.final))

        # 2. /abc/, state 1 - 3 transitions removed
        aut._automaton1 = aut._automaton
        aut.enable_fallback_state(1, False)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        # 3 of the 12 transitions were removed
        self.assertEqual(len(cp.transitions), 9)
        for transition in cp.transitions:
            self.assertNotEqual(transition[2], aut.fallback_state)
        self.assertEqual(len(cp.final), len(result.final))

        # 3. /^abc/, state 0 - automaton does not change
        nfaData = build_abc_nfa([(0, 0, 1), (1, 1, 2), (2, 2, 3)])
        result = copy.deepcopy(nfaData)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.enable_fallback_state(0, warning=False)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        self.assertEqual(len(cp.transitions), len(result.transitions))
        for transition in cp.transitions:
            self.assertNotEqual(transition[2], aut.fallback_state)
        self.assertEqual(len(cp.final), len(result.final))
Exemplo n.º 10
0
    def test_disable_fallback_state(self):
        """disable_fallback_state()"""
        # After disable_fallback_state() the variables _compute, fallback
        # and fallback_state must be back at their default values.
        nfaData = nfa_data()
        for state_id in range(3):
            nfaData.states[state_id] = b_State(state_id, set())
        nfaData.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            nfaData.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        nfaData.start = 0
        for transition in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
            nfaData.transitions.add(transition)
        nfaData.final.add(3)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        # Compute with the fallback state switched on first ...
        aut.enable_fallback_state(warning=False)
        aut.compute()

        # ... then switch it off and inspect the flags.
        aut.disable_fallback_state()
        self.assertFalse(aut.get_compute())
        self.assertFalse(aut.fallback)
        self.assertEqual(aut.fallback_state, -1)
Exemplo n.º 11
0
    def test_enable_fallback_state(self):
        """enable_fallback_state()"""
        # Test if fallback and fallback_state are set accordingly, _compute
        # is set to False and a warning is/is not printed on stdout
        # depending on the value of parameter warning.
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.final.add(3)

        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut.create_from_nfa_data(nfaData)
        aut.compute()

        capture_path = "stdout.output"
        real_stdout = sys.stdout

        def first_line_printed(*args, **kwargs):
            # Call enable_fallback_state with stdout redirected to the
            # capture file and return the first line it printed ('' when
            # nothing was printed). stdout is restored before any assertion
            # runs, so a failing assertion can still report normally, and
            # both file handles are closed deterministically.
            with open(capture_path, 'w') as sink:
                sys.stdout = sink
                try:
                    aut.enable_fallback_state(*args, **kwargs)
                finally:
                    sys.stdout = real_stdout
            with open(capture_path, 'r') as captured:
                return captured.readline()

        try:
            line = first_line_printed(2, warning=False)
            # warning was set to False, stdout should be empty
            self.assertFalse(line)
            # check if the fallback_state was set
            self.assertEqual(aut.fallback_state, 2)
            self.assertFalse(aut.get_compute())
            self.assertTrue(aut.fallback)

            line = first_line_printed()
            # warning should be printed by default
            self.assertTrue(line)
            # check if the fallback_state was chosen correctly
            self.assertEqual(aut.fallback_state, 1)
            self.assertFalse(aut.get_compute())
            self.assertTrue(aut.fallback)
        finally:
            # Remove the capture file even when an assertion failed.
            if os.path.exists(capture_path):
                os.remove(capture_path)
Exemplo n.º 12
0
def get_phf(ruleset):
    """
        Build a Perfect hashing DFA from a rule file and report its size.

        :param ruleset: File handed to the default parser's load_file().
        :returns: List of five items: the label "Perfect Hashing DFA",
                  the number of states, the number of transitions and the
                  value of report_memory_real() twice.
    """
    # Default parser loaded with the input file.
    rule_parser = parser.parser()
    rule_parser.load_file(ruleset)
    # Automaton built from the RE(s) found in the input file.
    dfa = PHF_DFA()
    dfa.create_by_parser(rule_parser)
    # Replace the default PHF class by bdz with ratio 2.0.
    hash_class = bdz()
    hash_class.set_ratio(2.0)
    dfa.set_PHF_class(hash_class)
    # Compute the DFA and its PHF table.
    dfa.compute()
    # Collect the experimental results in reporting order.
    # NOTE(review): both memory columns come from report_memory_real() -
    # presumably intentional to fill two result columns; confirm.
    stats = ["Perfect Hashing DFA"]
    stats.append(dfa.get_state_num())
    stats.append(dfa.get_trans_num())
    stats.append(dfa.report_memory_real())
    stats.append(dfa.report_memory_real())
    return stats
Exemplo n.º 13
0
    def test_report_memory_real(self):
        """report_memory_real()"""
        # Regression values for several PHF table sizes, state and symbol
        # representations and for faulty transitions.
        nfaData = nfa_data()
        for state_id in range(3):
            nfaData.states[state_id] = b_State(state_id, set())
        nfaData.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            nfaData.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        nfaData.start = 0
        for transition in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
            nfaData.transitions.add(transition)
        nfaData.final.add(3)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(8)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        self.assertEqual(aut.report_memory_real(), 120)

        # Different table parameters, no recomputation in between.
        for parameters, expected in [((4, 6), 48), ((4, 7), 72)]:
            aut.set_table_parameters(parameters)
            self.assertEqual(aut.report_memory_real(), expected)

        # Lower limit of the PHF class, table computed again.
        hash_class.set_limit(5)
        aut.set_PHF_class(hash_class)
        aut.compute()
        self.assertEqual(aut.report_memory_real(), 45)

        # Faulty transitions with different numbers of compress bits.
        for compress_bits, expected in [(10, 30), (19, 60)]:
            aut.enable_faulty_transitions(compress_bits)
            self.assertEqual(aut.report_memory_real(), expected)
Exemplo n.º 14
0
    def test_compute(self):
        """compute()"""
        # 1. /^abc/ - automaton does not change, PHF table is created
        nfaData = nfa_data()
        for state_id in range(3):
            nfaData.states[state_id] = b_State(state_id, set())
        nfaData.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            nfaData.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        nfaData.start = 0
        for transition in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
            nfaData.transitions.add(transition)
        nfaData.final.add(3)

        result = copy.deepcopy(nfaData)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        self.assertEqual(len(cp.transitions), len(result.transitions))
        self.assertEqual(len(cp.final), len(result.final))
        self.assertNotEqual(aut.trans_table, None)
        self.assertTrue(aut.get_compute())

        # 2. determinization of /^ab|ac/, PHF table is created
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0, set())
        nfaData.states[1] = b_State(1, set())
        nfaData.states[2] = b_State(2, set([0]))
        nfaData.states[3] = b_State(3, set())
        nfaData.states[4] = b_State(4, set([0]))
        for sym_id, char in enumerate("abc"):
            nfaData.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        nfaData.start = 0
        for transition in [(0, 0, 1), (1, 1, 2), (0, 0, 3), (3, 2, 4)]:
            nfaData.transitions.add(transition)
        for final_state in (2, 4):
            nfaData.final.add(final_state)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), 3)
        self.assertEqual(len(cp.alphabet), 3)
        self.assertEqual(len(cp.transitions), 3)
        self.assertEqual(len(cp.final), 1)
        self.assertNotEqual(aut.trans_table, None)
        self.assertTrue(aut.get_compute())

        # 3. resolve alphabet - /^[a-c][b-d]/, PHF table is created
        nfaData = nfa_data()
        nfaData.states[0] = b_State(0, set())
        nfaData.states[1] = b_State(1, set())
        nfaData.states[2] = b_State(2, set([0]))
        nfaData.alphabet[0] = b_Sym_char_class("ch0", set(['a', 'b', 'c']), 0)
        nfaData.alphabet[1] = b_Sym_char_class("ch1", set(['b', 'c', 'd']), 1)
        nfaData.start = 0
        for transition in [(0, 0, 1), (1, 1, 2)]:
            nfaData.transitions.add(transition)
        nfaData.final.add(2)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), 3)
        self.assertEqual(len(cp.alphabet), 3)
        self.assertEqual(len(cp.transitions), 4)
        self.assertEqual(len(cp.final), 1)
        self.assertNotEqual(aut.trans_table, None)
        self.assertTrue(aut.get_compute())

        # 4. /abc/ and enable_fallback_state - some transitions are removed
        nfaData = nfa_data()
        for state_id in range(3):
            nfaData.states[state_id] = b_State(state_id, set())
        nfaData.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            nfaData.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        nfaData.start = 0
        abc_transitions = [
            (0, 0, 1), (0, 1, 0), (0, 2, 0),
            (1, 1, 2), (1, 0, 1), (1, 2, 0),
            (2, 2, 3), (2, 0, 1), (2, 1, 0),
            (3, 0, 3), (3, 1, 3), (3, 2, 3),
        ]
        for transition in abc_transitions:
            nfaData.transitions.add(transition)
        nfaData.final.add(3)

        result = copy.deepcopy(nfaData)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.enable_fallback_state(warning=False)
        aut.compute()
        cp = aut._automaton1

        self.assertEqual(len(cp.states), len(result.states))
        self.assertEqual(len(cp.alphabet), len(result.alphabet))
        # fewer transitions than in the input automaton
        self.assertTrue(len(cp.transitions) < len(result.transitions))
        self.assertEqual(len(cp.final), len(result.final))
        self.assertNotEqual(aut.trans_table, None)
        self.assertTrue(aut.get_compute())
Exemplo n.º 15
0
    def test_enable_faulty_transitions(self):
        """enable_faulty_transitions()"""
        # enable_faulty_transitions must store compress_bits, provide a
        # compress_hash, set faulty and clear _compute.
        dfa = PHF_DFA()
        dfa._compute = True
        dfa.enable_faulty_transitions(13)
        self.assertEqual(dfa.compress_bits, 13)
        self.assertNotEqual(dfa.compress_hash, None)
        self.assertFalse(dfa.get_compute())
        self.assertTrue(dfa.faulty)

        # A compress_hash supplied by the caller must be the one in use.
        dfa = PHF_DFA()
        user_hash = jenkins_compress(4)
        user_hash.generate_seed()
        dfa.enable_faulty_transitions(4, compress_hash = user_hash)
        self.assertEqual(dfa.compress_bits, 4)
        self.assertEqual(dfa.compress_hash, user_hash)
        self.assertFalse(dfa.get_compute())
        self.assertTrue(dfa.faulty)

        # For a handful of inputs the hash output must fit into 4 bits.
        for code in range(255):
            hashed = dfa.compress_hash.hash(([chr(code)],[chr(0)]))
            self.assertTrue(0 <= hashed < 16)
Exemplo n.º 16
0
    def test_validate_transition(self):
        """validate_transition()"""
        # Transition validation must work for both the non-faulty and the
        # faulty transition table.
        nfaData = nfa_data()
        for state_id in range(3):
            nfaData.states[state_id] = b_State(state_id, set())
        nfaData.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            nfaData.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        nfaData.start = 0
        for transition in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
            nfaData.transitions.add(transition)
        nfaData.final.add(3)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.compute()

        # every transition of the automaton must be valid
        for existing in aut._automaton1.transitions:
            self.assertTrue(aut.validate_transition(aut._transition_rep(existing)))
        # some nonexistent transitions -> invalid
        for missing in [(0, 2, 0), (1, 0, 2)]:
            self.assertFalse(aut.validate_transition(aut._transition_rep(missing)))
        # state and/or symbol just behind the last existing one -> invalid
        automaton = aut._automaton1
        out_of_range = [
            (len(automaton.states), len(automaton.alphabet), 0),
            (0, len(automaton.alphabet), 0),
            (len(automaton.states), 0, 0),
        ]
        for missing in out_of_range:
            self.assertFalse(aut.validate_transition(aut._transition_rep(missing)))

        # faulty transitions
        aut.enable_faulty_transitions(32)
        aut.compute()
        # every transition of the automaton must still be valid
        for existing in aut._automaton1.transitions:
            self.assertTrue(aut.validate_transition(aut._transition_rep(existing)))
        # some nonexistent transitions -> invalid, collisions are improbable
        improbable = [(0, 2, 0), (1, 0, 2), (10, 10, 1), (11, 11, 1), (12, 12, 1)]
        for missing in improbable:
            self.assertFalse(aut.validate_transition(aut._transition_rep(missing)))
Exemplo n.º 17
0
    def test_generate_PHF_table(self):
        """generate_PHF_table()"""
        # Test of PHF table generation - the right size of the table, every
        # transition is exactly once in the table and on the right index.

        def check_table(aut, expected_size):
            # Shared verification of one generated table. The sequence was
            # previously copy-pasted for every scenario below.
            # NOTE: as a last step it enables faulty transitions (8 bits)
            # on aut and regenerates the table, so aut is modified.

            # transition table size
            self.assertEqual(aut.ran, len(aut.trans_table))
            self.assertEqual(aut.ran, expected_size)
            # count how many times each representation (index 1 of a table
            # row) occurs in the transition table
            tranCount = dict()
            for row in aut.trans_table:
                tranCount.setdefault(row[1], 0)
                tranCount[row[1]] += 1
            # test if every automaton transition is just once in the table
            for t in aut._automaton1.transitions:
                self.assertEqual(tranCount[aut._transition_rep(t)], 1)
            # rest of the rows are the nonexistent transitions
            t = [2 ** aut.state_bits - 1, 2 ** aut.symbol_bits - 1, 0]
            self.assertEqual(tranCount[aut._transition_rep(t)],
                             aut.ran - len(aut._automaton1.transitions))
            # check if each transition is on its index returned by hash
            # function
            for t in aut._automaton1.transitions:
                rep = aut._transition_rep(t)
                self.assertEqual(rep, aut.trans_table[aut.hash_function.hash(rep)][1])
            # test the representation in faulty table
            aut.enable_faulty_transitions(8)
            aut.generate_PHF_table()
            for t in aut._automaton1.transitions:
                rep = aut._transition_rep(t)
                self.assertEqual(aut.compress_hash.hash(rep),
                                 aut.trans_table[aut.hash_function.hash(rep)][3])

        nfaData = nfa_data()
        nfaData.states[0] = b_State(0,set())
        nfaData.states[1] = b_State(1,set())
        nfaData.states[2] = b_State(2,set())
        nfaData.states[3] = b_State(3,set([0]))
        nfaData.alphabet[0] = b_Sym_char("a", "a", 0)
        nfaData.alphabet[1] = b_Sym_char("b", "b", 1)
        nfaData.alphabet[2] = b_Sym_char("c", "c", 2)
        nfaData.start = 0
        nfaData.transitions.add( (0,0,1) )
        nfaData.transitions.add( (0,1,0) )
        nfaData.transitions.add( (0,2,0) )
        nfaData.transitions.add( (1,1,2) )
        nfaData.transitions.add( (1,0,1) )
        nfaData.transitions.add( (1,2,0) )
        nfaData.transitions.add( (2,2,3) )
        nfaData.transitions.add( (2,0,1) )
        nfaData.transitions.add( (2,1,0) )
        nfaData.transitions.add( (3,0,3) )
        nfaData.transitions.add( (3,1,3) )
        nfaData.transitions.add( (3,2,3) )
        nfaData.final.add(3)

        # PHF class with limit 128
        aut = PHF_DFA()
        a = bdz()
        a.set_limit(128)
        aut.set_PHF_class(a)
        aut._automaton1 =  nfaData
        aut.generate_PHF_table()
        check_table(aut, 384)

        # change the size of PHF table and repeat tests
        aut = PHF_DFA()
        a = bdz()
        a.set_ratio(6.0)
        a.set_iteration_limit(10)
        aut.set_PHF_class(a)
        aut._automaton1 = nfaData
        aut.generate_PHF_table()
        check_table(aut, 72)

        # RE /#include.*>/ and enable fallback_state
        par = pcre_parser()
        par.set_text("/#include.*>/s")
        aut = PHF_DFA()
        a = bdz()
        a.set_ratio(2.5)
        a.set_iteration_limit(10)
        aut.set_PHF_class(a)
        aut.create_by_parser(par)
        aut.enable_fallback_state(warning=False)
        aut.compute()
        check_table(aut, 90)

        # disable fallback_state (faulty transitions stay enabled from the
        # previous check) and verify the recomputed table
        aut.disable_fallback_state()
        aut.compute()
        check_table(aut, 252)
Exemplo n.º 18
0
    def test_report_memory_naive(self):
        """report_memory_naive()"""
        # Regression check on a small automaton.
        nfaData = nfa_data()
        for state_id in range(3):
            nfaData.states[state_id] = b_State(state_id, set())
        nfaData.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            nfaData.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        nfaData.start = 0
        for transition in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
            nfaData.transitions.add(transition)
        nfaData.final.add(3)

        aut = PHF_DFA()
        hash_class = bdz()
        hash_class.set_limit(128)
        aut.set_PHF_class(hash_class)
        aut.create_from_nfa_data(nfaData)
        aut.compute()
        self.assertEqual(aut.report_memory_naive(), 12)

        # Removing fallback transitions changes only the transitions, and
        # report_memory_naive depends on the number of states and symbols,
        # so the value stays the same.
        aut.enable_fallback_state(1, warning=False)
        aut.remove_fallback_transitions()
        self.assertEqual(aut.report_memory_naive(), 12)

        # Dropping one state and one symbol directly from _automaton1
        # lowers the reported value.
        automaton = aut._automaton1
        del automaton.states[2]
        del automaton.alphabet[2]
        self.assertEqual(aut.report_memory_naive(), 6)
Exemplo n.º 19
0
    def test_search(self):
        """search()"""
        # 1. RE /^abc/ - only the chain 0 -> 1 -> 2 -> 3 exists
        data = nfa_data()
        for state_id in range(3):
            data.states[state_id] = b_State(state_id, set())
        data.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            data.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        data.start = 0
        for tran in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
            data.transitions.add(tran)
        data.final.add(3)

        dfa = PHF_DFA()
        phf = bdz()
        phf.set_limit(128)
        dfa.set_PHF_class(phf)
        dfa.create_from_nfa_data(data)
        dfa.compute()

        anchored_cases = [
            ("abc", [1]),
            ("aaaaaaaaaaaaaabc", [0]),
            ("ccccbbbabc", [0]),
            ("ababc", [0]),
            ("d", [0]),
            ("cbabbacba", [0]),
        ]
        for text, expected in anchored_cases:
            self.assertEqual(dfa.search(text), expected)

        # 2. RE /abc/ - every state has a transition for every symbol
        data = nfa_data()
        for state_id in range(3):
            data.states[state_id] = b_State(state_id, set())
        data.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            data.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        data.start = 0
        full_table = [
            (0, 0, 1), (0, 1, 0), (0, 2, 0),
            (1, 1, 2), (1, 0, 1), (1, 2, 0),
            (2, 2, 3), (2, 0, 1), (2, 1, 0),
            (3, 0, 3), (3, 1, 3), (3, 2, 3),
        ]
        for tran in full_table:
            data.transitions.add(tran)
        data.final.add(3)

        dfa = PHF_DFA()
        phf = bdz()
        phf.set_limit(128)
        dfa.set_PHF_class(phf)
        dfa.create_from_nfa_data(data)
        dfa.compute()

        unanchored_cases = [
            ("abc", [1]),
            ("aaaaaaaaaaaaaabc", [1]),
            ("ccccbbbabc", [1]),
            ("ababc", [1]),
            ("d", [0]),
            ("cbabbacba", [0]),
        ]
        for text, expected in unanchored_cases:
            self.assertEqual(dfa.search(text), expected)

        # 2a. the same inputs must give the same answers once faulty
        # transitions are enabled and the automaton is recomputed
        dfa.enable_faulty_transitions(32)
        dfa.compute()
        for text, expected in unanchored_cases:
            self.assertEqual(dfa.search(text), expected)

        # 3. RE /#include.*>/ built by the parser, with enable_fallback_state
        pcre = pcre_parser()
        pcre.set_text("/#include.*>/")
        dfa = PHF_DFA()
        phf = bdz()
        phf.set_ratio(2.5)
        phf.set_iteration_limit(10)
        dfa.set_PHF_class(phf)
        dfa.create_by_parser(pcre)
        dfa.enable_fallback_state(warning=False)
        dfa.compute()
        include_cases = [
            ("#include <stdio.h>", [1]),
            ("#include <stdlib.h>", [1]),
            ("#include <stdio.h>bba", [1]),
            ('#include "pcre.h"', [0]),
            ('asdf#include <stdio.h>', [1]),
        ]
        for text, expected in include_cases:
            self.assertEqual(dfa.search(text), expected)
Exemplo n.º 20
0
    def test_get_trans_num(self):
        """get_trans_num()"""
        # Regression check on a tiny hand-built automaton with exactly three
        # transitions.
        data = nfa_data()
        for state_id in range(3):
            data.states[state_id] = b_State(state_id, set())
        data.states[3] = b_State(3, set([0]))
        for sym_id, char in enumerate("abc"):
            data.alphabet[sym_id] = b_Sym_char(char, char, sym_id)
        data.start = 0
        for tran in [(0, 0, 1), (1, 1, 2), (2, 2, 3)]:
            data.transitions.add(tran)
        data.final.add(3)

        dfa = PHF_DFA()
        phf = bdz()
        phf.set_limit(128)
        dfa.set_PHF_class(phf)
        dfa.create_from_nfa_data(data)
        dfa.compute()
        self.assertEqual(dfa.get_trans_num(), 3)

        # With state 1 as the fallback state, removing the fallback
        # transitions is expected to leave two transitions.
        dfa.enable_fallback_state(1, warning=False)
        dfa.remove_fallback_transitions()
        self.assertEqual(dfa.get_trans_num(), 2)
Exemplo n.º 21
0
    # Banner describing the configuration demonstrated by this example.
    print("-------------------------------------------------------------------")
    print("                    Example of use: PHF DFA                        ")
    print("-------------------------------------------------------------------")
    print(" Ruleset: /#include.*>/                                            ")
    print(" Faulty Table: No                                                  ")
    print(" State bits: 10                                                    ")
    print(" Symbol bits: 12                                                   ")
    print(" Fallback State: No                                                ")
    print("-------------------------------------------------------------------")

    # Create the parser and load the regular expression named in the banner.
    parser = pcre_parser()
    parser.set_text("/#include.*>/")

    # Create the PHF_DFA automaton from the parsed expression.
    aut = PHF_DFA()
    aut.create_by_parser(parser)

    # Replace the default PHF class with a bdz instance using ratio 2.0 so
    # that table generation does not fail in this script.
    # NOTE(review): the exact meaning of the ratio is defined by bdz, which
    # is not visible here - confirm before changing the value.
    a = bdz()
    a.set_ratio(2.0)
    aut.set_PHF_class(a)

    # Compute the DFA and the PHF table.
    aut.compute()

    # Report the memory used by the PHF table, in bytes.
    print "Memory used (Real):", aut.report_memory_real(), "B"
    
    # Print number of symbols, states and transitions
Exemplo n.º 22
0
def test():
    """
        Run searching using pcregrep and PHF_DFA. Prints out the results.
    """
    # parse options
    usage = "usage: %prog rules.pcre pcap_dir/ [options]"
    optparser = OptionParser(usage=usage)
    optparser.add_option("-O", "--outputfile", dest="resultfile", help="output file for results, default is stdout")
    optparser.add_option("-P", "--PerPacket", dest="PerPacket", action="store_true", default=False,
                      help="compare nonfaulty matching for flows and packets, faulty algorithm is used only with flows")
    optparser.add_option("-s", "--showprogress", dest="progress", action="store_true", default=False,
                      help="show progress of computation")
    optparser.add_option("-C", "--count", dest="maxiter", type="int", default="1", help="number of test iterations")
    optparser.add_option("-F", "--faulty", dest="FAULTY", type="int", default="0", help="number of bits for compress hash, default is 0 (no faulty transitions)")
    optparser.add_option("-D", "--debuglevel", dest="DEBUG", type="int", default="0", help="debug output level (0-2)")
    optparser.add_option("-S", "--savefile", dest="savefile", default="", metavar="FILE", help="save nfa_data in FILE")
    optparser.add_option("-L", "--loadfile", dest="autfile", default="", metavar="FILE", help="load nfa_data from FILE")
    optparser.add_option("-N", "--nonfaulty", dest="NonFaulty", action="store_true", default=False,
                      help="try to generate PHF table without collisions, therefore ensure nonfaulty matching. Experimental code. "
                            "May take a long time with small compress hash output.")
    (options, args) = optparser.parse_args()

    global FAULTY, DEBUG
    if len(args) != 2:
       print "You must specify rules.pcre and pcap_dir/"
       optparser.print_usage()
       exit(1)
    rulesfile, inputdir = args
    PerPacket, resultfile,  maxiter, autfile, savefile, FAULTY, DEBUG = options.PerPacket, options.resultfile, options.maxiter, options.autfile, options.savefile, options.FAULTY, options.DEBUG
    progress = options.progress
    NonFaulty = options.NonFaulty
    
    if inputdir[-1] == "/":
        inputdir = inputdir[:-1] # remove '/' from the end
    rules = open(rulesfile, 'rb')
    if PerPacket:
        packetdir = inputdir + "/packets"
        inputdir = inputdir + "/flows"
    if resultfile:
        sys.stdout = open(resultfile, 'a')
    totalhits, totalfp, totalfn = (0, 0, 0)
    iter = 0    
    while iter != maxiter:
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r',"pcregrep",
        if not iter:
            # prepare pcregrep
            p = subprocess.Popen("cd pcre-8.20/ && make pcregrep", shell=True, stdout=subprocess.PIPE)
            p.wait()
            results = dict()
        file_list = list()
        rule_count = len(open(rulesfile).readlines())
        for root, dirs, files in os.walk(inputdir):
            for i in files:
                i = os.path.join(root, i)
                file_list.append(i)
                if not iter:
                    results[i] = [rule_count*[0],rule_count*[0],rule_count*[0]]
                else:
                    results[i][0] = rule_count*[0]
        #results = init_results
        rule_num = 0
        grep_reg_exp = "grep_reg_exp." + str(os.getpid())
        for rule in rules:
            if not iter:
                if DEBUG:
                    print rule,
                (grep_rule, grep_params) = parse_rule(rule)
                f = open(grep_reg_exp, 'w')
                f.write(grep_rule)
                f.close()
                p = subprocess.Popen("pcre-8.20/pcregrep --buffer-size 50000 --color=auto -N ANYCRLF" + grep_params + " -r -l -f " + grep_reg_exp + " " + inputdir, shell=True, stdout=subprocess.PIPE)
                p.wait()
                for out in p.stdout:
                    item = out.split()[0]
                    results[item][1][rule_num] = 1
                if PerPacket:
                    p = subprocess.Popen("pcre-8.20/pcregrep --buffer-size 50000 --color=auto -N ANYCRLF" + grep_params + " -r -l -f " + grep_reg_exp + " " + packetdir, shell=True, stdout=subprocess.PIPE)
                    p.wait()
                    for out in p.stdout:
                        item =  inputdir + "/" + out.split()[0].split("-")[1].replace("_", "/")
                        results[item][2][rule_num] = 1

            rule_num += 1
        try:
            os.remove(grep_reg_exp)
        except:
            pass
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "create automaton",
        #aut = b_Automaton()
        aut = PHF_DFA()
        if autfile:
            aut.create_from_nfa_data(nfa_data().load_from_file(autfile))
        else:
            par = parser("pcre_parser")
            #par.set_text(rule)
            par.load_file(rulesfile)
            aut.create_by_parser(par)
            if DEBUG:
                aut.show("NFA.dot")
            #aut.remove_epsilons()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "resolve alphabet",
            aut.resolve_alphabet()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "determinise",
            aut.determinise()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "minimise",
            aut.minimise()
            if DEBUG:
                aut.show("DFA.dot")
            if savefile:
                aut._automaton.save_to_file(savefile)
        aut._automaton1 = aut._automaton
        aut.set_table_parameters((20,10))
        if DEBUG > 1:
            print "Without fallback state:"
            print "Symbols:", len(aut._automaton.alphabet)
            print "States:", len(aut._automaton.states)
            print "Transitions:", aut.get_trans_num(), float(aut.get_trans_num()) / (aut.get_state_num() * aut.get_alpha_num()) * 100, "%"
        if isinstance(aut, PHF_DFA):
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "generate PHF",
            if aut.get_trans_num() == (aut.get_state_num() * aut.get_alpha_num()):
                aut.enable_fallback_state(warning=False)
            if FAULTY:
                aut.enable_faulty_transitions(FAULTY)
                if NonFaulty:
                    aut.enable_faulty_check()
            aut.compute()
            if DEBUG:
                print "Fallback state:", aut.fallback_state
                print "Symbols:", len(aut._automaton.alphabet)
                print "States:", len(aut._automaton.states)
                print "Transitions:", aut.get_trans_num(), float(aut.get_trans_num()) / (aut.get_state_num() * aut.get_alpha_num()) * 100, "%"
        count = 1
        all = len(file_list)
        if progress:
            print >> sys.stderr, '\r' + 80*' ' + '\r',
        for f in file_list:
            # progress
            if progress:
                print >> sys.stderr, '\r',
                print >> sys.stderr, str(iter+1)+'/'+str(maxiter)+ ":", count, '/', all,
#                sys.stderr.flush()
            count += 1
            data = open(f, 'rb').read()
            results[f][0] = aut.search(data)
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "compare results",
        if isinstance(aut, PHF_DFA) and DEBUG:
            if DEBUG > 1:
                print "List of collisions:"
                print aut.collisions
                for tran, i in aut.collisions.iteritems():
                    #print tran, i
                    print  BitArray(bytes=tran[0], length=aut.symbol_bits).uint, BitArray(bytes=tran[1], length=aut.state_bits).uint, i
                    print "SYM:", aut._automaton.alphabet[BitArray(bytes=tran[0], length=aut.symbol_bits).uint]
            print "Bad transitions:", aut.bad_transitions
            print "Collisions:", len(aut.collisions)
            print "Compress bits:", aut.compress_bits
        stats = compare_results(results)
        stats = list(stats)
        if stats[0] == 0:
            print "Zero hits, cannot compute F-measure!"
            stats[0] = 1
        if DEBUG:
            print "Total number of searched packets/flows:", stats[3]
        print "Hits:", stats[0]
        totalhits += stats[0]
        totalfp += stats[1]
        totalfn += stats[2]
        precis = float(stats[0])/(stats[1]+stats[0])
        recall = float(stats[0])/(stats[0]+stats[2])
        fmeas = 2* precis * recall / (precis + recall)
        print "False positives:", stats[1], precis*100, "%"
        print "False negatives:", stats[2], recall*100, "%"
        print "F-measure:", fmeas*100, "%"
        if PerPacket:
            print "Per packet errors:", stats[4], stats[5]
        print '-'*80
        iter += 1
    print "Total stats:"
    precis = float(totalhits)/(totalfp + totalhits)
    recall = float(totalhits)/(totalfn + totalhits)
    fmeas = 2* precis * recall / (precis + recall)
    print "Hits:", totalhits
    print "False positives:", totalfp, precis*100, "%"
    print "False negatives:", totalfn, recall*100, "%"
    print "F-measure:", fmeas*100, "%"
    print "_"*80