def find_bp_cex():
    """
    Show how the transition focus equivalence oracle can be used to find counterexamples.

    Loads the 'bp_2' GRU network (without training it) and the DFA stored in
    'TrainingDataAndAutomata/bp_depth4.dot', then queries the oracle ten times
    with that DFA as the hypothesis. Every distinct counterexample is printed
    once, preceded by the time in seconds (rounded to two decimals) the query took.
    """
    rnn, alphabet, train_set = train_or_load_rnn('bp_2', num_layers=2, hidden_dim=50,
                                                 rnn_class=GRUNetwork, train=False)

    model = load_automaton_from_file('TrainingDataAndAutomata/bp_depth4.dot', automaton_type='dfa')

    sul = RNN_BinarySUL_for_Weiss_Framework(rnn)
    eq_oracle = TransitionFocusOracle(alphabet, sul, num_random_walks=1000, walk_len=20)

    cex_set = set()
    for _ in range(10):
        start_time = time.time()
        cex = eq_oracle.find_cex(model)
        # A query may come back empty (no disagreement found); skip it instead of
        # failing on tuple(None). Already reported counterexamples are skipped as well.
        if not cex:
            continue
        cex_key = tuple(cex)
        if cex_key in cex_set:
            continue
        cex_set.add(cex_key)
        end_time = time.time() - start_time
        print(round(end_time, 2), "".join(cex))
def test_all_configuration_combinations(self):
    """
    Learn the Angluin example DFA with L* under every configuration combination.

    Iterates over all automaton types, closing strategies, counterexample
    processing modes, suffix-closedness settings and caching settings. For each
    combination every equivalence oracle is run in turn (the cache-based oracle
    is left out when caching is disabled) and the learned model is checked with
    self.prove_equivalence. The first failing combination is printed and the
    test fails.
    """
    target = get_Angluin_dfa()
    inputs = target.get_input_alphabet()

    for automaton_kind in ['dfa', 'mealy', 'moore']:
        for closing_strategy in ['shortest_first', 'longest_first', 'single']:
            for cex_mode in [None, 'longest_prefix', 'rs']:
                for closedness in [True, False]:
                    for use_cache in [True, False]:
                        base_sul = DfaSUL(target)

                        random_walk = RandomWalkEqOracle(inputs, base_sul, 5000, reset_after_cex=True)
                        state_prefix = StatePrefixEqOracle(inputs, base_sul, walks_per_state=10, walk_len=50)
                        transition_focus = TransitionFocusOracle(inputs, base_sul, num_random_walks=200,
                                                                 walk_len=30, same_state_prob=0.3)
                        w_method = WMethodEqOracle(inputs, base_sul,
                                                   max_number_of_states=len(target.states))
                        random_w_method = RandomWMethodEqOracle(inputs, base_sul, walks_per_state=10,
                                                                walk_len=50)
                        bf_exploration = BreadthFirstExplorationEqOracle(inputs, base_sul, 3)
                        random_word = RandomWordEqOracle(inputs, base_sul)
                        cache_based = CacheBasedEqOracle(inputs, base_sul)
                        k_way_coverage = KWayStateCoverageEqOracle(inputs, base_sul)

                        candidates = [
                            random_walk,
                            random_word,
                            random_w_method,
                            k_way_coverage,
                            cache_based,
                            bf_exploration,
                            transition_focus,
                            w_method,
                            state_prefix,
                        ]

                        # The cache-based oracle is only exercised when caching is enabled.
                        if not use_cache:
                            candidates.remove(cache_based)

                        for eq_oracle in candidates:
                            # Every run gets its own SUL instance.
                            fresh_sul = DfaSUL(target)
                            eq_oracle.sul = fresh_sul

                            hypothesis = run_Lstar(inputs, fresh_sul, eq_oracle,
                                                   automaton_type=automaton_kind,
                                                   closing_strategy=closing_strategy,
                                                   suffix_closedness=closedness,
                                                   cache_and_non_det_check=use_cache,
                                                   cex_processing=cex_mode,
                                                   print_level=0)

                            if self.prove_equivalence(hypothesis):
                                continue

                            print(eq_oracle, automaton_kind)
                            assert False

    assert True
def random_dfa_example(alphabet_size, number_of_states, num_accepting_states=1):
    """
    Generate a random DFA and learn it with L*.

    The input alphabet consists of the first ``alphabet_size`` lowercase ASCII
    letters (at most 26). Several equivalence oracles are instantiated to show
    how they are constructed; learning itself uses the random walk oracle.

    :param alphabet_size: size of the input alphabet
    :param number_of_states: number of states in the generated DFA
    :param num_accepting_states: number of accepting states
    :return: the DFA returned by run_Lstar
    """
    assert num_accepting_states <= number_of_states

    alphabet = list(string.ascii_letters[:26][:alphabet_size])

    target_dfa = generate_random_dfa(number_of_states, alphabet, num_accepting_states)
    # To inspect the generated DFA: visualize_automaton(target_dfa, path='correct')
    sul = DfaSUL(target_dfa)

    # Examples of the available equivalence oracles; only the random walk
    # oracle is handed to the learner below.
    random_walk_oracle = RandomWalkEqOracle(alphabet, sul, 5000)
    state_prefix_oracle = StatePrefixEqOracle(alphabet, sul, walks_per_state=10, walk_len=50)
    transition_focus_oracle = TransitionFocusOracle(alphabet, sul, num_random_walks=200, walk_len=30,
                                                    same_state_prob=0.3)
    w_method_oracle = WMethodEqOracle(alphabet, sul, max_number_of_states=number_of_states)
    random_w_method_oracle = RandomWMethodEqOracle(alphabet, sul, walks_per_state=10, walk_len=50)
    bf_exploration_oracle = BreadthFirstExplorationEqOracle(alphabet, sul, 5)
    random_word_oracle = RandomWordEqOracle(alphabet, sul)
    cache_based_oracle = CacheBasedEqOracle(alphabet, sul)
    user_input_oracle = UserInputEqOracle(alphabet, sul)
    k_way_coverage_oracle = KWayStateCoverageEqOracle(alphabet, sul)

    result = run_Lstar(alphabet, sul, random_walk_oracle, automaton_type='dfa',
                       cache_and_non_det_check=False, cex_processing='rs')

    # To inspect the learned DFA: visualize_automaton(result)
    return result
def falsify_refinement_based_model():
    """
    Show how extensive coverage-based testing can be used to falsify the model
    returned by the refinement-based (white-box) extraction approach.

    Loads the 'bp_1' GRU network (without training it), extracts a DFA with the
    white-box procedure, converts it to the AALpy DFA format and then queries a
    state-prefix equivalence oracle ten times with that DFA as the hypothesis.
    Every distinct counterexample is printed once, preceded by the time in
    seconds (rounded to two decimals) the query took.
    """
    rnn, alphabet, train_set = train_or_load_rnn('bp_1', num_layers=2, hidden_dim=50,
                                                 rnn_class=GRUNetwork, train=False)

    # Initial examples for Weiss et al.: the shortest training word the network
    # classifies as True and the shortest one it classifies as False, if any.
    all_words = sorted(train_set.keys(), key=len)
    pos = next((w for w in all_words if rnn.classify_word(w) is True), None)
    neg = next((w for w in all_words if rnn.classify_word(w) is False), None)
    starting_examples = [w for w in (pos, neg) if w is not None]

    # Extract the automaton using the white-box equivalence query.
    rnn.renew()
    dfa_weiss = extract(rnn, time_limit=500, initial_split_depth=10, starting_examples=starting_examples)

    # Make sure that internal states are back to initial.
    rnn.renew()

    white_box_hyp = Weiss_to_AALpy_DFA_format(dfa_weiss)
    sul = RNN_BinarySUL_for_Weiss_Framework(rnn)
    eq_oracle = StatePrefixEqOracle(alphabet, sul, walks_per_state=1500, walk_len=20)

    cex_set = set()
    for _ in range(10):
        start_time = time.time()
        cex = eq_oracle.find_cex(white_box_hyp)
        # Skip rounds without a counterexample and counterexamples already reported.
        if not cex or tuple(cex) in cex_set:
            continue
        cex_set.add(tuple(cex))
        end_time = time.time() - start_time
        print(round(end_time, 2), "".join(cex))
def train_and_extract_bp(path="TrainingDataAndAutomata/balanced()_1.txt", load=False):
    """
    Train (or load) a GRU classifier for balanced parentheses and learn a DFA from it.

    The fifth character from the end of ``path`` (the character right before a
    four-character extension such as '.txt') is used as the data-set index in
    the names of the saved network and of the saved automaton.

    :param path: path to the data file handed to parse_data
    :param load: if True, load the stored network instead of training and saving a new one
    :return: the DFA returned by run_Lstar
    """
    bp_alphabet = list(string.ascii_lowercase + "()")

    x, y = parse_data(path)
    x_train, y_train, x_test, y_test = preprocess_binary_classification_data(x, y, bp_alphabet)

    # CHANGE PARAMETERS OF THE RNN if you want
    rnn = RNNClassifier(
        bp_alphabet,
        output_dim=2,
        num_layers=2,
        hidden_dim=50,
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        batch_size=18,
        nn_type="GRU",
    )

    data_index = path[-5]
    rnn_file = f"RNN_Models/balanced_parentheses{data_index}.rnn"

    if load:
        rnn.load(rnn_file)
    else:
        rnn.train(stop_acc=1., stop_epochs=3, verbose=True)
        rnn.save(rnn_file)

    sul = RnnBinarySUL(rnn)
    state_eq_oracle = TransitionFocusOracle(bp_alphabet, sul, num_random_walks=500, walk_len=30,
                                            same_state_prob=0.3)

    learned = run_Lstar(alphabet=bp_alphabet, sul=sul, eq_oracle=state_eq_oracle, automaton_type='dfa',
                        cache_and_non_det_check=False, max_learning_rounds=5)

    save_automaton_to_file(learned, f'LearnedAutomata/balanced_parentheses{data_index}')
    return learned
def test_eq_oracles(self):
    """
    Learn the Angluin example DFA with L* using every equivalence oracle.

    For each automaton type ('dfa', 'mealy', 'moore') every oracle is run in
    turn with caching enabled and no counterexample processing, and the learned
    model is checked with self.prove_equivalence. The first failing
    oracle/automaton-type pair is printed and the test fails.
    """
    target = get_Angluin_dfa()
    inputs = target.get_input_alphabet()

    for automaton_kind in ['dfa', 'mealy', 'moore']:
        base_sul = DfaSUL(target)

        random_walk = RandomWalkEqOracle(inputs, base_sul, 5000, reset_after_cex=True)
        state_prefix = StatePrefixEqOracle(inputs, base_sul, walks_per_state=10, walk_len=50)
        transition_focus = TransitionFocusOracle(inputs, base_sul, num_random_walks=200, walk_len=30,
                                                 same_state_prob=0.3)
        w_method = WMethodEqOracle(inputs, base_sul, max_number_of_states=len(target.states))
        random_w_method = RandomWMethodEqOracle(inputs, base_sul, walks_per_state=10, walk_len=50)
        bf_exploration = BreadthFirstExplorationEqOracle(inputs, base_sul, 3)
        random_word = RandomWordEqOracle(inputs, base_sul)
        cache_based = CacheBasedEqOracle(inputs, base_sul)
        k_way_coverage = KWayStateCoverageEqOracle(inputs, base_sul)

        candidates = [
            random_walk,
            random_word,
            random_w_method,
            w_method,
            k_way_coverage,
            cache_based,
            bf_exploration,
            transition_focus,
            state_prefix,
        ]

        for eq_oracle in candidates:
            # Every run gets its own SUL instance.
            fresh_sul = DfaSUL(target)
            eq_oracle.sul = fresh_sul

            hypothesis = run_Lstar(inputs, fresh_sul, eq_oracle, automaton_type=automaton_kind,
                                   cache_and_non_det_check=True, cex_processing=None, print_level=0)

            if self.prove_equivalence(hypothesis):
                continue

            print(eq_oracle, automaton_kind)
            assert False

    assert True
def run_comparison(example, train=True, num_layers=2, hidden_dim=50, rnn_class=GRUNetwork,
                   insufficient_testing=False, verbose=False):
    """
    Extract a DFA from an RNN with the white-box (refinement-based) procedure and
    with black-box L*, then compare the two results and print a report.

    :param example: example name handed to train_or_load_rnn; when
        ``insufficient_testing`` is False, names that do not contain 'tomita'
        use the transition focus oracle instead of the random W-method oracle
    :param train: forwarded to train_or_load_rnn
    :param num_layers: forwarded to train_or_load_rnn and shown in the report
    :param hidden_dim: forwarded to train_or_load_rnn and shown in the report
    :param rnn_class: forwarded to train_or_load_rnn; reported as "GRU" when it
        equals GRUNetwork and as "LSTM" otherwise
    :param insufficient_testing: if True, learn with a random word oracle
        (100 walks of length 3 to 12) and only compare the models when both
        have the same number of states
    :param verbose: if False, printing is blocked during extraction and learning
    :return: None, the results are printed
    :raises AssertionError: if verify_cex returns a falsy result for the
        counterexamples collected between the two models
    """
    rnn, alphabet, train_set = train_or_load_rnn(example, num_layers=num_layers, hidden_dim=hidden_dim,
                                                 rnn_class=rnn_class, train=train)

    # Initial examples for Weiss et al.: the shortest training word the network
    # classifies as True and the shortest one it classifies as False, if any.
    all_words = sorted(train_set.keys(), key=len)
    pos = next((w for w in all_words if rnn.classify_word(w) is True), None)
    neg = next((w for w in all_words if rnn.classify_word(w) is False), None)
    starting_examples = [w for w in (pos, neg) if w is not None]

    # Extract the automaton using the white-box equivalence query.
    rnn.renew()
    if verbose:
        print('---------------------------------WHITE BOX EXTRACTION--------------------------------------------------')
    else:
        blockPrint()

    # Printing must be re-enabled even if extraction or learning raises,
    # otherwise all later output of the process stays suppressed.
    try:
        start_white_box = time.time()
        dfa_weiss = extract(rnn, time_limit=500, initial_split_depth=10, starting_examples=starting_examples)
        time_white_box = time.time() - start_white_box

        # Make sure that internal states are back to initial.
        rnn.renew()

        if verbose:
            print('---------------------------------BLACK BOX EXTRACTION--------------------------------------------------')
        sul = RNN_BinarySUL_for_Weiss_Framework(rnn)

        alphabet = list(alphabet)

        # Define the equivalence oracle.
        if insufficient_testing:
            eq_oracle = RandomWordEqOracle(alphabet, sul, num_walks=100, min_walk_len=3, max_walk_len=12)
        else:
            eq_oracle = RandomWMethodEqOracle(alphabet, sul, walks_per_state=1000, walk_len=25)
            if 'tomita' not in example:
                eq_oracle = TransitionFocusOracle(alphabet, sul, num_random_walks=1000, walk_len=20)

        start_black_box = time.time()
        aalpy_dfa = run_Lstar(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='dfa',
                              max_learning_rounds=10, print_level=2, cache_and_non_det_check=False,
                              cex_processing='rs')
        time_black_box = time.time() - start_black_box
    finally:
        enablePrint()

    if insufficient_testing:
        if len(aalpy_dfa.states) == len(dfa_weiss.Q):
            translated_weiss_2_aalpy = Weiss_to_AALpy_DFA_format(dfa_weiss)
            sul = DfaSUL(translated_weiss_2_aalpy)
            eq_oracle = RandomWMethodEqOracle(alphabet, sul, walks_per_state=1000, walk_len=10)

            cex = eq_oracle.find_cex(aalpy_dfa)
            if not cex:
                print(
                    '-------------------------WHITE-Box vs. BLACK-BOX WITH INSUFFICIENT TESTING -------------------------')
                print('White-box and Black-box technique extracted the same automaton.')
                print(f'White-box time: {round(time_white_box, 2)} seconds.')
                print(f'Black-box time: {round(time_black_box, 2)} seconds.')
            else:
                verify_cex(aalpy_dfa, translated_weiss_2_aalpy, rnn, [cex])
        return

    if len(aalpy_dfa.states) != len(dfa_weiss.Q):
        print('---------------------------------WHITE vs. BLACK BOX EXTRACTION----------------------------------------')
        nn_props = F'{"GRU" if rnn_class == GRUNetwork else "LSTM"}_layers_{num_layers}_dim_{hidden_dim}'
        print(f'Example : {example}')
        print(f'Configuration : {nn_props}')
        print(f"Number of states\n "
              f"White-box extraction: {len(dfa_weiss.Q)}\n "
              f"Black-box extraction: {len(aalpy_dfa.states)}")

        translated_weiss_2_aalpy = Weiss_to_AALpy_DFA_format(dfa_weiss)

        # The white-box DFA acts as the system under learning, the black-box
        # DFA as the hypothesis that is tested against it.
        sul = DfaSUL(translated_weiss_2_aalpy)
        eq_oracle = RandomWMethodEqOracle(alphabet, sul, walks_per_state=10000, walk_len=20)
        if 'tomita' not in example:
            eq_oracle = TransitionFocusOracle(alphabet, sul)

        # Collect distinct counterexamples in order of discovery.
        cex_set = []
        for _ in range(10):
            cex = eq_oracle.find_cex(aalpy_dfa)
            if cex and cex not in cex_set:
                cex_set.append(cex)

        cex_set.sort(key=len)

        # Verify that the counterexamples are not spurious and find out which model is the correct one.
        real_cex = verify_cex(aalpy_dfa, translated_weiss_2_aalpy, rnn, cex_set)
        if not real_cex:
            print('Spurious CEX')
            # Explicit raise so the check is not stripped when running with -O.
            raise AssertionError('Spurious CEX')
    else:
        print('Size of both models: ', len(aalpy_dfa.states))