def test_saving_loading(self):
    """Round-trip one model of every automaton type through save and load.

    Each model is saved, loaded back, saved again and loaded a second time.
    For every type except 'mc' the input alphabet of the original and the
    twice-loaded model must match; for 'dfa', 'moore' and 'mealy' the
    characterization sets must match as well.

    Exceptions are deliberately not caught: a failure surfaces with its
    original type and traceback instead of an anonymous ``assert False``.
    """
    dfa = get_Angluin_dfa()
    mealy = load_automaton_from_file('../DotModels/Angluin_Mealy.dot', automaton_type='mealy')
    moore = load_automaton_from_file('../DotModels/Angluin_Moore.dot', automaton_type='moore')
    onfsm = get_benchmark_ONFSM()
    mdp = get_small_pomdp()
    smm = get_faulty_coffee_machine_SMM()
    mc = generate_random_markov_chain(num_states=10)

    models_with_types = [
        (dfa, 'dfa'),
        (mealy, 'mealy'),
        (moore, 'moore'),
        (onfsm, 'onfsm'),
        (mc, 'mc'),
        (mdp, 'mdp'),
        (smm, 'smm'),
    ]

    for model, automaton_type in models_with_types:
        # save() is called without a path; the file read back below is
        # 'LearnedModel.dot'.
        model.save()
        loaded_model = load_automaton_from_file('LearnedModel.dot', automaton_type)
        loaded_model.save()
        loaded_model2 = load_automaton_from_file('LearnedModel.dot', automaton_type)

        # The alphabet comparison is skipped for Markov chains.
        if automaton_type != 'mc':
            original_alphabet = model.get_input_alphabet()
            reloaded_alphabet = loaded_model2.get_input_alphabet()
            assert set(original_alphabet) == set(reloaded_alphabet)

        if automaton_type in {'dfa', 'moore', 'mealy'}:
            assert model.compute_characterization_set() == \
                loaded_model2.compute_characterization_set()
def get_test_automata(self):
    """Return a dict mapping a short name to each reference automaton.

    The Angluin DFA comes from ``get_Angluin_dfa``; every other entry is
    loaded from a dot file under ``../DotModels``.
    """
    dot_backed_models = (
        ("angluin_mealy", '../DotModels/Angluin_Mealy.dot', 'mealy'),
        ("angluin_moore", '../DotModels/Angluin_Moore.dot', 'moore'),
        ("mqtt", '../DotModels/MQTT/emqtt__two_client_will_retain.dot', 'mealy'),
        ("openssl", '../DotModels/TLS/OpenSSL_1.0.2_server_regular.dot', 'mealy'),
        ("tcp_server", '../DotModels/TCP/TCP_Linux_Server.dot', 'mealy'),
    )

    automata = {"angluin_dfa": get_Angluin_dfa()}
    for name, dot_path, kind in dot_backed_models:
        automata[name] = load_automaton_from_file(dot_path, automaton_type=kind)
    return automata
def active_alergia_example(example='first_grid'):
    """Run active Alergia on an MDP loaded from ``./DotModels/MDPs``.

    50000 random input words (length 6 to 14) are queried on the MDP SUL to
    build the initial data set, which is then passed to ``run_active_Alergia``
    together with a ``RandomWordSampler``. The resulting model is printed.

    :param example: name (without extension) of the dot file to load
    """
    from random import choice, randint
    from aalpy.SULs import MdpSUL
    from aalpy.utils import load_automaton_from_file
    from aalpy.learning_algs import run_active_Alergia
    from aalpy.learning_algs.stochastic_passive.ActiveAleriga import RandomWordSampler

    mdp = load_automaton_from_file(f'./DotModels/MDPs/{example}.dot', automaton_type='mdp')
    input_alphabet = mdp.get_input_alphabet()
    sul = MdpSUL(mdp)

    num_initial_samples = 50000
    data = []
    for _ in range(num_initial_samples):
        word_length = randint(6, 14)
        input_query = tuple(choice(input_alphabet) for _ in range(word_length))
        outputs = sul.query(input_query)

        # Each sample has the shape [O, (I, O), (I, O), ...]: the first
        # output stands alone, the remaining ones are paired with the inputs.
        first_output = outputs.pop(0)
        sample = [first_output]
        sample.extend(zip(input_query, outputs))
        data.append(sample)

    sampler = RandomWordSampler(num_walks=1000, min_walk_len=8, max_walk_len=20)
    model = run_active_Alergia(data, sul, sampler, n_iter=10)

    print(model)
def test_learning_based_on_accuracy_based_stopping(self):
    """Check property-based stopping of stochastic L* for all configurations.

    For every combination of automaton type, similarity strategy,
    counterexample processing and counterexample sampling strategy, a model
    of the 'first_grid' MDP is learned with property-based stopping enabled.
    The learned model is then model-checked and every reported difference
    must stay within the error bound used for stopping (0.02).

    The ground-truth MDP is kept in its own variable for the whole test; the
    learned model must never replace it, otherwise later configurations would
    learn from a previously learned model instead of the reference.
    """
    from itertools import product

    example = 'first_grid'
    mdp = load_automaton_from_file(f'../DotModels/MDPs/{example}.dot', automaton_type='mdp')

    min_rounds = 10
    max_rounds = 500

    from aalpy.automata import StochasticMealyMachine
    from aalpy.utils import model_check_experiment, get_properties_file, \
        get_correct_prop_values
    from aalpy.automata.StochasticMealyMachine import smm_to_mdp_conversion

    aalpy.paths.path_to_prism = "C:/Program Files/prism-4.6/bin/prism.bat"
    aalpy.paths.path_to_properties = "../Benchmarking/prism_eval_props/"

    # (properties file, correct property values, allowed error)
    stopping_based_on_prop = (get_properties_file(example), get_correct_prop_values(example), 0.02)
    error_bound = stopping_based_on_prop[2]

    input_alphabet = mdp.get_input_alphabet()

    automaton_type = ['mdp', 'smm']
    similarity_strategy = ['classic', 'normal', 'chi2']
    cex_processing = [None, 'longest_prefix']
    samples_cex_strategy = [None, 'bfs', 'random:200:0.3']

    # product() iterates in the same order as the equivalent nested loops.
    configurations = product(automaton_type, similarity_strategy,
                             cex_processing, samples_cex_strategy)

    for aut_type, strategy, cex, sample_cex in configurations:
        sul = StochasticMealySUL(mdp) if aut_type == 'smm' else MdpSUL(mdp)

        eq_oracle = UnseenOutputRandomWalkEqOracle(input_alphabet, sul=sul, num_steps=200,
                                                   reset_prob=0.25, reset_after_cex=True)

        learned_model = run_stochastic_Lstar(input_alphabet=input_alphabet, eq_oracle=eq_oracle,
                                             sul=sul, n_c=20, n_resample=1000,
                                             min_rounds=min_rounds, max_rounds=max_rounds,
                                             automaton_type=aut_type, strategy=strategy,
                                             cex_processing=cex,
                                             samples_cex_strategy=sample_cex,
                                             target_unambiguity=0.99,
                                             property_based_stopping=stopping_based_on_prop,
                                             print_level=0)

        # Model checking is run on an MDP, so learned SMMs are converted first.
        if isinstance(learned_model, StochasticMealyMachine):
            learned_mdp = smm_to_mdp_conversion(learned_model)
        else:
            learned_mdp = learned_model

        _, diff = model_check_experiment(get_properties_file(example),
                                         get_correct_prop_values(example),
                                         learned_mdp)

        for prop, d in diff.items():
            if d > error_bound:
                assert False, (
                    f'{prop}: difference {d} exceeds {error_bound} for '
                    f'{aut_type}/{strategy}/{cex}/{sample_cex}'
                )
def __init__(self):
    """Load the five-client MQTT model and reset the client bookkeeping.

    The dot file is loaded as a Mealy machine and wrapped in a ``MealySUL``;
    the sets of connected and subscribed clients start out empty.
    """
    super().__init__()

    mqtt_model = load_automaton_from_file(
        'DotModels/five_clients_mqtt_abstracted_onfsm.dot',
        automaton_type='mealy',
    )
    self.five_client_mqtt = MealySUL(mqtt_model)

    self.connected_clients, self.subscribed_clients = set(), set()
    self.clients = ('c0', 'c1', 'c2', 'c3', 'c4')
def find_bp_cex():
    """
    This example shows how the transition focus equivalence oracle can be used
    to efficiently find counterexamples.

    A pre-trained GRU network is loaded, wrapped as a SUL and compared against
    the ``bp_depth4`` DFA. The oracle is queried ten times; each previously
    unseen counterexample is printed together with the time it took to find it.
    Rounds in which the oracle finds no counterexample, or repeats one that
    was already reported, are skipped.
    """
    rnn, alphabet, train_set = train_or_load_rnn('bp_2', num_layers=2, hidden_dim=50,
                                                 rnn_class=GRUNetwork, train=False)

    model = load_automaton_from_file('TrainingDataAndAutomata/bp_depth4.dot', automaton_type='dfa')

    sul = RNN_BinarySUL_for_Weiss_Framework(rnn)
    eq_oracle = TransitionFocusOracle(alphabet, sul, num_random_walks=1000, walk_len=20)

    seen_cexes = set()
    for _ in range(10):
        start_time = time.time()
        cex = eq_oracle.find_cex(model)

        # An equivalence oracle reports "no counterexample" as None;
        # tuple(None) would raise a TypeError.
        if cex is None:
            continue

        cex = tuple(cex)
        if cex in seen_cexes:
            continue
        seen_cexes.add(cex)

        elapsed = time.time() - start_time
        print(round(elapsed, 2), "".join(cex))
def benchmark_stochastic_example(example, automaton_type='smm', n_c=20, n_resample=1000, min_rounds=10, max_rounds=500,
                                 strategy='normal', cex_processing='longest_prefix', stopping_based_on_prop=None,
                                 samples_cex_strategy=None):
    """
    Learn a stochastic model of one of the benchmarking examples found in
    Chapter 7 of Martin Tappler's PhD thesis.

    The MDP is loaded from ``./DotModels/MDPs/<example>.dot``, wrapped in an
    ``MdpSUL`` and learned with stochastic L* using a random-walk equivalence
    oracle.

    :param example: One of ['first_grid', 'second_grid', 'shared_coin', 'slot_machine']
    :param automaton_type: either smm (stochastic mealy machine) or mdp (Markov decision process)
    :param n_c: cutoff for a state to be considered complete
    :param n_resample: resampling size
    :param min_rounds: minimum number of learning rounds
    :param max_rounds: maximum number of learning rounds
    :param strategy: normal, classic or chi2
    :param cex_processing: counterexample processing strategy
    :param stopping_based_on_prop: a tuple (path to properties, correct values, error bound)
    :param samples_cex_strategy: strategy to sample cex in the trace tree
    :return: the model returned by ``run_stochastic_Lstar``
    """
    from aalpy.SULs import MdpSUL
    from aalpy.oracles import RandomWalkEqOracle
    from aalpy.learning_algs import run_stochastic_Lstar
    from aalpy.utils import load_automaton_from_file

    # Specify the path to the dot file containing a MDP
    mdp = load_automaton_from_file(f'./DotModels/MDPs/{example}.dot', automaton_type='mdp')
    input_alphabet = mdp.get_input_alphabet()

    sul = MdpSUL(mdp)
    # Only the random-walk oracle is used for learning; the random-word oracle
    # that used to be built here was overwritten before it was ever queried.
    eq_oracle = RandomWalkEqOracle(input_alphabet, sul=sul, num_steps=2000, reset_prob=0.25,
                                   reset_after_cex=True)

    learned_mdp = run_stochastic_Lstar(input_alphabet=input_alphabet, eq_oracle=eq_oracle, sul=sul, n_c=n_c,
                                       n_resample=n_resample, min_rounds=min_rounds, max_rounds=max_rounds,
                                       automaton_type=automaton_type, strategy=strategy,
                                       cex_processing=cex_processing,
                                       samples_cex_strategy=samples_cex_strategy, target_unambiguity=0.99,
                                       property_based_stopping=stopping_based_on_prop)

    return learned_mdp
def rpni_mealy_example():
    """Learn a Mealy machine with RPNI from randomly sampled Bluetooth traces.

    1000 random input sequences (length 1 to 10) are drawn over the input
    alphabet of the loaded Bluetooth model. Every prefix of each sequence is
    labelled with the last output the model produces for it, and the
    resulting (prefix, output) pairs are passed to ``run_RPNI``.

    :return: the model returned by ``run_RPNI``
    """
    import random
    from aalpy.learning_algs import run_RPNI
    from aalpy.utils import generate_random_mealy_machine, load_automaton_from_file
    from aalpy.utils.HelperFunctions import all_prefixes

    random.seed(1)

    # NOTE(review): this random machine is replaced by the loaded model on the
    # next statement. The call is kept because it presumably consumes random
    # state after seeding - confirm before removing it.
    model = generate_random_mealy_machine(num_states=5, input_alphabet=[1, 2, 3], output_alphabet=['a', 'b'])
    model = load_automaton_from_file('DotModels/Bluetooth/bluetooth_model.dot', automaton_type='mealy')

    input_al = model.get_input_alphabet()
    num_sequences = 1000

    data = []
    for _ in range(num_sequences):
        random_seq = random.choices(input_al, k=random.randint(1, 10))

        # Every prefix of the sampled sequence goes into the data set.
        data.extend(
            (prefix, model.compute_output_seq(model.initial_state, prefix)[-1])
            for prefix in all_prefixes(random_seq)
        )

    return run_RPNI(data, automaton_type='mealy', print_info=True)
def accuracy_test():
    """Compare random and model-guided test cases on a trained RNN.

    A GRU classifier is trained on data generated from the ``bp_depth4`` DFA,
    then a DFA is learned from the network with L*. Two suites of 1000 test
    cases are built - purely random words, and words composed of a learned
    state's prefix, a random middle part and an element of the learned
    model's characterization set. For each suite the percentage of test cases
    on which the network disagrees with the ground-truth DFA is printed.
    """
    from random import choice, randint

    ground_truth_model = load_automaton_from_file('TrainingDataAndAutomata/bp_depth4.dot',
                                                  automaton_type='dfa')
    input_al = ground_truth_model.get_input_alphabet()
    output_al = [1, 0]

    train_seq, train_labels = generate_data_from_automaton(
        ground_truth_model, input_al, num_examples=10000,
        lens=(1, 2, 3, 5, 8, 10, 12, 15, 20, 25, 30))

    x_train, y_train, x_test, y_test = split_train_validation(train_seq, train_labels, 0.8, uniform=True)

    # A single network configuration is trained here; adjust the arguments
    # to experiment with different parameters.
    rnn = RNNClassifier(input_al, output_dim=len(output_al), num_layers=2, hidden_dim=50,
                        x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test,
                        batch_size=32, nn_type='GRU')
    rnn.train(epochs=150, stop_acc=1.0, stop_epochs=2, verbose=1)

    sul = RnnBinarySUL(rnn)
    gt_sul = DfaSUL(ground_truth_model)

    # Three oracles are constructed, but only the random-word oracle is
    # handed to the learner below.
    random_walk_eq_oracle = RandomWalkEqOracle(input_al, sul, num_steps=10000, reset_prob=0.05)
    random_word_eq_oracle = RandomWordEqOracle(input_al, sul, min_walk_len=5, max_walk_len=25, num_walks=1000)
    random_w_eq_oracle = RandomWMethodEqOracle(input_al, sul, walks_per_state=200, walk_len=25)

    learned_model = run_Lstar(input_al, sul, random_word_eq_oracle, automaton_type='dfa',
                              max_learning_rounds=5)

    num_tc = 1000
    random_tc, coverage_guided_tc = [], []
    for _ in range(num_tc):
        random_word = tuple(choice(input_al) for _ in range(randint(10, 25)))
        random_tc.append(random_word)

        prefix = choice(learned_model.states).prefix
        middle = tuple(choice(input_al) for _ in range(20))
        suffix = choice(learned_model.characterization_set)
        coverage_guided_tc.append(prefix + middle + suffix)

    def count_disagreements(test_cases):
        # Number of test cases where ground truth and network answer differently.
        return sum(1 for tc in test_cases if gt_sul.query(tc) != sul.query(tc))

    num_adv_random = count_disagreements(random_tc)
    num_adv_guided = count_disagreements(coverage_guided_tc)

    print(f'Random sampling: {round((num_adv_random/num_tc)*100,2)}')
    print(f'Guided sampling: {round((num_adv_guided/num_tc)*100,2)}')
def get_tomita(tomita_num):
    """Load ``tomita_<tomita_num>.dot`` as a DFA with prefixes computed."""
    dot_path = f'TrainingDataAndAutomata/tomita_{tomita_num}.dot'
    return load_automaton_from_file(dot_path, automaton_type='dfa', compute_prefixes=True)
def get_ssh():
    """Load the OpenSSH dot file as a Mealy machine with prefixes computed."""
    dot_path = 'TrainingDataAndAutomata/OpenSSH.dot'
    return load_automaton_from_file(dot_path, automaton_type='mealy', compute_prefixes=True)
def get_tcp():
    """Load the TCP Linux client dot file as a Mealy machine with prefixes computed."""
    dot_path = 'TrainingDataAndAutomata/TCP_Linux_Client.dot'
    return load_automaton_from_file(dot_path, automaton_type='mealy', compute_prefixes=True)
def get_coffee_machine():
    """Load the coffee machine dot file as a Mealy machine with prefixes computed."""
    dot_path = 'TrainingDataAndAutomata/Coffee_machine.dot'
    return load_automaton_from_file(dot_path, automaton_type='mealy', compute_prefixes=True)
def get_mqtt_mealy():
    """Load the MQTT dot file as a Mealy machine with prefixes computed."""
    dot_path = 'TrainingDataAndAutomata/MQTT.dot'
    return load_automaton_from_file(dot_path, automaton_type='mealy', compute_prefixes=True)
def generate_concrete_data_MQTT(num_examples, num_rand_topics=5, lens=(1, 2, 4, 6, 10), uniform_concretion=False):
    """Generate tokenized training data with concretized MQTT inputs.

    Abstract input sequences are sampled and labelled with the last output of
    the MQTT Mealy machine. Each abstract sequence is then concretized by
    substituting a randomly generated topic string into its inputs and label,
    and everything is finally mapped to integer tokens.

    :param num_examples: total number of examples, distributed over ``lens``
        proportionally to the sequence length
    :param num_rand_topics: number of random topic strings to generate
    :param lens: sequence lengths to sample
    :param uniform_concretion: if True, every abstract sequence is concretized
        once per topic (and ``num_examples`` is divided by ``num_rand_topics``);
        otherwise a single random topic is picked per sequence
    :return: (tokenized sequences, tokenized labels, list of concrete inputs,
        list of concrete outputs)
    """
    mealy_machine = load_automaton_from_file('TrainingDataAndAutomata/MQTT.dot', automaton_type='mealy')
    input_al = mealy_machine.get_input_alphabet()

    sum_lens = sum(lens)

    if uniform_concretion:
        num_examples = num_examples // num_rand_topics

    # Maps a sequence length to the number of examples generated for it.
    ex_per_len = {length: int(num_examples * (length / sum_lens)) + 1 for length in lens}

    abstract_train_seq = []
    abstract_labels = []
    for length, count in ex_per_len.items():
        for _ in range(count):
            if random.random() <= 0.15 and length > 1:
                # Sample over the whole input alphabet.
                seq = [choice(input_al) for _ in range(length)]
            else:
                # Sequence that starts with 'connect'; the inputs after it are
                # drawn from a restricted set that is widened in 20% of cases.
                possible_inputs = ['subscribe', 'unsubscribe', 'publish']
                if random.random() <= 0.2:
                    possible_inputs += ['disconnect', 'connect', 'invalid']
                seq = ['connect'] + [choice(possible_inputs) for _ in range(length - 1)]

            # The label is the output produced by the last input.
            mealy_machine.reset_to_initial()
            out = None
            for inp in seq:
                out = mealy_machine.step(inp)

            abstract_train_seq.append(seq)
            abstract_labels.append(out)

    random_topics = [gen_random_str() for _ in range(num_rand_topics)]

    # Concrete input template for every abstract input; {topic} is filled in
    # where the concrete input carries one.
    concretization = {
        'connect': 'connect_Client1_ping=False',
        'disconnect': 'disconnect_Client1_var',
        'subscribe': 'subscribe_Client1_topic="{topic}"_retain=False',
        'unsubscribe': 'unsubscribe_Client1_topic="{topic}"',
        'publish': 'publish_Client1_global_topic="{topic}"',
        'invalid': 'invalid=Client1_opt=NULL',
    }
    topic_free_labels = ('CONNACK', 'CONCLOSED')

    concrete_train_seq = []
    concrete_input_set = set()
    concrete_labels = []

    for ind, seq in enumerate(abstract_train_seq):
        if uniform_concretion:
            topics = random_topics
        else:
            topics = [choice(random_topics)]

        abstract_label = abstract_labels[ind]
        for topic in topics:
            concrete_seq = []
            for abstract_input in seq:
                if abstract_input in concretization:
                    concrete_seq.append(concretization[abstract_input].format(topic=topic))
                else:
                    assert False

            if abstract_label in topic_free_labels:
                concrete_labels.append(abstract_label + '_User1')
            else:
                concrete_labels.append(abstract_label + f'_User1_topic:{topic}')

            concrete_train_seq.append(concrete_seq)
            concrete_input_set.update(concrete_seq)

    concrete_input_set = list(concrete_input_set)
    output_al = list(set(concrete_labels))

    # Map inputs and outputs to integers.
    input_dict = tokenized_dict(concrete_input_set)
    out_dict = tokenized_dict(output_al)

    train_seq = [seq_to_tokens(word, input_dict) for word in concrete_train_seq]
    train_labels = [seq_to_tokens(word, out_dict) for word in concrete_labels]

    return train_seq, train_labels, concrete_input_set, output_al
n_c, n_resample = n_c, n_resample elif exp_name == 'shared_coin': n_c, n_resample = n_c, n_resample elif exp_name == 'slot_machine': n_c, n_resample = n_c, n_resample else: if exp_name == 'first_grid': n_c, n_resample = 20, 1000 elif exp_name == 'second_grid': n_c, n_resample = 20, 1000 elif exp_name == 'shared_coin': n_c, n_resample = 50, 5000 elif exp_name == 'slot_machine': n_c, n_resample = 100, 10000 original_mdp = load_automaton_from_file(path_to_dir + file, automaton_type='mdp') input_alphabet = original_mdp.get_input_alphabet() original_prism_file_name = f'{benchmark_dir}/original_{exp_name}.prism' mdp_2_prism_format(original_mdp, name=exp_name, output_path=f'{original_prism_file_name}') mdp_sul = MdpSUL(original_mdp) eq_oracle = UnseenOutputRandomWalkEqOracle(input_alphabet, mdp_sul, num_steps=n_resample * (1 / 0.25), reset_after_cex=True, reset_prob=0.25)
run_times = [] # change on which folder to perform experiments exp, sul = dfa_2000_states_10_inputs, DfaSUL benchmarks = os.listdir(exp) benchmarks = benchmarks[:10] caching_opt = [True, False] closing_options = ['shortest_first', 'longest_first', 'single'] suffix_processing = ['all', 'single'] counter_example_processing = ['rs', 'longest_prefix', None] e_closedness = ['prefix', 'suffix'] for b in benchmarks: automaton = load_automaton_from_file(f'{exp}/{b}', automaton_type='dfa') input_al = automaton.get_input_alphabet() sul_dfa = sul(automaton) state_origin_eq_oracle = StatePrefixEqOracle(input_al, sul_dfa, walks_per_state=5, walk_len=25) learned_dfa, data = run_Lstar(input_al, sul_dfa, state_origin_eq_oracle, automaton_type='dfa', cache_and_non_det_check=False, cex_processing='rs',
import random from aalpy.SULs import MdpSUL, StochasticMealySUL from aalpy.base import SUL from aalpy.automata import Mdp, MdpState, StochasticMealyState, StochasticMealyMachine from aalpy.learning_algs import run_Lstar, run_stochastic_Lstar from aalpy.oracles import RandomWMethodEqOracle, RandomWordEqOracle from aalpy.utils import load_automaton_from_file model = load_automaton_from_file('CYW43455.dot', automaton_type='mealy', compute_prefixes=True) alphabet = model.get_input_alphabet() print(alphabet) class ModelSUL(SUL): def __init__(self, model): super().__init__() self.model = model self.last_output = 'init' def pre(self): self.model.reset_to_initial() self.last_output = 'init' def post(self): pass def step(self, letter): if not letter:
n_c = 10 n_resample = 1000 min_rounds = 25 max_rounds = 500 experiment_repetition = 10 uniform_parameters = False strategy = ["normal", "chi2"] # chi_square cex_sampling = [ 'bfs', ] # random:100:0.15 cex_processing = [None, 'longest_prefix'] # add a single prefix start = time.time() model_dict = { m.split('.')[0]: load_automaton_from_file(path_to_dir + m, automaton_type='mdp') for m in files } for strat in strategy: for cex_stat in cex_sampling: for cex_proc in cex_processing: print(strat, cex_stat, cex_proc) benchmark_dir = f'FM_mdp_smm/benchmark_22_04_{strat}_{cex_proc}/' for seed in range(experiment_repetition): print(seed) random.seed(seeds[seed]) import os if not os.path.exists(benchmark_dir): os.makedirs(benchmark_dir)
import unittest from aalpy.SULs import DfaSUL, MealySUL, MooreSUL from aalpy.automata import Dfa, MealyMachine, MooreMachine from aalpy.learning_algs import run_Lstar from aalpy.oracles import WMethodEqOracle, RandomWalkEqOracle, StatePrefixEqOracle, TransitionFocusOracle, \ RandomWMethodEqOracle, BreadthFirstExplorationEqOracle, RandomWordEqOracle, CacheBasedEqOracle, \ KWayStateCoverageEqOracle from aalpy.utils import get_Angluin_dfa, load_automaton_from_file correct_automata = {Dfa: get_Angluin_dfa(), MealyMachine: load_automaton_from_file('../DotModels/Angluin_Mealy.dot', automaton_type='mealy'), MooreMachine: load_automaton_from_file('../DotModels/Angluin_Moore.dot', automaton_type='moore')} suls = {Dfa: DfaSUL, MealyMachine: MealySUL, MooreMachine: MooreSUL} class DeterministicTest(unittest.TestCase): def prove_equivalence(self, learned_automaton): correct_automaton = correct_automata[learned_automaton.__class__] # only work if correct automaton is already minimal if len(learned_automaton.states) != len(correct_automaton.states): print(len(learned_automaton.states), len(correct_automaton.states)) return False alphabet = learned_automaton.get_input_alphabet()