Exemplo n.º 1
0
def generate_data_based_on_characterization_set(automaton,
                                                automaton_type='mealy'):
    """
    Generate tokenized training data from an automaton's characterization set.

    The automaton is re-learned with L* to obtain the access prefix of every
    state and the characterization set. Input sequences are built as
    ``prefix + suffix`` and ``prefix + input + suffix``, then extended once
    more with ``sequence + input + suffix``. Every sequence is labelled with
    the last output the original automaton produces for it, and the whole
    data set is repeated eight times.

    :param automaton: automaton to sample; its input alphabet is used for learning
    :param automaton_type: 'dfa' or 'mealy'; selects the SUL wrapper and how
        the output alphabet is collected
    :return: tuple ``(train_seq, train_labels)`` of tokenized input sequences
        and tokenized labels
    """
    from aalpy.SULs import MealySUL, DfaSUL
    from aalpy.oracles import RandomWalkEqOracle
    from aalpy.learning_algs import run_Lstar

    # Number of times the finished data set is repeated (three doublings).
    duplication_factor = 2 ** 3

    alphabet = automaton.get_input_alphabet()

    # The SUL is created first because the equivalence oracle has to drive
    # the system through the SUL interface, not through the raw automaton.
    sul = DfaSUL(automaton) if automaton_type == 'dfa' else MealySUL(automaton)
    eq_oracle = RandomWalkEqOracle(alphabet,
                                   sul,
                                   num_steps=5000,
                                   reset_prob=0.09,
                                   reset_after_cex=True)

    # From here on `automaton` is the learned hypothesis; `sul` still wraps
    # the automaton that was passed in.
    automaton, data = run_Lstar(alphabet,
                                sul,
                                eq_oracle,
                                automaton_type=automaton_type,
                                print_level=0,
                                return_data=True,
                                suffix_closedness=True)

    characterization_set = data['characterization set']
    prefixes = [state.prefix for state in automaton.states]
    input_al = automaton.get_input_alphabet()

    sequences = [p + e for e in characterization_set for p in prefixes]
    sequences.extend([
        p + (i,) + e
        for p in prefixes
        for i in input_al
        for e in characterization_set
    ])
    # One more extension round over everything collected so far. The list is
    # materialized before extending so the iteration sees a fixed snapshot.
    sequences.extend([
        p + (i,) + e
        for p in sequences
        for i in input_al
        for e in characterization_set
    ])

    # Label each distinct entry once, then replicate; querying after the
    # replication would ask the SUL the same question eight times.
    labels = [sul.query(s)[-1] for s in sequences] * duplication_factor
    sequences = [
        list(s) for _ in range(duplication_factor) for s in sequences
    ]

    if automaton_type == 'dfa':
        # DFA states expose acceptance rather than an output function.
        output_al = {True, False}
    else:
        output_al = {
            output
            for state in automaton.states
            for output in state.output_fun.values()
        }

    input_dict = tokenized_dict(input_al)
    out_dict = tokenized_dict(output_al)

    train_seq = [seq_to_tokens(word, input_dict) for word in sequences]
    train_labels = [seq_to_tokens(word, out_dict) for word in labels]

    return train_seq, train_labels
Exemplo n.º 2
0
    def test_all_configuration_combinations(self):
        """
        Run L* on the Angluin DFA for every combination of automaton type,
        closing strategy, counterexample processing, suffix closedness and
        caching, with every equivalence oracle, and check each learned model
        with ``self.prove_equivalence``.

        The cache-based oracle is skipped when caching is disabled.
        """
        from itertools import product

        angluin_example = get_Angluin_dfa()

        alphabet = angluin_example.get_input_alphabet()

        automata_type = ['dfa', 'mealy', 'moore']
        closing_strategies = ['shortest_first', 'longest_first', 'single']
        cex_processing = [None, 'longest_prefix', 'rs']
        suffix_closedness = [True, False]
        caching = [True, False]

        # product() iterates with the rightmost factor changing fastest,
        # i.e. in the same order as the equivalent nested loops.
        configurations = product(automata_type, closing_strategies, cex_processing,
                                 suffix_closedness, caching)

        for automata, closing, cex, suffix, cache in configurations:
            sul = DfaSUL(angluin_example)

            random_walk_eq_oracle = RandomWalkEqOracle(alphabet, sul, 5000, reset_after_cex=True)
            state_origin_eq_oracle = StatePrefixEqOracle(alphabet, sul, walks_per_state=10, walk_len=50)
            tran_cov_eq_oracle = TransitionFocusOracle(alphabet, sul, num_random_walks=200, walk_len=30,
                                                       same_state_prob=0.3)
            w_method_eq_oracle = WMethodEqOracle(alphabet, sul,
                                                 max_number_of_states=len(angluin_example.states))
            random_W_method_eq_oracle = RandomWMethodEqOracle(alphabet, sul,
                                                              walks_per_state=10, walk_len=50)
            bf_exploration_eq_oracle = BreadthFirstExplorationEqOracle(alphabet, sul, 3)
            random_word_eq_oracle = RandomWordEqOracle(alphabet, sul)
            cache_based_eq_oracle = CacheBasedEqOracle(alphabet, sul)
            kWayStateCoverageEqOracle = KWayStateCoverageEqOracle(alphabet, sul)

            oracles = [random_walk_eq_oracle, random_word_eq_oracle, random_W_method_eq_oracle,
                       kWayStateCoverageEqOracle, cache_based_eq_oracle, bf_exploration_eq_oracle,
                       tran_cov_eq_oracle, w_method_eq_oracle, state_origin_eq_oracle]

            if not cache:
                oracles.remove(cache_based_eq_oracle)

            for oracle in oracles:
                # Every run gets a fresh SUL, shared by learner and oracle.
                sul = DfaSUL(angluin_example)
                oracle.sul = sul

                learned_model = run_Lstar(alphabet, sul, oracle, automaton_type=automata,
                                          closing_strategy=closing, suffix_closedness=suffix,
                                          cache_and_non_det_check=cache, cex_processing=cex,
                                          print_level=0)

                # Report the complete configuration so a failure can be reproduced.
                assert self.prove_equivalence(learned_model), (
                    f'prove_equivalence failed for oracle={oracle}, automaton_type={automata!r}, '
                    f'closing_strategy={closing!r}, cex_processing={cex!r}, '
                    f'suffix_closedness={suffix!r}, cache_and_non_det_check={cache!r}')
Exemplo n.º 3
0
    def test_suffix_closedness(self):
        """
        Run L* on the Angluin DFA with suffix closedness enabled and disabled,
        for each automaton type, and check every learned model with
        ``self.prove_equivalence``.
        """
        angluin_example = get_Angluin_dfa()

        alphabet = angluin_example.get_input_alphabet()

        suffix_closedness = [True, False]
        automata_type = ['dfa', 'mealy', 'moore']

        for automata in automata_type:
            for s_closed in suffix_closedness:
                sul = DfaSUL(angluin_example)
                eq_oracle = RandomWalkEqOracle(alphabet, sul, 500)

                learned_model = run_Lstar(alphabet,
                                          sul,
                                          eq_oracle,
                                          automaton_type=automata,
                                          suffix_closedness=s_closed,
                                          cache_and_non_det_check=True,
                                          cex_processing='rs',
                                          print_level=0)

                # Name the failing combination instead of a bare `assert False`.
                assert self.prove_equivalence(learned_model), (
                    f'prove_equivalence failed for automaton_type={automata!r}, '
                    f'suffix_closedness={s_closed!r}')
Exemplo n.º 4
0
    def test_closing_strategies(self):
        """
        Run L* on the Angluin DFA with every closing strategy, for each
        automaton type, and check every learned model with
        ``self.prove_equivalence``.
        """
        dfa = get_Angluin_dfa()

        alphabet = dfa.get_input_alphabet()

        closing_strategies = ['shortest_first', 'longest_first', 'single']
        automata_type = ['dfa', 'mealy', 'moore']

        for automata in automata_type:
            for closing in closing_strategies:
                sul = DfaSUL(dfa)
                eq_oracle = RandomWalkEqOracle(alphabet, sul, 1000)

                learned_model = run_Lstar(alphabet,
                                          sul,
                                          eq_oracle,
                                          automaton_type=automata,
                                          closing_strategy=closing,
                                          cache_and_non_det_check=True,
                                          cex_processing='rs',
                                          print_level=0)

                # Name the failing combination instead of a bare `assert False`.
                assert self.prove_equivalence(learned_model), (
                    f'prove_equivalence failed for automaton_type={automata!r}, '
                    f'closing_strategy={closing!r}')
Exemplo n.º 5
0
    def test_cex_processing(self):
        """
        Run L* on the Angluin DFA with every counterexample-processing
        option, for each automaton type, and check every learned model with
        ``self.prove_equivalence``.
        """
        angluin_example = get_Angluin_dfa()

        alphabet = angluin_example.get_input_alphabet()

        cex_processing = [None, 'longest_prefix', 'rs']
        automata_type = ['dfa', 'mealy', 'moore']

        for automata in automata_type:
            for cex in cex_processing:
                sul = DfaSUL(angluin_example)
                eq_oracle = RandomWalkEqOracle(alphabet, sul, 500)

                learned_model = run_Lstar(alphabet,
                                          sul,
                                          eq_oracle,
                                          automaton_type=automata,
                                          cache_and_non_det_check=True,
                                          cex_processing=cex,
                                          print_level=0)

                # Name the failing combination instead of a bare `assert False`.
                assert self.prove_equivalence(learned_model), (
                    f'prove_equivalence failed for automaton_type={automata!r}, '
                    f'cex_processing={cex!r}')
Exemplo n.º 6
0
def random_dfa_example(alphabet_size,
                       number_of_states,
                       num_accepting_states=1):
    """
    Build a random DFA and learn it back with L*.

    Several equivalence oracles are instantiated to show how each one is
    configured; learning itself uses the random-walk oracle.

    :param alphabet_size: number of input symbols, taken from the first 26 ASCII letters
    :param number_of_states: number of states of the generated DFA
    :param num_accepting_states: how many of those states are accepting
    :return: the learned DFA
    """
    assert num_accepting_states <= number_of_states

    letters = string.ascii_letters[:26]
    alphabet = list(letters)[:alphabet_size]

    target = generate_random_dfa(number_of_states, alphabet,
                                 num_accepting_states)
    sul_dfa = DfaSUL(target)

    # Showcase of the available equivalence oracles and their parameters.
    eq_oracles = {
        'random_walk': RandomWalkEqOracle(alphabet, sul_dfa, 5000),
        'state_prefix': StatePrefixEqOracle(alphabet, sul_dfa,
                                            walks_per_state=10, walk_len=50),
        'transition_focus': TransitionFocusOracle(alphabet, sul_dfa,
                                                  num_random_walks=200,
                                                  walk_len=30,
                                                  same_state_prob=0.3),
        'w_method': WMethodEqOracle(alphabet, sul_dfa,
                                    max_number_of_states=number_of_states),
        'random_w_method': RandomWMethodEqOracle(alphabet, sul_dfa,
                                                 walks_per_state=10,
                                                 walk_len=50),
        'bf_exploration': BreadthFirstExplorationEqOracle(alphabet, sul_dfa, 5),
        'random_word': RandomWordEqOracle(alphabet, sul_dfa),
        'cache_based': CacheBasedEqOracle(alphabet, sul_dfa),
        'user_input': UserInputEqOracle(alphabet, sul_dfa),
        'k_way_state_coverage': KWayStateCoverageEqOracle(alphabet, sul_dfa),
    }

    return run_Lstar(alphabet,
                     sul_dfa,
                     eq_oracles['random_walk'],
                     automaton_type='dfa',
                     cache_and_non_det_check=False,
                     cex_processing='rs')
Exemplo n.º 7
0
    def test_eq_oracles(self):
        """
        Run L* on the Angluin DFA once per equivalence oracle, for each
        automaton type, and check every learned model with
        ``self.prove_equivalence``.
        """
        angluin_example = get_Angluin_dfa()

        alphabet = angluin_example.get_input_alphabet()

        automata_type = ['dfa', 'mealy', 'moore']

        for automata in automata_type:
            sul = DfaSUL(angluin_example)

            random_walk_eq_oracle = RandomWalkEqOracle(alphabet, sul, 5000, reset_after_cex=True)
            state_origin_eq_oracle = StatePrefixEqOracle(alphabet, sul, walks_per_state=10, walk_len=50)
            tran_cov_eq_oracle = TransitionFocusOracle(alphabet, sul, num_random_walks=200, walk_len=30,
                                                       same_state_prob=0.3)
            w_method_eq_oracle = WMethodEqOracle(alphabet, sul, max_number_of_states=len(angluin_example.states))
            random_W_method_eq_oracle = RandomWMethodEqOracle(alphabet, sul, walks_per_state=10, walk_len=50)
            bf_exploration_eq_oracle = BreadthFirstExplorationEqOracle(alphabet, sul, 3)
            random_word_eq_oracle = RandomWordEqOracle(alphabet, sul)
            cache_based_eq_oracle = CacheBasedEqOracle(alphabet, sul)
            kWayStateCoverageEqOracle = KWayStateCoverageEqOracle(alphabet, sul)

            oracles = [random_walk_eq_oracle, random_word_eq_oracle, random_W_method_eq_oracle, w_method_eq_oracle,
                       kWayStateCoverageEqOracle, cache_based_eq_oracle, bf_exploration_eq_oracle, tran_cov_eq_oracle,
                       state_origin_eq_oracle]

            for oracle in oracles:
                # Every run gets a fresh SUL, shared by learner and oracle.
                sul = DfaSUL(angluin_example)
                oracle.sul = sul

                learned_model = run_Lstar(alphabet, sul, oracle, automaton_type=automata,
                                          cache_and_non_det_check=True, cex_processing=None, print_level=0)

                # Put the diagnostics in the assertion message so they show up
                # in the test report rather than only on stdout.
                assert self.prove_equivalence(learned_model), (
                    f'prove_equivalence failed for oracle={oracle}, automaton_type={automata!r}')
def verify_cex(aalpy_model, white_box_model, rnn, cex_set):
    """
    Verify that counterexamples are not spurious and find which model classified correctly.

    Each counterexample is replayed on both extracted models and on the RNN;
    only the last output of each query is compared.

    :param aalpy_model: model obtained by our approach
    :param white_box_model: model obtained by refinement-based learning
    :param rnn: RNN that serves as system under learning
    :param cex_set: found cases of non-conformance between two models
    :return: False as soon as a counterexample does not separate the two
        models, or neither model agrees with the RNN on it; True otherwise
        (including when ``cex_set`` is empty)
    :raises AssertionError: if some counterexamples are classified correctly
        by one model and others by the other model
    """
    # With nothing to check there is no "correct model" to report.
    if not cex_set:
        print('No counterexamples to verify.')
        return True

    correct_model = None
    for cex in cex_set:
        sul1, sul2 = DfaSUL(aalpy_model), DfaSUL(white_box_model)
        output_black_box = sul1.query(cex)[-1]
        output_white_box = sul2.query(cex)[-1]

        # Reset the network before replaying the counterexample on it.
        rnn.renew()
        rnn_sul = RNN_BinarySUL_for_Weiss_Framework(rnn)
        rnn_output = rnn_sul.query(cex)[-1]

        # The models agree, so this is not a counterexample between them.
        if output_black_box == output_white_box:
            return False
        # Neither model reproduces the RNN's answer.
        if output_black_box != rnn_output and output_white_box != rnn_output:
            return False

        if output_black_box == rnn_output:
            winner = 'Black-Box'
        else:
            print(output_black_box)
            print(rnn_output)
            winner = 'White-Box'

        # Raised explicitly so the check is not stripped under `python -O`.
        if correct_model is not None and correct_model != winner:
            raise AssertionError(
                f'Counterexamples disagree on the correct model: {correct_model} and {winner}.')
        correct_model = winner

    print(f'All examples were classified correctly by the {correct_model} model and misclassified by the other.')
    return True
Exemplo n.º 9
0
def angluin_seminal_example():
    """
    Learn the example automaton from Angluin's seminal paper with L*,
    using a random-walk equivalence oracle.
    :return: learned DFA
    """
    target_dfa = get_Angluin_dfa()
    input_alphabet = target_dfa.get_input_alphabet()
    system_under_learning = DfaSUL(target_dfa)
    random_walk_oracle = RandomWalkEqOracle(input_alphabet, system_under_learning, 500)

    return run_Lstar(input_alphabet,
                     system_under_learning,
                     random_walk_oracle,
                     automaton_type='dfa',
                     cache_and_non_det_check=True,
                     cex_processing=None,
                     print_level=3)
Exemplo n.º 10
0
    print(i)
    # NOTE(review): this is a fragment; `i`, `states`, `num_states`, `repeat`,
    # `alphabet` and `cex_processing` are defined outside the visible code.

    # Fresh, empty timing accumulators (presumably filled further down, past
    # the visible part of this snippet - confirm).
    learning_time_dfa = []
    learning_time_mealy = []
    learning_time_moore = []

    total_time_dfa = []
    total_time_mealy = []
    total_time_moore = []

    # Record the automaton size used for this round.
    states.append(num_states)

    for _ in range(repeat):
        # New random DFA per repetition; half of its states are accepting.
        dfa = generate_random_dfa(num_states,
                                  alphabet=alphabet,
                                  num_accepting_states=num_states // 2)
        sul = DfaSUL(dfa)

        # eq_oracle = StatePrefixEqOracle(alphabet, sul, walks_per_state=5, walk_len=40)
        eq_oracle = RandomWalkEqOracle(alphabet,
                                       sul,
                                       num_steps=9000,
                                       reset_prob=0.09)

        # Only the returned data is kept; the learned model is discarded.
        _, data = run_Lstar(alphabet,
                            sul,
                            eq_oracle,
                            cex_processing=cex_processing,
                            cache_and_non_det_check=False,
                            return_data=True,
                            automaton_type='dfa')
def run_comparison(example, train=True, num_layers=2, hidden_dim=50, rnn_class=GRUNetwork,
                   insufficient_testing=False, verbose=False):
    """
    Extract a DFA from an RNN with the white-box approach (``extract``) and
    with black-box L* learning, then compare the two results.

    :param example: name of the example, forwarded to ``train_or_load_rnn``;
        names containing 'tomita' use a different equivalence oracle
    :param train: forwarded to ``train_or_load_rnn``
    :param num_layers: forwarded to ``train_or_load_rnn`` and shown in the report
    :param hidden_dim: forwarded to ``train_or_load_rnn`` and shown in the report
    :param rnn_class: forwarded to ``train_or_load_rnn``; reported as GRU or LSTM
    :param insufficient_testing: if True, learn with a small random-word oracle
        and only compare models that have the same number of states
    :param verbose: if True, print section banners and keep output enabled
        during extraction; otherwise output is blocked until learning is done
    :return: None
    :raises AssertionError: if the counterexamples between the two models are spurious
    """
    rnn, alphabet, train_set = train_or_load_rnn(example, num_layers=num_layers, hidden_dim=hidden_dim,
                                                 rnn_class=rnn_class, train=train)

    # initial examples for Weiss et al.: the shortest word of each class, if any
    all_words = sorted(train_set.keys(), key=len)
    pos = next((w for w in all_words if rnn.classify_word(w) is True), None)
    neg = next((w for w in all_words if rnn.classify_word(w) is False), None)
    starting_examples = [w for w in (pos, neg) if w is not None]

    # Extract Automaton Using White-Box eq. query
    rnn.renew()
    if verbose:
        print('---------------------------------WHITE BOX EXTRACTION--------------------------------------------------')
    else:
        blockPrint()

    # try/finally so that printing is restored even if extraction or learning raises
    try:
        start_white_box = time.time()
        dfa_weiss = extract(rnn, time_limit=500, initial_split_depth=10, starting_examples=starting_examples)
        time_white_box = time.time() - start_white_box
        # Make sure that internal states are back to initial
        rnn.renew()

        if verbose:
            print('---------------------------------BLACK BOX EXTRACTION--------------------------------------------------')
        sul = RNN_BinarySUL_for_Weiss_Framework(rnn)

        alphabet = list(alphabet)

        # define the equivalence oracle
        if insufficient_testing:
            eq_oracle = RandomWordEqOracle(alphabet, sul, num_walks=100, min_walk_len=3, max_walk_len=12)
        else:
            eq_oracle = RandomWMethodEqOracle(alphabet, sul, walks_per_state=1000, walk_len=25)
            if 'tomita' not in example:
                eq_oracle = TransitionFocusOracle(alphabet, sul, num_random_walks=1000, walk_len=20)

        start_black_box = time.time()
        aalpy_dfa = run_Lstar(alphabet=alphabet, sul=sul, eq_oracle=eq_oracle, automaton_type='dfa',
                              max_learning_rounds=10, print_level=2, cache_and_non_det_check=False,
                              cex_processing='rs')
        time_black_box = time.time() - start_black_box
    finally:
        enablePrint()

    if insufficient_testing:
        if len(aalpy_dfa.states) == len(dfa_weiss.Q):
            translated_weiss_2_aalpy = Weiss_to_AALpy_DFA_format(dfa_weiss)
            sul = DfaSUL(translated_weiss_2_aalpy)
            eq_oracle = RandomWMethodEqOracle(alphabet, sul, walks_per_state=1000, walk_len=10)

            cex = eq_oracle.find_cex(aalpy_dfa)
            if not cex:
                print(
                    '-------------------------WHITE-Box vs. BLACK-BOX WITH INSUFFICIENT TESTING -------------------------')
                print('White-box and Black-box technique extracted the same automaton.')
                print(f'White-box time: {round(time_white_box, 2)} seconds.')
                print(f'Black-box time: {round(time_black_box, 2)} seconds.')
            else:
                verify_cex(aalpy_dfa, translated_weiss_2_aalpy, rnn, [cex])
        return

    if len(aalpy_dfa.states) != len(dfa_weiss.Q):
        print('---------------------------------WHITE vs. BLACK BOX EXTRACTION----------------------------------------')
        nn_props = f'{"GRU" if rnn_class == GRUNetwork else "LSTM"}_layers_{num_layers}_dim_{hidden_dim}'
        print(f'Example       : {example}')
        print(f'Configuration : {nn_props}')
        print(f"Number of states\n  "
              f"White-box extraction: {len(dfa_weiss.Q)}\n  "
              f"Black-box extraction: {len(aalpy_dfa.states)}")

        translated_weiss_2_aalpy = Weiss_to_AALpy_DFA_format(dfa_weiss)

        sul = DfaSUL(translated_weiss_2_aalpy)
        eq_oracle = RandomWMethodEqOracle(alphabet, sul, walks_per_state=10000, walk_len=20)
        if 'tomita' not in example:
            eq_oracle = TransitionFocusOracle(alphabet, sul)

        # Collect up to ten distinct counterexamples between the two models.
        cex_set = []
        for _ in range(10):
            cex = eq_oracle.find_cex(aalpy_dfa)
            if cex and cex not in cex_set:
                cex_set.append(cex)

        cex_set.sort(key=len)
        # verify that the counterexamples are not spurious and find out which model is the correct one
        real_cex = verify_cex(aalpy_dfa, translated_weiss_2_aalpy, rnn, cex_set)
        if not real_cex:
            print('Spurious CEX')
            # Raised explicitly so the check is not stripped under `python -O`.
            raise AssertionError('Spurious CEX')
    else:
        print('Size of both models: ', len(aalpy_dfa.states))
Exemplo n.º 12
0
def accuracy_test():
    """
    Train a GRU classifier on data sampled from the 'bp_depth4' DFA, learn a
    DFA from the trained network with L*, and compare how often random test
    cases and model-guided test cases make the network disagree with the
    ground-truth automaton. Both percentages are printed.
    """
    from random import choice, randint

    ground_truth_model = load_automaton_from_file(
        'TrainingDataAndAutomata/bp_depth4.dot', automaton_type='dfa')
    input_al = ground_truth_model.get_input_alphabet()
    output_al = [1, 0]

    sample_lengths = (1, 2, 3, 5, 8, 10, 12, 15, 20, 25, 30)
    train_seq, train_labels = generate_data_from_automaton(
        ground_truth_model, input_al, num_examples=10000, lens=sample_lengths)

    x_train, y_train, x_test, y_test = split_train_validation(
        train_seq, train_labels, 0.8, uniform=True)

    # Train all neural networks with same parameters, this can be configured to train with different parameters
    rnn = RNNClassifier(input_al,
                        output_dim=len(output_al),
                        num_layers=2,
                        hidden_dim=50,
                        x_train=x_train,
                        y_train=y_train,
                        x_test=x_test,
                        y_test=y_test,
                        batch_size=32,
                        nn_type='GRU')

    rnn.train(epochs=150, stop_acc=1.0, stop_epochs=2, verbose=1)

    sul = RnnBinarySUL(rnn)
    gt_sul = DfaSUL(ground_truth_model)

    # Alternative oracles are set up alongside the one used for learning.
    random_walk_eq_oracle = RandomWalkEqOracle(
        input_al, sul, num_steps=10000, reset_prob=0.05)
    random_word_eq_oracle = RandomWordEqOracle(
        input_al, sul, min_walk_len=5, max_walk_len=25, num_walks=1000)
    random_w_eq_oracle = RandomWMethodEqOracle(
        input_al, sul, walks_per_state=200, walk_len=25)

    learned_model = run_Lstar(input_al,
                              sul,
                              random_word_eq_oracle,
                              automaton_type='dfa',
                              max_learning_rounds=5)

    num_tc = 1000
    random_tc = []
    coverage_guided_tc = []
    for _ in range(num_tc):
        # Purely random word of length 10 to 25.
        word_len = randint(10, 25)
        random_tc.append(tuple(choice(input_al) for _ in range(word_len)))

        # Guided word: state prefix + 20 random inputs + distinguishing suffix.
        access = choice(learned_model.states).prefix
        infix = tuple(choice(input_al) for _ in range(20))
        distinguishing = choice(learned_model.characterization_set)
        coverage_guided_tc.append(access + infix + distinguishing)

    def count_disagreements(test_cases):
        """Count test cases on which ground truth and network outputs differ."""
        disagreements = 0
        for test_case in test_cases:
            expected = gt_sul.query(test_case)
            observed = sul.query(test_case)
            if expected != observed:
                disagreements += 1
        return disagreements

    adv_random = count_disagreements(random_tc)
    adv_guided = count_disagreements(coverage_guided_tc)

    print(f'Random sampling: {round((adv_random/num_tc)*100,2)}')
    print(f'Guided sampling: {round((adv_guided/num_tc)*100,2)}')