示例#1
0
def build_pta(s_plus: Set[str], s_minus: Set[str] = None) -> DFA:
    """
    Build a prefix tree acceptor (PTA) from the example strings
    S = S+ union S-.

    :param s_plus: Set containing positive examples of the target language
    :type s_plus: set
    :param s_minus: Set containing negative examples of the target
                    language. When omitted (or None) no negative
                    examples are used.
    :type s_minus: set
    :return: A dfa representing a prefix tree acceptor
    :rtype: DFA
    """
    # A None sentinel is used instead of a ``set()`` default so that no
    # single set object is shared between calls.
    if s_minus is None:
        s_minus = set()

    samples = s_plus.union(s_minus)

    alphabet = utils.determine_alphabet(samples)
    pta = DFA(alphabet)

    # Give the state named by the empty string one outgoing transition
    # per symbol of the alphabet.
    for letter in alphabet:
        pta.add_transition(State(''), State(letter), letter)

    states = {State(u) for u in utils.prefix_set(samples)}

    # Every prefix state gets a successor for every symbol. Successors
    # that are not already in ``states`` are collected separately,
    # because ``states`` must not grow while it is being iterated.
    new_states = set()
    for u in states:
        for a in alphabet:
            ua = State(u.name + a)
            if ua not in states:
                new_states.add(ua)

            pta.add_transition(u, ua, a)

    states.update(new_states)

    # Label states whose name is a positive / negative example string.
    for u in states:
        if u.name in s_plus:
            pta.accept_states.add(u)
        if u.name in s_minus:
            pta.reject_states.add(u)

    pta.states = states

    return pta
示例#2
0
 def test_determine_alphabet_01(self):
     # A single sample made of nine distinct symbols must produce an
     # alphabet containing exactly those nine symbols.
     samples = {'abcdefghi'}
     expected = set('abcdefghi')
     actual = utils.determine_alphabet(samples)
     self.assertSetEqual(expected, actual)
示例#3
0
    def test_determine_alphabet_02(self):
        # Several samples that reuse the same symbols: the resulting
        # alphabet must contain each symbol exactly once.
        samples = {'abc', 'cba', 'bca', 'a', 'b', 'c', 'aa', 'bb', 'cc', 'd'}
        expected = set('abcd')
        actual = utils.determine_alphabet(samples)
        self.assertSetEqual(expected, actual)
示例#4
0
    def __init__(self,
                 alphabet: Set[str],
                 pos_examples: Set[str] = None,
                 neg_examples: Set[str] = None,
                 oracle: Oracle = None,
                 algorithm: str = 'rpni'):
        """
        Validate the arguments and register the available learners.

        :param alphabet: Alphabet of the target language we are
                         trying to learn.
        :type alphabet: Set[str]
        :param pos_examples: Set of positive example strings
                             from the target language.
        :type pos_examples: Set[str]
        :param neg_examples: Set of negative example strings,
                             i.e. strings that do not belong in
                             the target language.
        :type neg_examples: Set[str]
        :param oracle: Minimally adequate teacher (MAT)
        :type oracle: Oracle
        :param algorithm: The algorithm to use when attempting to
                          learn the grammar from the example strings.
                          The options are:
                          gold
                          rpni
                          lstar
                          nlstar
        :type algorithm: str
        :raises ValueError: If the alphabet is not a non-empty set, the
                            algorithm name is unknown, the examples are
                            missing, not sets or overlapping (gold and
                            rpni), or the oracle is missing (lstar and
                            nlstar).
        """
        if not isinstance(alphabet, set) or not alphabet:
            raise ValueError(
                'The alphabet has to be a set with at least one element')

        self._alphabet = alphabet

        # Each learner is wrapped in a lambda so nothing is constructed
        # until it is invoked; self._alphabet is read at that moment, so
        # the learners see the alphabet derived from the examples below.
        self._learners = {
            'gold':
            lambda: algorithms.Gold(pos_examples, neg_examples, self._alphabet
                                    ).learn(),
            'rpni':
            lambda: algorithms.RPNI(pos_examples, neg_examples, self._alphabet)
            .learn(),
            'lstar':
            lambda: algorithms.LSTAR(self._alphabet, oracle).learn(),
            'nlstar':
            lambda: algorithms.NLSTAR(self._alphabet, oracle).learn()
        }

        if algorithm not in self._learners:
            # Report the requested name, not the ``algorithms`` module.
            raise ValueError('Algorithm \'{}\' unknown, the following '
                             'algorithms are available:\n{}'.format(
                                 algorithm, '\n'.join(self._learners)))

        if algorithm in ['rpni', 'gold']:
            # The None check has to come first: None is not a set, so the
            # isinstance checks below would otherwise always pre-empt it.
            if pos_examples is None or neg_examples is None:
                raise ValueError(
                    'pos_examples and neg_examples can not be None '
                    'for algorithm \'{}\''.format(algorithm))

            if not isinstance(pos_examples, set):
                raise ValueError('pos_examples should be a set')
            if not isinstance(neg_examples, set):
                raise ValueError('neg_examples should be a set')

            if not pos_examples.isdisjoint(neg_examples):
                raise ValueError(
                    'The sets of positive and negative example '
                    'strings should not contain the same string(s)')

            # For the sample based learners the alphabet is derived from
            # the examples and replaces the one passed in.
            self._alphabet = utils.determine_alphabet(
                pos_examples.union(neg_examples))

        elif algorithm in ['lstar', 'nlstar']:
            if oracle is None:
                raise ValueError(
                    'oracle can not be None for algorithm \'{}\''.format(
                        algorithm))

        self._algorithm = algorithm