def build_pta(s_plus: Set[str], s_minus: Set[str] = None) -> DFA:
    """
    Build a prefix tree acceptor from the example strings
    S = S+ union S-.

    One state is created per prefix of the samples. Every such state gets
    an outgoing transition for every letter of the alphabet; one-symbol
    extensions that are not themselves sample prefixes are added as extra
    states.

    :param s_plus: Set containing positive examples of
                   the target language
    :type s_plus: set
    :param s_minus: Set containing negative examples of
                    the target language. When omitted (or None), no
                    negative examples are used.
    :type s_minus: set
    :return: A DFA representing a prefix tree acceptor
    :rtype: DFA
    """
    # A None sentinel replaces the former shared `set()` default, so no
    # mutable object is shared between calls.
    if s_minus is None:
        s_minus = set()

    samples = s_plus.union(s_minus)

    alphabet = utils.determine_alphabet(samples)
    pta = DFA(alphabet)

    # Transitions leaving the state named by the empty string.
    for letter in alphabet:
        pta.add_transition(State(''), State(letter), letter)

    states = {
        State(u) for u in utils.prefix_set(samples)
    }

    # Newly discovered states are collected separately because `states`
    # must not change size while it is being iterated.
    new_states = set()
    for u in states:
        for a in alphabet:
            ua = State(u.name + a)
            if ua not in states:
                new_states.add(ua)

            pta.add_transition(u, ua, a)

    states.update(new_states)

    # Label states whose name is exactly one of the example strings.
    for u in states:
        if u.name in s_plus:
            pta.accept_states.add(u)
        if u.name in s_minus:
            pta.reject_states.add(u)

    pta.states = states

    return pta
def test_determine_alphabet_01(self):
    """A single string yields the set of its distinct characters."""
    samples = {'abcdefghi'}
    expected = set('abcdefghi')
    actual = utils.determine_alphabet(samples)
    self.assertSetEqual(expected, actual)
def test_determine_alphabet_02(self):
    """Several overlapping strings yield the union of their characters."""
    samples = {
        'abc', 'cba', 'bca',
        'a', 'b', 'c',
        'aa', 'bb', 'cc',
        'd',
    }
    expected = set('abcd')
    actual = utils.determine_alphabet(samples)
    self.assertSetEqual(expected, actual)
def __init__(self, alphabet: Set[str],
             pos_examples: Set[str] = None,
             neg_examples: Set[str] = None,
             oracle: Oracle = None,
             algorithm: str = 'rpni'):
    """
    Validate the arguments and register the available learners.

    :param alphabet: Alphabet of the target language we are
                     trying to learn. For the 'rpni' and 'gold'
                     algorithms the stored alphabet is afterwards
                     replaced by the one determined from the
                     example strings.
    :type alphabet: Set[str]
    :param pos_examples: Set of positive example strings
                         from the target language.
    :type pos_examples: Set[str]
    :param neg_examples: Set of negative example strings,
                         i.e. strings that do not belong in
                         the target language.
    :type neg_examples: Set[str]
    :param oracle: Minimally adequate teacher (MAT)
    :type oracle: Oracle
    :param algorithm: The algorithm to use when attempting to
                      learn the grammar from the example strings.
                      The options are:
                      gold
                      rpni
                      lstar
                      nlstar
    :type algorithm: str
    :raises ValueError: If the alphabet is not a non-empty set, the
                        algorithm is unknown, the example sets are
                        missing, not sets or overlapping (for 'rpni'
                        and 'gold'), or the oracle is missing (for
                        'lstar' and 'nlstar').
    """
    if not isinstance(alphabet, set) or not alphabet:
        raise ValueError(
            'The alphabet has to be a set with at least one element')

    self._alphabet = alphabet

    # Each learner is wrapped in a lambda so nothing is constructed here;
    # self._alphabet is read only when the lambda is eventually called.
    self._learners = {
        'gold': lambda: algorithms.Gold(pos_examples, neg_examples,
                                        self._alphabet).learn(),
        'rpni': lambda: algorithms.RPNI(pos_examples, neg_examples,
                                        self._alphabet).learn(),
        'lstar': lambda: algorithms.LSTAR(self._alphabet, oracle).learn(),
        'nlstar': lambda: algorithms.NLSTAR(self._alphabet, oracle).learn()
    }

    if algorithm not in self._learners:
        # Report the offending argument (`algorithm`), not the imported
        # `algorithms` module.
        raise ValueError('Algorithm \'{}\' unknown, the following '
                         'algorithms are available:\n{}'.format(
                             algorithm, '\n'.join(self._learners.keys())))

    if algorithm in ['rpni', 'gold']:
        # The None check must precede the isinstance checks; otherwise a
        # missing argument is always reported as "should be a set" and
        # this message can never be produced.
        if pos_examples is None or neg_examples is None:
            raise ValueError(
                'pos_examples and neg_examples can not be None '
                'for algorithm \'{}\''.format(algorithm))

        if not isinstance(pos_examples, set):
            raise ValueError('pos_examples should be a set')
        if not isinstance(neg_examples, set):
            raise ValueError('neg_examples should be a set')

        if not pos_examples.isdisjoint(neg_examples):
            raise ValueError(
                'The sets of positive and negative example '
                'strings should not contain the same string(s)')

        # The alphabet passed in is replaced by the one determined from
        # the example strings.
        self._alphabet = utils.determine_alphabet(
            pos_examples.union(neg_examples))

    elif algorithm in ['lstar', 'nlstar']:
        if oracle is None:
            raise ValueError(
                'oracle can not be None for algorithm \'{}\''.format(
                    algorithm))

    self._algorithm = algorithm