def test_get_context_regex(self):
    feature_bundle_list = FeatureBundleList([{'cons': '+'}], is_one_item_list=True)
    regex_str = get_context_regex(feature_bundle_list)
    print(regex_str)
    regex = str2regexp(regex_str)

    eval_word = regex.evalWordP("t")
    print(eval_word)
    assert eval_word

    eval_word = regex.evalWordP("a")
    print(eval_word)
    assert not eval_word

    eval_word = regex.evalWordP("tt")
    print(eval_word)
    assert not eval_word

    # In FAdo syntax '+' denotes union, so "a+b+c" and "(a|b|c)"
    # describe the same language and should compare as equivalent.
    regex1_str = "(a|b|c)"
    regex2_str = "a+b+c"
    regex_1 = str2regexp(regex1_str)
    regex_2 = str2regexp(regex2_str)
    comp = regex_1.compare(regex_2)
    assert comp
from FAdo.reex import str2regexp

def regex_inclusion(target, predict):
    """Check whether the language of `predict` is contained in the language
    of `target`, i.e. whether `target` is a superset of `predict`."""
    target = str2regexp(target).toDFA()
    predict = str2regexp(predict).toDFA()
    # Inclusion holds when predict accepts nothing outside target
    # (second clause); the first clause additionally requires the two
    # languages to share at least one word, so an empty predict is rejected.
    return ((target & predict).witness() is not None
            and (~target & predict).witness() is None)
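# Minimal usage sketch for regex_inclusion (the patterns below are
# illustrative, not taken from the test data): L("a") is a subset of
# L("a*"), while L("b") is not.
assert regex_inclusion("a*", "a")
assert not regex_inclusion("a*", "b")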
import FAdo.reex as reex

def test(str_re):
    print str_re
    reg = reex.str2regexp(str_re, no_table=0)
    print reg
    nfa = reg.nfaPD()  # partial-derivative NFA
    nfa.display()
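# Example invocation (Python 2; display() assumes a Graphviz-enabled FAdo
# install). The pattern is an arbitrary illustration:
test("(a+b)*a")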
from FAdo.reex import str2regexp

def pos_membership_test(regex, examples):
    """Return True iff every word in `examples` is accepted by `regex`."""
    regex = str2regexp(regex)
    for word in examples:
        if not regex.evalWordP(word):
            return False
    return True
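# Minimal usage sketch (illustrative pattern and examples, not from the
# dataset): "(a+b)*a" accepts exactly the words over {a, b} ending in 'a'.
assert pos_membership_test("(a+b)*a", ["a", "ba", "abba"])   # all end in 'a'
assert not pos_membership_test("(a+b)*a", ["ab"])            # ends in 'b'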
def lexerNFA(ls_expr, ls_nfas):
    """
    compiler: combine the NFAs of the given regexes (plus any extra NFAs
    in ls_nfas) into 1 nfa with a single fresh initial state
    """
    dict_finalState_tokenIndex_mapping = {}
    # exps = [ 'a' , 'abb' , 'a*bb*' ]
    exps = ls_expr
    regexes = [reex.str2regexp(e, no_table=0) for e in exps]
    nfas = [regex.nfaPD() for regex in regexes]
    nfas.extend(ls_nfas)

    # Rename each NFA's states into a disjoint global range;
    # index 0 is reserved for the new common initial state.
    nStates = [len(nfa) for nfa in nfas]
    iStateStart = [1 + sum(nStates[:i]) for i, nfa in enumerate(nfas)]
    for i, nfa in enumerate(nfas):
        nfa.renameStates(xrange(iStateStart[i], iStateStart[i] + len(nfa)))
        # nfa.display()
        # print i, nfa.succintTransitions()

    N = fa.NFA()
    N.addState('0')
    for i, nfa in enumerate(nfas):
        for sym in nfa.Sigma:
            N.addSigma(sym)
        for state in nfa.States:
            N.addState(state)
        for state_idx in nfa.Final:
            # handle conflict: remember which token each final state reports
            newFinal = state_idx + iStateStart[i]
            N.addFinal(newFinal)
            dict_finalState_tokenIndex_mapping[newFinal] = i
        for stName0, syms, stName1 in nfa.succintTransitions():
            # shortcut, maybe not correct: states were renamed to their
            # global indices above, so the name parses back to the index
            idx_st0 = int(stName0)
            idx_st1 = int(stName1)
            symbols = [s for s in syms.split(', ')]
            for symb in symbols:
                N.addTransition(idx_st0, symb, idx_st1)
        for state_idx in nfa.Initial:
            # epsilon-edge from the fresh initial state into each sub-NFA
            N.addTransition(0, '@epsilon', state_idx + iStateStart[i])
    N.setInitial([0])
    # print N
    # N.display()
    return N, dict_finalState_tokenIndex_mapping
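# Minimal usage sketch (Python 2; the token patterns are illustrative): the
# returned mapping tells which token index each accepting state belongs to.
N, final2token = lexerNFA(['a', 'abb', 'a*bb*'], [])
print N.evalWordP('abb')   # True: 'abb' is accepted via token 1's sub-NFA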
from algorithm import *
from FAdo.reex import str2regexp

if __name__ == '__main__':
    data_path = 'data/total/max_set20/set2regex/test.txt'
    with open(data_path, 'r') as rf:
        # drop empty lines so a trailing newline does not add a bogus record
        test_data = [line for line in rf.read().split('\n') if line]

    match = 0
    for data in test_data:
        ground_truth = data.split('\t')[-1].replace(' ', '')
        ground_truth = preprocess_regex(ground_truth)
        data = data.split('\t')[:-1]
        data = list(filter('none'.__ne__, data))  # drop 'none' placeholders
        data = [d.replace(' ', '') for d in data]

        prediction_regex = synthesize(data).toDFA()
        regexp = str2regexp(ground_truth).toDFA()
        if regexp.equal(prediction_regex):
            print('input: ', data)
            print('ground truth: ', regexp.reCG())
            print('prediction : ', prediction_regex.reCG())
            print('\n')
            match += 1

    accuracy = match / len(test_data)
    print('Accuracy: ', accuracy)
import FAdo.fa as fa
import FAdo.reex as reex
from lex_def import generateRE_WS

reg = reex.str2regexp("(a*)?b", no_table=0)
print repr(reg)

nfa = reg.nfaPD()
# nfa.display()
for stName0, syms, stName1 in nfa.succintTransitions():
    print syms
    for i in syms.split(', '):
        print '\t', repr(i)

S = nfa.Initial
print nfa.evalSymbol(S, 'a')

dfa = nfa.toDFA()
dfa.display()
# successor of the initial state on 'a'
print dfa.evalSymbolI(dfa.Initial, 'a')
from FAdo.reex import str2regexp

def regex_equal(regex1, regex2):
    """Return True iff the two regexes denote the same language."""
    dfa1 = str2regexp(regex1).toDFA()
    dfa2 = str2regexp(regex2).toDFA()
    return dfa1.equal(dfa2)
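# Minimal usage sketch (illustrative patterns): in FAdo syntax '+' is union,
# so both expressions below denote all words over {a, b}.
assert regex_equal("(a+b)*", "(a*b*)*")
assert not regex_equal("a*", "a")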