def test_get_context_regex(self):
        """Exercise ``get_context_regex`` on a one-item '+cons' bundle list.

        The generated expression is parsed with ``str2regexp`` and must
        accept "t" while rejecting "a" and "tt".  Afterwards two literal
        expressions are parsed and ``compare`` between them must be truthy.
        """
        bundles = FeatureBundleList([{'cons': '+'}], is_one_item_list=True)
        context_regex_str = get_context_regex(bundles)
        print(context_regex_str)
        context_regex = str2regexp(context_regex_str)

        # (word, should the context regex accept it?)
        expectations = (("t", True), ("a", False), ("tt", False))
        for word, should_accept in expectations:
            outcome = context_regex.evalWordP(word)
            print(outcome)
            if should_accept:
                assert outcome
            else:
                assert not outcome

        first, second = (str2regexp(text) for text in ("(a|b|c)", "a+b+c"))
        assert first.compare(second)
示例#2
0
def regex_inclusion(target, predict):
    '''
    Check whether the language of ``predict`` is included in that of ``target``.

    Both arguments are regular-expression strings; each is parsed with
    ``str2regexp`` and converted to a DFA before the comparison.

    Returns True only when both of the following hold:
      * ``target & predict`` has a witness, i.e. the two languages share
        at least one word, and
      * ``~target & predict`` has no witness, i.e. ``predict`` accepts no
        word outside ``target``.
    Otherwise returns False.
    '''
    target = str2regexp(target).toDFA()
    predict = str2regexp(predict).toDFA()

    # No common word at all: inclusion is rejected without building the
    # complement automaton (same short-circuit as the original `and`).
    if (target & predict).witness() is None:
        return False

    # Identity comparison with None, so an empty-string witness is never
    # confused with "no witness".
    return (~target & predict).witness() is None
示例#3
0
文件: lex_def.py 项目: mebusy/codeLib
    def test(str_re):
        """Print ``str_re``, its parsed form, and display the NFA from ``nfaPD()``."""
        # Single-argument print(...) behaves the same as the print statement.
        print(str_re)
        parsed = reex.str2regexp(str_re, no_table=0)
        print(parsed)

        parsed.nfaPD().display()
示例#4
0
def pos_membership_test(regex, examples):
    """Return True when every word in ``examples`` is accepted by ``regex``.

    ``regex`` is a regular-expression string parsed with ``str2regexp``;
    each word is checked with ``evalWordP`` and evaluation stops at the
    first word that is not accepted.  An empty ``examples`` yields True.
    """
    parsed = str2regexp(regex)
    return all(parsed.evalWordP(candidate) for candidate in examples)
示例#5
0
文件: NFA.py 项目: mebusy/codeLib
def lexerNFA(ls_expr, ls_nfas):
    """
    Combine several NFAs into one NFA (lexer construction).

    Each string in ``ls_expr`` is parsed with ``reex.str2regexp`` and turned
    into an NFA via ``nfaPD()``; the automata in ``ls_nfas`` are appended
    after them.  All automata are then copied into a fresh ``fa.NFA`` whose
    state 0 is the single initial state, linked by '@epsilon' transitions to
    the initial states of every component.

    NOTE(review): ``renameStates`` is also called on the objects passed in
    ``ls_nfas`` -- presumably this modifies the caller's automata in place;
    confirm against FAdo.

    Returns a tuple ``(N, dict_finalState_tokenIndex_mapping)`` where the
    dict maps each final state registered on ``N`` to the position of the
    component automaton (in ``ls_expr`` followed by ``ls_nfas``) it came from.
    """

    # final state of the combined NFA -> index of the originating automaton
    dict_finalState_tokenIndex_mapping = {}

    # exps = [ 'a' , 'abb' , 'a*bb*' ]
    exps = ls_expr
    regexes = [reex.str2regexp(e, no_table=0) for e in exps]
    nfas = [regex.nfaPD() for regex in regexes]
    nfas.extend(ls_nfas)

    # Give every automaton a disjoint block of integer state names.
    # Numbering starts at 1 because 0 is used for the new initial state below.
    nStates = [len(nfa) for nfa in nfas]
    iStateStart = [1 + sum(nStates[:i]) for i, nfa in enumerate(nfas)]
    for i, nfa in enumerate(nfas):
        nfa.renameStates(xrange(iStateStart[i], iStateStart[i] + len(nfa)))
        # nfa.display()
        # print i, nfa.succintTransitions()

    N = fa.NFA()
    # State '0' is added first; it is the one later set as initial.
    N.addState('0')
    for i, nfa in enumerate(nfas):
        # Copy the alphabet and the (renamed) states of this component.
        for sym in nfa.Sigma:
            N.addSigma(sym)
        for state in nfa.States:
            N.addState(state)
        for state_idx in nfa.Final:
            # handle conflict: shift the component's final-state value by
            # this component's offset so it does not collide with others.
            # NOTE(review): assumes entries of nfa.Final are 0-based indices
            # local to the component -- confirm against FAdo.
            newFinal = state_idx + iStateStart[i]
            N.addFinal(newFinal)
            dict_finalState_tokenIndex_mapping[newFinal] = i
        for stName0, syms, stName1 in nfa.succintTransitions():
            # shortcut, maybe not correct (original author's caveat):
            # the integer state names assigned above are converted with int()
            # and passed straight to addTransition, i.e. a state's name is
            # assumed to equal its index in N.
            idx_st0 = (int)(stName0)
            idx_st1 = (int)(stName1)
            # The symbol field is split on ', ' and one transition is added
            # per piece.
            # NOTE(review): a symbol that itself contains ', ' would be
            # split incorrectly -- verify.
            symbols = [s for s in syms.split(', ')]
            for symb in symbols:
                N.addTransition(idx_st0, symb, idx_st1)
        # Link the new initial state 0 to each initial state of the component.
        for state_idx in nfa.Initial:
            N.addTransition(0, '@epsilon', state_idx + iStateStart[i])

    N.setInitial([0])
    # print N
    # N.display()
    return N, dict_finalState_tokenIndex_mapping
示例#6
0
from algorithm import *
from FAdo.reex import str2regexp

if __name__ == '__main__':
    # Evaluate regex synthesis: for every tab-separated line of the test
    # file, synthesize a regex from the example fields and count how often
    # its DFA equals the DFA of the ground-truth regex (the last field).

    data_path = 'data/total/max_set20/set2regex/test.txt'

    # Read the file up front so it is closed before the synthesis loop.
    with open(data_path, 'r') as rf:
        # A trailing newline (or any blank line) would otherwise yield an
        # empty record that is parsed as a sample and counted in the
        # accuracy denominator, so blank lines are dropped here.
        test_data = [line for line in rf.read().split('\n') if line.strip()]

    match = 0
    for line in test_data:
        fields = line.split('\t')

        # Last field is the ground-truth regex; spaces are removed first.
        ground_truth = preprocess_regex(fields[-1].replace(' ', ''))

        # Remaining fields are the examples; 'none' fields are dropped
        # before spaces are removed, as in the original order of operations.
        data = [field.replace(' ', '') for field in fields[:-1]
                if field != 'none']

        prediction_regex = synthesize(data).toDFA()
        regexp = str2regexp(ground_truth).toDFA()

        if regexp.equal(prediction_regex):
            print('input: ', data)
            print('ground truth: ', regexp.reCG())
            print('prediction : ', prediction_regex.reCG())
            print('\n')
            match += 1

    # float(match) keeps true division on Python 2 as well; an input with
    # no samples reports 0.0.
    accuracy = float(match) / len(test_data) if test_data else 0.0
    print('Accuracy: ', accuracy)
示例#7
0
文件: test.py 项目: mebusy/codeLib
import FAdo.fa as fa
import FAdo.reex as reex
from lex_def import generateRE_WS
# Exploratory script (Python 2 print statements): parse one expression,
# inspect the transitions of its nfaPD() automaton, then convert to a DFA.
reg = reex.str2regexp("(a*)?b", no_table=0)

print repr(reg)

nfa = reg.nfaPD()
# nfa.display()
# Print each transition's symbol field, then each piece obtained by
# splitting that field on ', '.
for stName0, syms, stName1 in nfa.succintTransitions():
    print syms
    for i in syms.split(', '):
        print '\t', repr(i)

# Evaluate symbol 'a' starting from the NFA's initial states and print the result.
S = nfa.Initial
print nfa.evalSymbol(S, 'a')

dfa = nfa.toDFA()
dfa.display()

# The return value of this call is discarded.
dfa.evalSymbolI(dfa.Initial, 'a')
示例#8
0
def regex_equal(regex1, regex2):
    """Return the result of ``equal`` between the DFAs of two regex strings.

    Each argument is parsed with ``str2regexp`` and converted with
    ``toDFA()``; ``regex1`` is processed before ``regex2``.
    """
    left, right = (str2regexp(text).toDFA() for text in (regex1, regex2))
    return left.equal(right)