Exemplo n.º 1
0
class StateSar:
    """Wrapper around ``RegexpSar`` that only fires callbacks in a given state.

    Every regexp is registered together with a state.  When the regexp
    matches, its callback is invoked only if the wrapper's current state
    equals the state the regexp was registered for.
    """

    def __init__(self, init_state):
        """Create the wrapper with ``init_state`` as the current state."""
        super().__init__()
        self._sar = RegexpSar()
        self._state = init_state
        # The initial state is a legitimate state even if no regexp is ever
        # registered for it, so set_state() must accept switching back to it.
        self._all_states = {init_state}

    def add_regexp(self, state, regexp, regexp_callback):
        """Register ``regexp`` so that it is only active in ``state``.

        ``regexp_callback(from_pos, to_pos)`` is called on a match only when
        the current state equals ``state``.
        """
        self._all_states.add(state)

        def inner_callback(from_pos, to_pos):
            # The state is checked at match time, not at registration time.
            if self._state == state:
                regexp_callback(from_pos, to_pos)
            # continue_from(to_pos) is issued whether or not the user
            # callback ran.
            self._sar.continue_from(to_pos)

        self._sar.add_regexp(regexp, inner_callback)

    def match(self, match_str):
        """Run all registered regexps over ``match_str``."""
        self._sar.match(match_str)

    def set_state(self, new_state):
        """Switch the current state to ``new_state``.

        Raises:
            ValueError: if ``new_state`` is neither the initial state nor a
                state used in a previous ``add_regexp`` call.
        """
        if new_state not in self._all_states:
            raise ValueError("Unknown state: " + str(new_state))
        self._state = new_state

    def get_state(self):
        """Return the current state."""
        return self._state
Exemplo n.º 2
0
def test18():
    """Register two labelled patterns and check both matches are recorded."""
    text = "first abbbbc second 123 end"
    found = []
    sar = RegexpSar()

    def make_recorder(label):
        # A factory is used so each callback keeps its own label.
        def record(from_pos, to_pos):
            found.append(f"{label}: {text[from_pos:to_pos]}")
            sar.continue_from(to_pos)

        return record

    labelled_patterns = (
        ("ab+c", "First Match"),
        ("\\d+", "Second Match"),
    )
    for pattern, label in labelled_patterns:
        sar.add_regexp(pattern, make_recorder(label))

    sar.match(text)
    validate(2, len(found))
    validate("First Match: abbbbc", found[0])
    validate("Second Match: 123", found[1])
Exemplo n.º 3
0
def test14():
    """Extract the text between the 5th and 6th ':' of a passwd-style line."""
    text = "backup:x:34:34:backup:/var/backups:/usr/sbin/nologin"
    field_no = 6
    sar = RegexpSar()
    colons_seen = VarPointer()
    field_start = VarPointer()
    field_end = VarPointer()

    def on_colon(from_pos, to_pos):
        colons_seen.append(1)
        sar.continue_from(to_pos)
        count = colons_seen.val
        if count == field_no - 1:
            # The wanted field begins right after this separator.
            field_start.set(to_pos)
            return
        if count == field_no:
            # The wanted field ends where this separator begins.
            field_end.set(from_pos)
            sar.stop_match()

    sar.add_regexp(":", on_colon)
    sar.match(text)

    end_found = field_end.val > 0
    if not end_found:
        # fail on purpose
        validate(False, "no match found")
        return
    validate("/var/backups", text[field_start.val:field_end.val])
Exemplo n.º 4
0
def test5():
    """A literal pattern inside a longer string triggers its callback once."""
    counter = VarPointer()
    sar = RegexpSar()

    def bump(from_pos, to_pos):
        return counter.append(1)

    sar.add_regexp("abcd", bump)
    sar.match("qabcdef")
    validate(1, counter.val)
Exemplo n.º 5
0
def test10():
    """With no continue_from call, "\\w+" on "abc" yields abc, bc and c."""
    text = "abc"
    captured = []
    sar = RegexpSar()

    def capture(from_pos, to_pos):
        return captured.append(text[from_pos:to_pos])

    sar.add_regexp("\\w+", capture)
    sar.match(text)

    for idx, want in enumerate(("abc", "bc", "c")):
        validate(want, captured[idx])
    validate(3, len(captured))
Exemplo n.º 6
0
def test15():
    """Two patterns sharing one callback: the callback's flag ends up set."""
    text = "123abc"
    flag = VarPointer()
    sar = RegexpSar()

    def mark(from_pos, to_pos):
        flag.set(1)

    for pattern in ("\\d+", "\\w+"):
        sar.add_regexp(pattern, mark)
    sar.match(text)

    validate(1, flag.val)
Exemplo n.º 7
0
def test11():
    """The "\\a+" pattern on "123abc456" is expected to capture only "abc"."""
    text = "123abc456"
    captured = []
    sar = RegexpSar()

    def capture_and_skip(from_pos, to_pos):
        captured.append(text[from_pos:to_pos])
        resume_at = to_pos + 1
        sar.continue_from(resume_at)

    sar.add_regexp("\\a+", capture_and_skip)
    sar.match(text)

    validate("abc", captured[0])
    validate(1, len(captured))
Exemplo n.º 8
0
def test17():
    """With "\\d+" and "\\a+" registered, "mm abc nn" must trigger a match."""
    text = "mm abc nn"
    flag = VarPointer()
    sar = RegexpSar()

    def mark(from_pos, to_pos):
        flag.set(1)

    for pattern in ("\\d+", "\\a+"):
        sar.add_regexp(pattern, mark)
    sar.match(text)

    nothing_matched = flag.val != 1
    if nothing_matched:
        # Deliberately failing validation to report the missing match.
        validate(False, "regexp should match")
Exemplo n.º 9
0
def test9():
    """Stopping after the first "\\d+" match leaves a span covering "1234"."""
    span_start = VarPointer()
    span_end = VarPointer()
    sar = RegexpSar()
    text = "1234"

    def remember_span(from_pos, to_pos):
        span_start.set(from_pos)
        span_end.set(to_pos)
        sar.stop_match()

    sar.add_regexp("\\d+", remember_span)
    sar.match(text)

    matched = text[span_start.val:span_end.val]
    validate(text, matched)
Exemplo n.º 10
0
def test22():
    """Find a digit located exactly one position after a recorded alpha match."""
    sar = RegexpSar()
    last_alpha = VarPointer()
    last_alpha.val = -1  # -1 marks "no alpha match recorded yet"
    anchor = VarPointer()

    def on_alpha(from_pos, to_pos):
        return last_alpha.set(from_pos)

    def on_digit(digit_pos, to_pos):
        if last_alpha.val == -1:
            return
        is_adjacent = digit_pos - last_alpha.val == 1
        if is_adjacent:
            anchor.set(digit_pos)

    sar.add_regexp("\\a", on_alpha)
    sar.add_regexp("\\d", on_digit)
    sar.match("aa bb2cc dd")
    validate(5, anchor.val)
Exemplo n.º 11
0
def test21():
    """Use a second matcher via match_at to match only at a given position."""
    text = "aaaaaaaaaaaaaaaaaaaaaaaaaabbbc"
    hits = VarPointer()
    outer = RegexpSar()
    inner = RegexpSar()

    def on_inner_match(from_pos, to_pos):
        return hits.append(1)

    def on_outer_match(from_pos, to_pos):
        # Hand over to the second matcher at the end of the "a+" match,
        # then stop the first matcher.
        inner.match_at(text, to_pos)
        outer.stop_match()

    inner.add_regexp("b+c", on_inner_match)
    outer.add_regexp("a+", on_outer_match)
    outer.match(text)

    validate(1, hits.val)
Exemplo n.º 12
0
def test16():
    """Continuing from to_pos + 1 is expected to capture "123" then "bc"."""
    text = "123abc"
    captured = []
    sar = RegexpSar()

    def capture_and_skip(from_pos, to_pos):
        captured.append(text[from_pos:to_pos])
        sar.continue_from(to_pos + 1)

    for pattern in ("\\d+", "\\a+"):
        sar.add_regexp(pattern, capture_and_skip)
    sar.match(text)

    for idx, want in enumerate(("123", "bc")):
        validate(want, captured[idx])
    validate(2, len(captured))
Exemplo n.º 13
0
def match_test(all_regexp, match_string, match_expected):
    """Assert which of the given regexps fire on ``match_string``.

    The regexp at index ``i`` adds ``2**i`` to a running total each time its
    callback runs; the total must equal ``match_expected``.

    Raises:
        AssertionError: if the accumulated total differs from
            ``match_expected``.
    """
    sar = RegexpSar()
    path_res = 0

    def gen_callback(cur_re_num):
        # Factory: each callback needs its own weight bound at creation time
        # rather than sharing the loop variable.
        def match_callback(from_pos, to_pos):
            nonlocal path_res
            path_res += cur_re_num

        return match_callback

    for index, pattern in enumerate(all_regexp):
        sar.add_regexp(pattern, gen_callback(2**index))

    sar.match(match_string)
    assert match_expected == path_res, f"Match fail for [{', '.join(all_regexp)}] in >>{match_string}<<"
Exemplo n.º 14
0
def test13():
    """Two overlapping patterns, each continuing from the end of its match."""
    text = "123abc"
    captured = []
    sar = RegexpSar()

    def capture_and_continue(from_pos, to_pos):
        captured.append(text[from_pos:to_pos])
        sar.continue_from(to_pos)

    for pattern in ("\\d+", "\\w+"):
        sar.add_regexp(pattern, capture_and_continue)
    sar.match(text)

    first, second = "123", "123abc"
    validate(first, captured[0])
    # TODO check if this is a real error?
    validate(second, captured[1])
    validate(2, len(captured))
Exemplo n.º 15
0
def test7():
    """Count all matches of a literal, then stop a second run after two."""
    text = "qabcdeabcdkabcdf"

    # First run: every occurrence is counted.
    total = VarPointer()
    sar = RegexpSar()

    def count_all(from_pos, to_pos):
        return total.append(1)

    sar.add_regexp("abcd", count_all)
    sar.match(text)
    validate(3, total.val)

    # Second run: matching is stopped once two occurrences were counted.
    limited = VarPointer()
    sar2 = RegexpSar()

    def count_up_to_two(from_pos, to_pos):
        limited.append(1)
        reached_limit = limited.val == 2
        if reached_limit:
            sar2.stop_match()

    sar2.add_regexp("abcd", count_up_to_two)
    sar2.match(text)
    validate(2, limited.val)
Exemplo n.º 16
0
def test19():
    """Capture the third "\\w+" match and stop matching there."""
    text = "aa11 bb22 cc33 dd44"
    seen = VarPointer()
    third_word = VarPointer()
    sar = RegexpSar()

    def on_word(from_pos, to_pos):
        seen.append(1)
        if seen.val != 3:
            # Not the third match yet: keep going from the end of this one.
            sar.continue_from(to_pos)
            return
        third_word.val = text[from_pos:to_pos]
        sar.stop_match()

    sar.add_regexp("\\w+", on_word)
    sar.match(text)

    validate(3, seen.val)
    validate("cc33", third_word.val)
Exemplo n.º 17
0
def test8():
    """Check reported match positions, reusing one matcher on two strings."""
    span_start = VarPointer()
    span_end = VarPointer()
    sar = RegexpSar()
    first_text = "qabcdef"
    pattern = "abcd"

    def remember_span(from_pos, to_pos):
        span_start.set(from_pos)
        span_end.set(to_pos)

    sar.add_regexp(pattern, remember_span)

    # First string: the literal sits at positions 1..5.
    sar.match(first_text)
    validate(1, span_start.val)
    validate(5, span_end.val)
    validate(pattern, first_text[span_start.val:span_end.val])

    # Reset the recorded span and run the same matcher on a second string.
    for pointer in (span_start, span_end):
        pointer.set(0)
    sar.match("qqqqabcdttt")
    validate(4, span_start.val)
    validate(8, span_end.val)
Exemplo n.º 18
0
def test3():
    """A string that does not contain the pattern never triggers the callback."""
    counter = VarPointer()
    sar = RegexpSar()

    def bump(from_pos, to_pos):
        return counter.append(1)

    sar.add_regexp("abcd", bump)
    sar.match("qqqqqqqqqqqq")
    validate(0, counter.val)
Exemplo n.º 19
0
    # reset words in sentence to false
    is_alice_in_sentence = False
    is_cat_in_sentence = False
    is_rabbit_in_sentence = False


# Read the whole "Alice in Wonderland" text into memory.
with open("./alice_in_wonderland.txt", "r") as alice_book:
    text = alice_book.read()

# Word patterns (lower-case and capitalised spelling) with their callbacks.
# NOTE(review): the \^\w on both sides presumably requires a non-word
# character around the word -- confirm against the regexp_sar syntax.
word_patterns = (
    ('\\^\\walice\\^\\w', find_alice),
    ('\\^\\wAlice\\^\\w', find_alice),
    ('\\^\\wcat\\^\\w', find_cat),
    ('\\^\\wCat\\^\\w', find_cat),
    ('\\^\\wrabbit\\^\\w', find_rabbit),
    ('\\^\\wRabbit\\^\\w', find_rabbit),
)
for word_re, word_callback in word_patterns:
    sar.add_regexp(word_re, word_callback)

# End-of-sentence patterns; all of them share a single callback.
sentence_terminators = ('\\.', '\\?', '!', ';')
for cur_re in sentence_terminators:
    sar.add_regexp(cur_re, end_of_sentence)

# Run every registered regexp over the book text.
sar.match(text)

# Matching is finished: report what the callbacks collected.
print(f"Alice + Cat Matches: {alice_and_cat_position!s}")
print(f"Alice + Rabbit Matches: {alice_and_rabbit_position!s}")
Exemplo n.º 20
0
from regexp_sar import RegexpSar

sar = RegexpSar()

match_str = "hello world abc"

# "\\w+" is spelled with an escaped backslash: a bare '\w' in a normal string
# literal is an invalid escape sequence and triggers a warning.
sar.add_regexp(
    "\\w+",
    lambda from_pos, to_pos: print("Match Word: " + match_str[from_pos:to_pos]),
)
sar.add_regexp(
    "world",
    lambda from_pos, to_pos: print(
        "Found world from: " + str(from_pos) + " to: " + str(to_pos)
    ),
)

# Output:
#     Match Word: hello
#     Match Word: ello
#     Match Word: llo
#     Match Word: lo
#     Match Word: o
#     Match Word: world
#     Found world from: 6 to: 11
#     Match Word: orld
#     Match Word: rld
#     Match Word: ld
#     Match Word: d
#     Match Word: abc
#     Match Word: bc
#     Match Word: c
sar.match(match_str)
# Name of the function whose body is currently being scanned; stays None
# until the open-bracket handler assigns it.
inside_function_name = None
# Current nesting depth of curly brackets (incremented on "{", decremented
# on "}").
curly_bracket_count = 0


def handle_open_curly_bracket(from_pos, to_pos):
    """Track an opening curly bracket.

    When the bracket is opened at depth 0, the most recently found function
    name becomes the name of the enclosing function.  The depth counter is
    then increased by one.
    """
    global curly_bracket_count, inside_function_name
    at_top_level = curly_bracket_count == 0
    if at_top_level:
        inside_function_name = last_found_function
    curly_bracket_count = curly_bracket_count + 1


# Register the open-bracket handler for the "{" pattern.
sar.add_regexp("{", handle_open_curly_bracket)


def handle_close_curly_bracket(from_pos, to_pos):
    """Track a closing curly bracket by decreasing the depth counter by one."""
    global curly_bracket_count
    curly_bracket_count = curly_bracket_count - 1


# Register the close-bracket handler for the "}" pattern.
sar.add_regexp("}", handle_close_curly_bracket)


def handle_required_method_found(from_pos, to_pos):
    """Print the name of the function recorded as currently enclosing."""
    message = f"found at: {inside_function_name}"
    print(message)


# Register the pattern built around the searched method name.
# NOTE(review): the \^\w on both sides presumably requires a non-word
# character around the name -- confirm against the regexp_sar syntax.
sar.add_regexp(f"\\^\\w{required_method}\\^\\w", handle_required_method_found)

# Run all registered handlers over c_content (presumably the C source text).
sar.match(c_content)