예제 #1
0
파일: grammar.py 프로젝트: aipyth/MaThEX
 def __init__(self, X=None, D=None, acsiom=None, P=None):
     """Create a grammar and register its components.

     Args:
         X: collection handed to ``add_terminal_symbols``; an empty dict
             is used when omitted.
         D: collection handed to ``add_nonterminal_symbols``; an empty
             dict is used when omitted.
         acsiom: value handed to ``set_acsiom`` (presumably the start
             symbol of the grammar).
         P: collection handed to ``set_prod_rules``; an empty dict is used
             when omitted.
     """
     self.X = Set()
     self.D = Set()
     # A fresh dict per call replaces the former ``{}`` defaults, which were
     # single objects shared by every instance created without arguments.
     self.add_terminal_symbols({} if X is None else X)
     self.add_nonterminal_symbols({} if D is None else D)
     self.set_acsiom(acsiom)
     self.set_prod_rules({} if P is None else P)
예제 #2
0
파일: G.py 프로젝트: zloutek1/automata
    def toNFA(self):
        """
        Build an NFA from this grammar.

            1. rule A -> aB adds B̄ to δ[Ā, a]
            2. rule A -> a  adds qf to δ[Ā, a]
            3. the initial state is accepting if the start symbol
               has a rule -> ε

        Returns the object created by ``NFA(Q, self.Σ, δ, q0, F)``; its
        transition table is filled in after construction.
        """
        import NFA

        Q = Set(f"{A}̄" for A in self.N).union(Set("qf"))
        q0 = f"{self.S}̄"
        δ = NFA.δ(Q=Q, Σ=self.Σ)
        # Look at the grammar's real start symbol (not a literal "S") so that
        # grammars with a differently named start symbol are handled too.
        F = Set(q0, "qf") if "ε" in self.P[self.S] else Set("qf")

        M = NFA(Q, self.Σ, δ, q0, F)

        for A in self.N:
            for rule in self.P[A]:
                if len(rule) == 2:
                    # A -> aB : reading a moves from Ā to B̄.
                    a, B = rule
                    δ[f"{A}̄", a].add(f"{B}̄")

                elif len(rule) == 1 and rule != "ε":
                    # A -> a : reading a moves from Ā to the final state.
                    a = rule
                    δ[f"{A}̄", a].add("qf")

        return M
예제 #3
0
파일: CFG.py 프로젝트: zloutek1/automata
    def remove_ε(self):
        """
        Return ``CFG(N, self.Σ, P, S)`` built from this grammar with the
        ε alternatives eliminated.

        Every nonterminal keeps its alternatives except ``ε``; for each
        alternative, the variants obtained by erasing a subset of the
        ``self.Nε`` symbols are added as well. If some alternative of the
        start symbol consists only of ``Nε`` symbols, a new start symbol
        ``S'`` with the rule ``ε | <old start>`` is introduced.
        """
        import CFG
        import re

        # NOTE(review): read as an attribute, not called - presumably Nε is
        # exposed as a property on this class; confirm.
        Nε = self.Nε

        # NOTE(review): N aliases self.N (no copy), so the `N |= ...` below may
        # also change this grammar's own nonterminal set - confirm intended.
        N = self.N
        S = self.S
        P = CFG.P()

        for A in self.N:
            # Start from the existing alternatives without the bare ε rule.
            P[A] = self.P[A] - Set("ε")
            # NOTE(review): P[A] grows while it is being iterated; whether the
            # freshly added options are visited as well depends on Set.
            for opt in P[A]:
                for subset in all_subsets(Nε):
                    # Erase ε and every character of this subset from opt.
                    new_opt = re.sub(f"[ε{''.join(subset)}]", "", str(opt))
                    if new_opt != "":
                        P[A].add(new_opt)

        for opt in self.P[self.S]:
            # An alternative of the start symbol made only of Nε symbols:
            # add the new start symbol once and stop looking.
            if all(X in Nε for X in opt):
                N |= (Set("S'"))
                S = "S'"
                P["S'"] = f"ε | {self.S}"
                break

        G = CFG(N, self.Σ, P, S)
        return G
예제 #4
0
    def __init__(self,
                 worker_num=10,
                 chunk_size=10000,
                 log_interval=600,
                 data_dir='data',
                 log_dir='log'):
        """Prepare crawler state, output directories and worker objects.

        Nothing is started here: the writer thread, the logging timer and
        the spider threads are only created.

        Args:
            worker_num: number of spider ``Thread`` objects to create.
            chunk_size: stored as ``self.chunk_size``.
            log_interval: stored and passed to ``Timer`` as its interval.
            data_dir: data directory, created if missing.
            log_dir: log directory, created if missing.

        Raises:
            OSError: if one of the directories cannot be created.
        """
        self.chunk_size = chunk_size
        self.log_interval = log_interval
        self.urls = Queue()
        self.results = Queue()
        self.url_cache = Set()
        self.name_cache = Set()
        self.black_urls = Set()
        self.black_cache = Dict()
        self.chunk_num = 0
        self.parser = HtmlParser(home='https://baike.baidu.com')

        self.last = 0
        self.state = 1

        # makedirs(exist_ok=True) avoids the check-then-create race of
        # "exists() / mkdir()" and also creates missing parent directories.
        for directory in (data_dir, log_dir):
            os.makedirs(directory, exist_ok=True)
        self.data_dir = data_dir
        self.log_dir = log_dir

        self.writer = Thread(target=self._write)
        self.logger = Timer(log_interval, self._log)
        self.spiders = [Thread(target=self._scrap) for _ in range(worker_num)]
예제 #5
0
    def _reduce(self, imax=None):
        """
        Build a reduced DFA by repeatedly splitting the states into groups.

        States start in two groups (outside / inside ``self.F``); every
        refinement round splits each group by the ``δ.group()`` pattern of
        its members. The final group labels become the states of the result.

        Args:
            imax: optional bound on the round counter; a further round is
                started only while the counter is below ``imax``. ``None``
                means no bound.

        Returns:
            ``DFA(Q, self.Σ, δ, q0, F)`` expressed over the group labels.
        """
        from utils import roman
        import DFA

        def init(i):
            """
            Initial split into two groups:
            roman(1) - states not in self.F
            roman(2) - states in self.F
            """
            groups = {
                qi: roman(1) if qi not in self.F else roman(2)
                for qi in self.Q
            }

            # Transition table rewritten to point at group labels.
            δ = DFA.δ(Q=self.Q, Σ=self.Σ)
            for qi in (self.Q - self.F) + self.F:
                for a in self.Σ:
                    δ[qi, a] = groups.get(self.δ[qi, a])

            if imax is None or i < imax:
                return step(i + 1, groups, δ)
            return groups, δ

        def step(i, groups, δ):
            """One refinement round; recurses until the stop condition holds."""
            new_groups = {}
            force_move = 0
            numeratd_patterns = []
            # A dedicated loop variable keeps the round counter `i` intact;
            # it used to be overwritten here and then compared with imax.
            for g in range(len(Set(groups.values()))):
                for qi, val in groups.items():
                    target = δ.group()[qi]
                    if val == roman(g + 1):
                        if target not in numeratd_patterns:
                            numeratd_patterns.append(target)
                        index = numeratd_patterns.index(target)
                        # Offset by the number of labels used by earlier groups.
                        new_groups[qi] = roman(index + 1 + force_move)
                force_move += len(numeratd_patterns)
                numeratd_patterns = []

            new_δ = DFA.δ()
            for qi in self.Q:
                for a in self.Σ:
                    new_δ[qi, a] = new_groups.get(self.δ[qi, a])

            if (imax is None or i < imax) and groups != new_groups and δ != new_δ:
                return step(i + 1, new_groups, new_δ)
            return new_groups, new_δ

        groups, new_δ = init(0)

        Q = Set(groups.values())
        q0 = groups[self.q0]

        # Re-key the table by the group label of each source state.
        δ = DFA.δ()
        for (qi, a), target in new_δ.items():
            δ[groups[qi], a] = target

        F = Set(groups[qf] for qf in self.F)
        return DFA(Q, self.Σ, δ, q0, F)
예제 #6
0
파일: main.py 프로젝트: zloutek1/automata
def test_grammar():
    """Smoke test: build a sample grammar and convert it with toNFA()."""
    productions = G.P({
        "S": "aA | bC | a | ε",
        "A": "bB | aA | b | c",
        "B": "aB | bC | aC | cA | c",
        "C": "a | b | aA | bB"
    })
    nonterminals = Set("S", "A", "C", "B")
    alphabet = Set("a", "b", "c")
    grammar = G(nonterminals, alphabet, productions, "S")
    # The conversion result is not inspected; the call only has to succeed.
    grammar.toNFA()
예제 #7
0
    def _canonize(self):
        """
        Return a DFA whose states are renamed to the letters 'A', 'B', ...

        The states returned by ``self.δ.reachables(self.q0)`` are paired, in
        order, with as many letters as there are states in ``self.Q``.
        """
        import DFA

        letters = Set(chr(ord('A') + offset) for offset in range(len(self.Q)))
        renamed = dict(zip(self.δ.reachables(self.q0), letters))

        table = DFA.δ()
        for (source, symbol), destination in self.δ.items():
            new_destination = renamed[destination]
            table[renamed[source], symbol] = new_destination

        start = renamed[self.q0]
        accepting = Set(renamed[qf] for qf in self.F)

        return DFA(letters, self.Σ, table, start, accepting)
예제 #8
0
파일: CFG.py 프로젝트: zloutek1/automata
    def toTopDownAnalyzer(self):
        """
        Build a TopDownAnalyzer for this grammar after left-recursion removal.

        The single state "q" gets two kinds of transitions:
            (q, ε, A) -> (q, rule)  for every rule of every nonterminal A
            (q, a, a) -> (q, ε)     for every terminal a
        """
        import PDA
        grammar = self.remove_left_recursion()

        transitions = PDA.δ()
        states = Set("q")
        stack_symbols = grammar.N.union(grammar.Σ)
        analyzer = TopDownAnalyzer(states, grammar.Σ, stack_symbols,
                                   transitions, "q", grammar.S, Set())

        # The table is filled after the analyzer has been constructed.
        for nonterminal in grammar.N:
            for alternative in grammar.P[nonterminal]:
                transitions["q", "ε", nonterminal].add(("q", alternative))

        for terminal in grammar.Σ:
            transitions["q", terminal, terminal].add(("q", "ε"))

        return analyzer
예제 #9
0
    def __setitem__(self, key, val):
        """
        Store a transition.

        A 2-item key is stored with both parts converted to str and the
        value converted to a Set of str. A longer key ``(q, a1, ..., an)``
        with exactly n values is expanded into n separate 2-item entries.
        Any other key/value shape is ignored.
        """
        size = len(key)
        if size == 2:
            pair = tuple(str(part) for part in key)
            super().__setitem__(pair, Set(map(str, val)))
            return

        if size > 2 and size == len(val) + 1:
            state = key[0]
            for symbol, target in zip(key[1:], val):
                self.__setitem__((state, symbol), target)
예제 #10
0
파일: CFG.py 프로젝트: zloutek1/automata
    def __setitem__(self, key, value):
        """
        Store the alternatives of a nonterminal.

        A string such as ``"aA | b"`` is split on "|", each part is stripped
        and wrapped in ``Rule``; a ``Set`` is stored unchanged. Values of any
        other type are ignored.
        """
        if isinstance(value, str):
            alternatives = (part.strip() for part in value.split("|"))
            rules = Set(map(Rule, alternatives))
        elif isinstance(value, Set):
            rules = value
        else:
            return
        super().__setitem__(key, rules)
예제 #11
0
파일: CFG.py 프로젝트: zloutek1/automata
 def do():
     nonlocal i, N
     i = i + 1
     N[i] = N[i - 1].union(
         Set(A for A in self.N for p in self.P[A]
             if re.sub("[∅" + "".join(N[i - 1]) +
                       "]", "", str(p)).islower()))
예제 #12
0
파일: CFG.py 프로젝트: zloutek1/automata
 def do():
     nonlocal i, V
     i = i + 1
     V[i] = V[i - 1].union(
         Set(sym for sym in self.N.union(self.Σ)
             if any(sym in rule for A in V[i - 1] if A.isupper()
                    for rule in self.P[A])))
예제 #13
0
파일: CFG.py 프로젝트: zloutek1/automata
 def calc_potentials():
     """Map every symbol of N to the N-symbols that start one of its rules."""
     potentials = {}
     for A in N:
         leaders = potentials.setdefault(A, Set())
         for rule in P[A]:
             first = rule[0]
             if first in N:
                 leaders.add(first)
     return potentials
예제 #14
0
파일: CFG.py 프로젝트: zloutek1/automata
    def remove_left_recursion(self):
        """
        Return ``CFG(N, self.Σ.copy(), P, self.S)`` with rules rewritten so
        that leading-symbol dependencies between nonterminals are removed.

        Works on copies of ``self.N`` and ``self.P``. For a pair (A, B):
            * A == B: rules ``A -> Aα | β`` become ``A -> β | βA'`` and
              ``A' -> α | αA'`` with a new nonterminal ``A'``;
            * A != B: every rule ``A -> Bα`` is replaced by ``A -> γα`` for
              each rule ``B -> γ``.
        """
        def calc_potentials():
            # For each A in N: the symbols of N that start one of A's rules.
            potentials = {}
            for A in N:
                potentials.setdefault(A, Set())
                for rule in P[A]:
                    if rule[0] in N:
                        potentials[A].add(rule[0])
            return potentials

        N = self.N.copy()
        P = self.P.copy()

        # NOTE(review): N is rebound inside the loop while enumerate() keeps
        # walking the original object, and `N[:i + 1]` slices the rebound N
        # (which has A' prepended) - confirm the indices still line up.
        for i, A in enumerate(N):
            for B in N[:i + 1]:
                # Recomputed on every pair because P changes as we go.
                if not B in calc_potentials()[A]:
                    continue

                if A == B:
                    # Direct recursion: α = rules starting with A, β = the rest.
                    α = [rule for rule in P[A] if rule.startswith(B)]
                    β = [rule for rule in P[A] if not rule.startswith(B)]
                    N = Set(f"{A}'").union(N)
                    # NOTE(review): `|=` reads P[A'] before it was ever
                    # assigned - presumably P supplies a default; confirm.
                    P[f"{A}'"] |= Set(Rule(rule[1:]) for rule in α) | Set(
                        Rule(rule[1:] + f"{A}'") for rule in α)
                    P[A] = Set(Rule(rule) for rule in β) | Set(
                        Rule(rule + f"{A}'") for rule in β)

                else:
                    # Indirect case: substitute B's rules for the leading B.
                    α = [rule for rule in P[A] if rule.startswith(B)]
                    β = [rule for rule in P[A] if not rule.startswith(B)]
                    P[A] = Set(Rule(rule) for rule in β) | Set(
                        Rule(ruleB + rule[1:]) for rule in α for ruleB in P[B])

        return CFG(N, self.Σ.copy(), P, self.S)
예제 #15
0
    def toNFA(self):
        """
        Build an NFA from this automaton by removing the ε steps.

        For every state and every non-ε symbol the new targets are the
        ε-closures (``Dε``) of all states reachable on that symbol from the
        state itself or from its ε-closure. The initial state is added to
        the accepting states when its ε-closure meets ``self.F``.
        """
        import NFA

        δ = NFA.δ()
        for state in self.Q:
            for symbol in self.Σ:
                if symbol != "ε":
                    direct = self.δ.get((state, symbol), Set())

                    through_closure = Set()
                    for s in self.δ.Dε(state):
                        through_closure |= self.δ.get((s, symbol), Set())

                    closed = Set()
                    for s in direct | through_closure:
                        closed |= self.δ.Dε(s)

                    δ[state, symbol] = tuple(closed)

        Σ = self.Σ - Set("ε")
        if self.δ.Dε(self.q0).intercept(self.F) == Set():
            F = self.F
        else:
            F = self.F.union(Set(self.q0))

        return NFA(self.Q, Σ, δ, self.q0, F)
예제 #16
0
def test_product():
    """Print two sample sets and the result of Set.product on them."""
    left = {1, 2}
    right = {'a', 'b', 'c'}

    pairs = Set.product(left, right)

    report = (
        ("Set 1: ", left),
        ("Set 2: ", right),
        ("Cartesian product: ", pairs),
    )
    for label, value in report:
        print(label, value)
예제 #17
0
파일: CFG.py 프로젝트: zloutek1/automata
    def toBottomUpAnalyzer(self):
        """
        Build a BottomUpAnalyzer for this grammar.

        States are "q" (working) and "r" (accepting); "⊥" is the initial
        stack symbol. Transitions added:
            (q, ε, rule) -> (q, A)   for every rule of every nonterminal A
            (q, a, ε)    -> (q, a)   for every terminal a
            (q, ε, ⊥<start symbol>) -> (r, ε)

        Left-recursion removal is not applied; the grammar is used as is.
        """
        import PDA
        L = self

        # NOTE(review): the sibling top-down builder in this file creates its
        # table with PDA.δ() - confirm PDA.ô is the intended factory here.
        ô = PDA.ô()
        M = BottomUpAnalyzer(Set("q", "r"), L.Σ, L.N | L.Σ | Set("⊥"), ô, "q",
                             "⊥", Set("r"))

        for A in L.N:
            for rule in L.P[A]:
                # Replace a rule body on the stack by its nonterminal.
                ô["q", "ε", rule].add(("q", A))

        for a in L.Σ:
            # Move the input symbol onto the stack.
            ô["q", a, "ε"].add(("q", a))

        # Accept when only the bottom marker and the start symbol remain.
        # The grammar's actual start symbol is used instead of a literal "S".
        ô["q", "ε", f"⊥{L.S}"].add(("r", "ε"))

        return M
예제 #18
0
 def Dε(self, q0):
     """Return the Set of states reachable from q0 through "ε" entries
     only, q0 itself included."""
     closure = Set()
     pending = [q0]
     while pending:
         state = pending.pop(0)
         closure.add(state)
         for successor in self.__getitem__((state, "ε")):
             # Queue only states that were not collected before.
             if successor not in closure:
                 pending.append(successor)
             closure.add(successor)
     return closure
예제 #19
0
파일: NFA.py 프로젝트: zloutek1/automata
    def __init__(self, Q, Σ, δ, q0, F):
        """
        Create the automaton.

        Q  : set of states
        Σ  : finite alphabet
        δ  : transition function
        q0 : initial state, q0 ∈ Q
        F  : accepting states, F ⊆ Q

        The parent initializer receives the initial state wrapped in a Set;
        afterwards ``self.q0`` is set to the string form of ``q0``.
        """
        initial_states = Set(q0)
        super().__init__(Q, Σ, δ, initial_states, F)
        self.q0 = str(q0)
예제 #20
0
파일: main.py 프로젝트: zloutek1/automata
def test_dfa():
    """Build a seven-state sample DFA, minimize it and draw the result."""
    table = DFA.δ()
    automaton = DFA(Set(1, 2, 3, 4, 5, 6, 7), Set("a", "b"), table, 1,
                    Set(3, 5, 6))

    # (state, symbol) -> target, inserted in this order.
    # "-" presumably marks a missing transition - TODO confirm.
    moves = {
        (1, "a"): 2, (1, "b"): "-",
        (2, "a"): 3, (2, "b"): 4,
        (3, "a"): 6, (3, "b"): 5,
        (4, "a"): 3, (4, "b"): 2,
        (5, "a"): 6, (5, "b"): 3,
        (6, "a"): 2, (6, "b"): "-",
        (7, "a"): 6, (7, "b"): 1,
    }
    for key, target in moves.items():
        table[key] = target

    # A.table()
    automaton.minimize().diagram()
예제 #21
0
파일: CFG.py 프로젝트: zloutek1/automata
    def remove_primitive_rules(self):
        """
        Return a grammar without rules of the form A → B (A, B ∈ N).

        Each nonterminal A receives every non-primitive rule of the symbols
        in ``self.Nx(A)``.
        """
        import CFG

        productions = CFG.P()

        for A in self.N:
            related = self.Nx(A)
            productions[A] = Set(rule
                                 for B in related
                                 for rule in self.P[B]
                                 if not self.isprimitive(rule))

        return CFG(self.N, self.Σ, productions, self.S)
예제 #22
0
파일: CFG.py 프로젝트: zloutek1/automata
    def Nx(self, x):
        """
        Return the closure of {x} under primitive rules.

        Starting from ``Set(x)``, the primitive rules (``self.isprimitive``)
        of all collected symbols are added until the set stops changing.
        """
        def expand(known):
            # One round: add the primitive rules of everything known so far.
            return known.union(
                Set(rule for A in known for rule in self.P[A]
                    if self.isprimitive(rule)))

        previous = Set(x)
        current = expand(previous)
        while current != previous:
            previous, current = current, expand(current)

        return current
예제 #23
0
    def reachables(self, q0):
        """
        Return the Set of states reachable from q0 (q0 included), following
        the entries of this table for every symbol of ``self.Σ``.
        """

        visited = Set()
        pending = [q0]

        while pending:
            state = pending.pop(0)
            visited.add(state)
            for symbol in self.Σ:
                successor = self.__getitem__((state, symbol))
                if successor is None:
                    # No entry for this (state, symbol) pair.
                    continue
                if successor not in visited:
                    pending.append(successor)
                visited.add(successor)

        return visited
예제 #24
0
파일: CFG.py 프로젝트: zloutek1/automata
        def resolve(A, B):
            # Rewrite the rules of A in place, using B's rules where a rule of
            # A starts with B. P comes from the enclosing scope.
            # Each pass pops the first rule of P[A] and adds its replacement;
            # the number of passes is fixed to the initial size of P[A].
            for _ in range(len(P[A])):
                rule = list(P[A].pop(0))
                if rule[0].islower():
                    # Rule starts with a lower-case symbol: mark every later
                    # single-character lower-case symbol with a macron.
                    for i, c in enumerate(rule[1:]):
                        if c.islower() and len(c) == 1:
                            rule[i + 1] = f"{c}̄"

                    rule = "".join(rule)
                    P[A].add(Rule(rule))

                elif rule[0] == B:
                    # Rule starts with B: substitute each rule of B for it.
                    rules = Set()
                    for rule1 in P[B]:
                        rules.add(Rule(rule1 + "".join(rule[1:])))
                    P[A] |= rules

                else:
                    # NOTE(review): here Rule() receives the list form of the
                    # rule, whereas the first branch joins it to a string
                    # first - confirm Rule accepts both.
                    P[A].add(Rule(rule))
예제 #25
0
파일: CFG.py 프로젝트: zloutek1/automata
    def V(self):
        """
        Return the symbols reachable from the start symbol.

        Starting from ``Set(self.S)``, every symbol of N ∪ Σ that occurs in
        a rule of an already collected upper-case symbol is added until the
        set stops changing.
        """
        def expand(known):
            # One round: symbols occurring in rules of the known ones.
            return known.union(
                Set(sym for sym in self.N.union(self.Σ)
                    if any(sym in rule
                           for A in known if A.isupper()
                           for rule in self.P[A])))

        previous = Set(self.S)
        current = expand(previous)
        while current != previous:
            previous, current = current, expand(current)

        return current
예제 #26
0
파일: grammar.py 프로젝트: aipyth/MaThEX
    def CYK_parser(self, w):
        """
        Fill and return the n×n table C for the word w (n = len(w)).

        C[s, e] is a Set of left-hand sides collected for the slice
        ``w[s:e + 1]``: first those for which ``check_prod_rule`` accepts the
        slice directly, then those of the rules from
        ``get_nonterm_prod_rules`` whose two right-hand symbols are found in
        the cells of the two halves of some split of the slice.
        """
        n = len(w)
        table = np.empty((n, n), dtype=Set)
        for row in range(n):
            for col in range(n):
                table[row, col] = Set()

        # Direct matches for every slice w[start:end + 1].
        for d in self.P:
            for start in range(n):
                for end in range(start, n):
                    if self.check_prod_rule((d, w[start:end + 1])):
                        table[start, end].add(d)

        # Combine shorter slices, by increasing slice length.
        for length in range(2, n + 1):
            for start in range(n - length + 1):
                end = start + length - 1
                for rule in self.get_nonterm_prod_rules():
                    left, right = rule[1][0], rule[1][1]
                    # split = first index of the right-hand part.
                    for split in range(start + 1, end + 1):
                        if (left in table[start, split - 1]
                                and right in table[split, end]):
                            table[start, end].add(rule[0])
        return table
예제 #27
0
        def step(i, groups, δ):
            """
            Run one refinement round and recurse until the stop condition.

            i      : round counter, compared with imax from the enclosing scope
            groups : mapping state -> current group label
            δ      : table for the current grouping; its group() result is
                     used as the pattern that splits a group

            Returns the pair (new_groups, new_δ) of the last round.
            """
            new_groups = {}
            force_move = 0
            numeratd_patterns = []
            # A dedicated loop variable keeps the round counter `i` intact;
            # it used to be overwritten here and then compared with imax.
            for g in range(len(Set(groups.values()))):
                for qi, val in groups.items():
                    target = δ.group()[qi]
                    if val == roman(g + 1):
                        if target not in numeratd_patterns:
                            numeratd_patterns.append(target)
                        index = numeratd_patterns.index(target)
                        # Offset by the number of labels used by earlier groups.
                        new_groups[qi] = roman(index + 1 + force_move)
                force_move += len(numeratd_patterns)
                numeratd_patterns = []

            new_δ = DFA.δ()
            for qi in self.Q:
                for a in self.Σ:
                    new_δ[qi, a] = new_groups.get(self.δ[qi, a])

            if (imax is None or i < imax) and groups != new_groups and δ != new_δ:
                return step(i + 1, new_groups, new_δ)
            return new_groups, new_δ
예제 #28
0
파일: CFG.py 프로젝트: zloutek1/automata
    def Nε(self):
        """
        Return the nonterminals that can turn into ε.

        A nonterminal is collected when one of its rules becomes the empty
        string after erasing ∅, ε and every symbol collected so far; rounds
        repeat until the set stops changing.
        """

        import re

        def expand(known):
            # Character class of everything that may be erased this round.
            eraser = "[∅ε" + "".join(known) + "]"
            return known.union(
                Set(A for A in self.N for p in self.P[A]
                    if re.sub(eraser, "", str(p)) == ""))

        previous = Set()
        current = expand(previous)
        while current != previous:
            previous, current = current, expand(current)

        return current
예제 #29
0
파일: CFG.py 프로젝트: zloutek1/automata
    def Ne(self):
        """
        Return the nonterminals that have a rule which is lower-case
        (``str.islower``) once ∅ and every symbol collected so far are
        erased from it; rounds repeat until the set stops changing.
        """

        import re

        def expand(known):
            # Character class of everything that may be erased this round.
            eraser = "[∅" + "".join(known) + "]"
            return known.union(
                Set(A for A in self.N for p in self.P[A]
                    if re.sub(eraser, "", str(p)).islower()))

        previous = Set()
        current = expand(previous)
        while current != previous:
            previous, current = current, expand(current)

        return current
예제 #30
0
파일: CFG.py 프로젝트: zloutek1/automata
    def toGNF(self):
        """
        Convert the grammar towards Greibach normal form, where each rule
        must be of the format
        A → aB1B2B3...Bn   (a ∈ Σ, B1,B2,B3,...,Bn ∈ N)

        Left recursion is removed first; the rules are then rewritten pair
        by pair over the reversed nonterminal order.
        Returns ``CFG(N, self.Σ, P, self.S)``.
        """
        import CFG

        G = self.remove_left_recursion()
        N = Set(reversed(G.N.copy()))
        P = G.P.copy()

        def resolve(A, B):
            # Rewrite the rules of A in place, using B's rules where a rule of
            # A starts with B. Each pass pops the first rule of P[A] and adds
            # its replacement; the pass count is the initial size of P[A].
            for _ in range(len(P[A])):
                rule = list(P[A].pop(0))
                if rule[0].islower():
                    # Rule starts with a lower-case symbol: mark every later
                    # single-character lower-case symbol with a macron.
                    for i, c in enumerate(rule[1:]):
                        if c.islower() and len(c) == 1:
                            rule[i + 1] = f"{c}̄"

                    rule = "".join(rule)
                    P[A].add(Rule(rule))

                elif rule[0] == B:
                    # Rule starts with B: substitute each rule of B for it.
                    rules = Set()
                    for rule1 in P[B]:
                        rules.add(Rule(rule1 + "".join(rule[1:])))
                    P[A] |= rules

                else:
                    # NOTE(review): here Rule() receives the list form of the
                    # rule, whereas the first branch joins it to a string
                    # first - confirm Rule accepts both.
                    P[A].add(Rule(rule))

        # Resolve each nonterminal against itself and all earlier ones.
        for i, A in enumerate(N):
            for B in N[:i + 1]:
                resolve(A, B)

        # NOTE(review): the result uses self.Σ and self.S rather than the
        # values of the left-recursion-free grammar G - confirm intended.
        return CFG(N, self.Σ, P, self.S)