Exemplo n.º 1
0
def cal_firstVt(gram_list: list,Vn_list:list,Vt_list:list) -> 'firstVt_dict':
    """Compute the FIRSTVT set of every non-terminal.

    FIRSTVT(P) contains every terminal ``a`` such that ``P`` derives a
    string of the form ``a...`` or ``Qa...`` (``Q`` a non-terminal).

    Args:
        gram_list: productions as ``(left, right)`` pairs; ``right`` is a
            string of single-character symbols.  A symbol is treated as a
            non-terminal when ``isupper`` accepts it, as a terminal otherwise.
        Vn_list: all non-terminals; each gets an entry in the result.
        Vt_list: unused, kept so existing callers keep working.

    Returns:
        dict mapping each non-terminal to its FIRSTVT set, e.g.
        ``{'E': {'+', '(', 'i'}}``.

    Raises:
        KeyError: if a production refers to a non-terminal missing from
            ``Vn_list``.
    """
    firstVt_dict = {v: set() for v in Vn_list}

    # Fixed-point iteration: the sets only ever grow, so we are done as soon
    # as one full pass over the grammar adds nothing.
    changed = True
    while changed:
        changed = False
        for gene in gram_list:
            left, right = gene[0], gene[1]
            # An empty right-hand side contributes nothing.
            if right == '':
                continue

            target = firstVt_dict[left]
            size_before = len(target)

            # Rule 1: the first terminal of the right-hand side.
            terminals = [symbol for symbol in right if not isupper(symbol)]
            if terminals:
                target.add(terminals[0])

            # Rule 2: P -> Q... implies FIRSTVT(Q) is a subset of FIRSTVT(P).
            # This must also be applied when the production contains a
            # terminal (P -> Qa...), otherwise FIRSTVT(Q) is lost.
            if isupper(right[0]):
                target.update(firstVt_dict[right[0]])

            if len(target) != size_before:
                changed = True

    print('firstVt集为:',firstVt_dict)
    return firstVt_dict
Exemplo n.º 2
0
def cal_select(gene_list: list, first_dict: dict,
               follow_dict: dict) -> 'select_dict':
    """Compute the SELECT set of every production.

    For a production ``A -> alpha``:

    * if ``alpha`` cannot derive the empty string,
      ``SELECT = FIRST(alpha)``;
    * otherwise ``SELECT = (FIRST(alpha) - {''}) | FOLLOW(A)``.

    Args:
        gene_list: productions as ``(left, right)`` pairs of strings;
            ``right`` is a string of single-character symbols ('' for an
            empty production).  Symbols accepted by ``isupper`` are
            non-terminals, every other symbol is a terminal.
        first_dict: FIRST set of each non-terminal; the empty string ``''``
            in a set marks the non-terminal as nullable.
        follow_dict: FOLLOW set of each non-terminal.

    Returns:
        dict keyed by ``'A->alpha'`` whose values are the SELECT sets, e.g.
        ``{'A->abc': {'a'}}``.  The input dictionaries are not modified.
    """
    select_dict = {}

    for gene in gene_list:
        left, right = gene[0], gene[1]
        selected = set()

        # Walk the right-hand side until a symbol that cannot vanish is met.
        derives_empty = True
        for symbol in right:
            if not isupper(symbol):
                # A terminal always stops the scan.
                selected.add(symbol)
                derives_empty = False
                break
            symbol_first = first_dict[symbol]
            selected.update(symbol_first - {''})
            if '' not in symbol_first:
                derives_empty = False
                break

        # The whole right-hand side can disappear (this includes the empty
        # production), so whatever may follow the left-hand side selects it.
        if derives_empty:
            selected.update(follow_dict[left])

        select_dict['->'.join(gene)] = selected

    print('select集为:', select_dict)
    return select_dict
Exemplo n.º 3
0
def classify_status(gram_list: list,firstVt_dict,lastVt_dict) -> list:
    """Derive the precedence relations between terminals.

    Every production's right-hand side is scanned for these patterns
    (``a``, ``b`` terminals; ``Q`` a non-terminal, i.e. a symbol accepted by
    ``isupper``):

    * ``ab``  gives ``[{a}, '=', {b}]``
    * ``aQ``  gives ``[{a}, '<', FIRSTVT(Q)]``
    * ``Qa``  gives ``[LASTVT(Q), '>', {a}]``
    * ``aQb`` gives ``[{a}, '=', {b}]``

    For each production the adjacent-pair relations come first (left to
    right), followed by the ``aQb`` relations.

    Args:
        gram_list: productions as ``(left, right)`` pairs; ``right`` is a
            sequence of single-character symbols.
        firstVt_dict: FIRSTVT set of each non-terminal.
        lastVt_dict: LASTVT set of each non-terminal.

    Returns:
        list of ``[left_set, relation, right_set]`` triples.  The FIRSTVT and
        LASTVT sets are stored by reference, not copied.
    """
    status_list = []
    for gene in gram_list:
        right = gene[1]

        # Adjacent pairs: ab, aQ, Qa (QQ yields nothing).
        for current, following in zip(right, right[1:]):
            current_is_vt = not isupper(current)
            following_is_vt = not isupper(following)
            if current_is_vt and following_is_vt:
                status_list.append([{current}, '=', {following}])
            elif current_is_vt:
                status_list.append([{current}, '<', firstVt_dict[following]])
            elif following_is_vt:
                status_list.append([lastVt_dict[current], '>', {following}])

        # Terminals separated by exactly one non-terminal: aQb.
        for before, middle, after in zip(right, right[1:], right[2:]):
            if not isupper(before) and isupper(middle) and not isupper(after):
                status_list.append([{before}, '=', {after}])

    print('各终结符之间的优先关系:')
    for status in status_list:
        print(str(status))
    return status_list
Exemplo n.º 4
0
def has_Vt(gene: tuple)->bool:
    """Tell whether the right-hand side of a production holds a terminal.

    ``gene[1]`` is the right-hand side; any symbol that ``isupper`` rejects
    counts as a terminal.  Returns ``True`` if at least one is present.
    """
    terminals = [symbol for symbol in gene[1] if not isupper(symbol)]
    return len(terminals) > 0
Exemplo n.º 5
0
def find_V(grammer_list: list) -> 'Vn_list,Vt_list':
    """Collect all non-terminals and terminals used in a grammar.

    Every character of every string in every production is classified: a
    character accepted by ``isupper`` is a non-terminal, anything else is a
    terminal.

    Args:
        grammer_list: productions, each an iterable of strings such as
            ``('E', 'E+T')``.

    Returns:
        ``(Vn_list, Vt_list)`` - two duplicate-free lists, sorted so the
        result is reproducible between runs.
    """
    Vn_set = set()
    Vt_set = set()
    # A single pass over the symbols is enough; there is no need to rebuild
    # and rescan the accumulated symbol string for every production.
    for production in grammer_list:
        for part in production:
            for char in part:
                if isupper(char):
                    Vn_set.add(char)
                else:
                    Vt_set.add(char)

    Vn_list = sorted(Vn_set)
    Vt_list = sorted(Vt_set)
    print('Vn为:',Vn_list)
    print('Vt为:', Vt_list)
    return Vn_list,Vt_list
Exemplo n.º 6
0
def cal_first(gene_list: list, Vn_list: list) -> 'first_dict':
    """Compute the FIRST set of every non-terminal.

    The empty string ``''`` inside a FIRST set means the non-terminal can
    derive the empty string.

    Args:
        gene_list: productions as ``(left, right)`` pairs; ``right`` is a
            string of single-character symbols ('' for an empty production).
        Vn_list: all non-terminals.  Any symbol not listed here is treated
            as a terminal, so no global terminal list is required.

    Returns:
        dict mapping each non-terminal to its FIRST set, e.g.
        ``{'A': {'a', ''}}``.

    Raises:
        KeyError: if the left side of a production is missing from
            ``Vn_list``.
    """
    first_dict = {Vn: set() for Vn in Vn_list}

    # Fixed-point iteration: the sets only ever grow, so stop after the
    # first pass that adds nothing.
    changed = True
    while changed:
        changed = False
        for gene in gene_list:
            left, right = gene[0], gene[1]
            target = first_dict[left]
            size_before = len(target)

            # Scan from the very first symbol and stop at the first one that
            # cannot derive the empty string.
            derives_empty = True
            for symbol in right:
                if symbol not in first_dict:
                    # Terminal: it starts the derived string.
                    target.add(symbol)
                    derives_empty = False
                    break
                symbol_first = first_dict[symbol]
                # Build a new set so '' already present in the target (from
                # an empty production of ``left``) is never removed.
                target.update(symbol_first - {''})
                if '' not in symbol_first:
                    derives_empty = False
                    break

            # Empty production, or every symbol on the right is nullable.
            if derives_empty:
                target.add('')

            if len(target) != size_before:
                changed = True

    print('first集为:', first_dict)
    return first_dict
Exemplo n.º 7
0
def cal_lastVt(gram_list: list,Vn_list,Vt_list) -> 'lastVt_dict':
    """Compute the LASTVT set of every non-terminal.

    LASTVT(P) contains every terminal ``a`` such that ``P`` derives a string
    of the form ``...a`` or ``...aQ`` (``Q`` a non-terminal).

    Args:
        gram_list: productions as ``(left, right)`` pairs; ``right`` is a
            string of single-character symbols.  A symbol is treated as a
            non-terminal when ``isupper`` accepts it, as a terminal otherwise.
        Vn_list: all non-terminals; each gets an entry in the result.
        Vt_list: unused, kept so existing callers keep working.

    Returns:
        dict mapping each non-terminal to its LASTVT set, e.g.
        ``{'E': {'+', ')', 'i'}}``.

    Raises:
        KeyError: if a production refers to a non-terminal missing from
            ``Vn_list``.
    """
    lastVt_dict = {v: set() for v in Vn_list}

    # Fixed-point iteration: the sets only ever grow, so we are done as soon
    # as one full pass over the grammar adds nothing.
    changed = True
    while changed:
        changed = False
        for gene in gram_list:
            left, right = gene[0], gene[1]
            # An empty right-hand side contributes nothing.
            if right == '':
                continue

            target = lastVt_dict[left]
            size_before = len(target)

            # Rule 1: the last terminal of the right-hand side.
            terminals = [symbol for symbol in right if not isupper(symbol)]
            if terminals:
                target.add(terminals[-1])

            # Rule 2: P -> ...Q implies LASTVT(Q) is a subset of LASTVT(P).
            # This must also be applied when the production contains a
            # terminal (P -> ...aQ), otherwise LASTVT(Q) is lost.
            if isupper(right[-1]):
                target.update(lastVt_dict[right[-1]])

            if len(target) != size_before:
                changed = True

    print('lastVt集为:', lastVt_dict)
    return lastVt_dict
Exemplo n.º 8
0
def checkUpper(input):
    """Return a truthy value when ``input`` is an upper-case character.

    The two letters "Đ" and "Ư" are accepted explicitly; every other value
    is passed to ``isupper`` and its result is returned unchanged.
    """
    if input in ("Đ", "Ư"):
        return True
    return isupper(input)
Exemplo n.º 9
0
    def FMMBMM(self,sentence):
        """Segment ``sentence`` into dictionary phrases and join them again.

        The sentence is normalised with ``self.standardlizeString`` and split
        on single spaces.  Multi-word phrases found in ``self.td`` (looked up
        in lower case) are merged into one token whose inner spaces become
        underscores.  Each token is then followed by ``self.dau[0]``, or by
        ``self.dau[1]`` when it is a non-final member of a run of at least
        two consecutive single-word tokens starting with an upper-case
        character.

        NOTE(review): the method name suggests forward/backward maximum
        matching - confirm against the class documentation.

        Returns:
            The concatenated tokens and separators, stripped of surrounding
            whitespace.
        """
        sentence = self.standardlizeString(sentence)
        # vw: the individual words; n: number of words.
        vw = sentence.strip().split(' ')
        n = len(vw)
        # q[i] holds the text of the segment currently chosen to end at word
        # boundary i (q[0] stays empty).
        q = []
        # strVWS is only ever built up below; it is never returned or read.
        strVWS = " |"
        for i in range(0, n+1):
            q.append('')
        w = ''
        vt1 = []
        i = 0
        j = 0
        k = 0
        # a[i]: accumulated cost for reaching word boundary i.
        a = []
        for i in range(0, n+1):
            a.append(0.0)
        # L[j][i]: cost of taking words j..i-1 as one segment; every cell
        # starts at self.maxval.
        L = [[0]*(n + 1) for x in range(n + 1)]    
        for i in range(0,n+1):
            for j in range(0,n+1):
                L[i][j]=self.maxval 
        
        # Walk the words from last to first.  A single word costs 1.0; a
        # phrase vw[j..k] present in self.td costs 1.0 / (number of words)
        # and replaces q[k+1], so the longest matching phrase ending at
        # word k is the one that remains.
        i = n - 1
        while i >=0:
            w = vw[i]
            L[i][i+1] = 1.0
            q[i+1] = w 
            if i >0:
                k = i
                for j in reversed(range(0, k)):
                    # Grow the candidate phrase one word to the left.
                    w = vw[j]+" "+w
                    if self.td.get(w.lower(),0) > 0:
                        L[j][k+1] = 1.0 /(k-j+1)
                        q[k+1]=w
            i -= 1
        
        # Relaxation over boundaries: take segment j..i-1 whenever it lowers
        # a[i], and rebuild its text into q[i].
        # NOTE(review): a[1:] start at 0.0, so whether this update ever fires
        # depends on self.minval / self.maxval, which are not visible here.
        a[0]=self.minval
        for i in range(1, n+1):
            for j in range(0, i):
                w = vw[j]
                if a[i] > (a[j] + L[j][i]):
                    a[i] = a[j]+ L[j][i]
                    for k in range(j+1, i):
                        w = w + " "+ vw[k]
                    q[i] =  w
        # Backtrack from the end: take the segment ending at boundary i and
        # jump back by the number of words it contains.
        i = n
        tpstrVWS=''
        while i > 0:
            vt1 = q[i].strip().split(' ')
            strVWS = " | " + q[i] + strVWS
            # Multi-word segments become one underscore-joined token.
            tpstrVWS = ' '+q[i].replace(' ','_') + tpstrVWS+' '
            i = i - len(vt1)

        ttp = tpstrVWS.strip().split()
        # Indices of single-word tokens (no underscore) whose first character
        # isupper() accepts.
        listUpper = []

        for index in range(0, len(ttp)):
            if isupper(ttp[index][0]) and ttp[index].find('_') <0:
                listUpper.append(index)

        # listreplace is filled but never read afterwards.
        listreplace = []
        # dicDau marks every token of a run except its last one with 1.
        # group_consecutives is defined elsewhere - presumably it splits the
        # index list into runs of consecutive integers; verify.
        dicDau = {}
        for vv in group_consecutives(listUpper):
            if len(vv) >= 2:
                listreplace.append(vv)
                for v in vv[:-1]:
                    dicDau[v]=1
        # Append self.dau[1] after marked tokens and self.dau[0] after all
        # others (presumably two separator strings - confirm).
        result = ""
        for i in range(0, len(ttp)):
            result += ttp[i]+self.dau[dicDau.get(i,0)]
        return result.strip()
Exemplo n.º 10
0
def cal_follow(gene_list: list, Vn_list: list,
               first_dict: dict, start_symbol: str = 'S') -> 'follow_dict':
    """Compute the FOLLOW set of every non-terminal.

    Args:
        gene_list: productions as ``(left, right)`` pairs; ``right`` is a
            string of single-character symbols.  Symbols accepted by
            ``isupper`` are non-terminals, every other symbol is a terminal.
        Vn_list: all non-terminals; each gets an entry in the result.
        first_dict: FIRST set of each non-terminal; ``''`` in a set marks the
            non-terminal as nullable.
        start_symbol: the grammar's start symbol, whose FOLLOW set is seeded
            with the end marker ``'#'``.  Defaults to ``'S'``.

    Returns:
        dict mapping each non-terminal to its FOLLOW set, e.g.
        ``{'A': {'b', '#'}}``.  The empty string never appears in a FOLLOW
        set.

    Raises:
        KeyError: if ``start_symbol`` or a non-terminal used in a production
            is missing from ``Vn_list`` / ``first_dict``.
    """
    follow_dict = {Vn: set() for Vn in Vn_list}
    follow_dict[start_symbol].add('#')

    # Fixed-point iteration: the sets only ever grow, so stop after the
    # first pass that adds nothing.
    changed = True
    while changed:
        changed = False
        for gene in gene_list:
            left, right = gene[0], gene[1]
            for position, symbol in enumerate(right):
                if not isupper(symbol):
                    continue

                target = follow_dict[symbol]
                size_before = len(target)

                # Collect what can start the remainder of the right-hand
                # side, stopping at the first symbol that cannot vanish.
                rest_derives_empty = True
                for following in right[position + 1:]:
                    if not isupper(following):
                        target.add(following)
                        rest_derives_empty = False
                        break
                    following_first = first_dict[following]
                    target.update(following_first - {''})
                    if '' not in following_first:
                        rest_derives_empty = False
                        break

                # Nothing (or only nullable symbols) follows: whatever may
                # follow the left-hand side may follow this symbol too.
                if rest_derives_empty:
                    target.update(follow_dict[left])

                if len(target) != size_before:
                    changed = True

    print('follow集为:', follow_dict)
    return follow_dict