Пример #1
0
def howmuch_3(segment_set, num, ner):
    """Build "how much" questions from the segment at index ``num``.

    The segment is POS-tagged and chunked for a verb group followed by one
    or more cardinal numbers (``CD``), optionally preceded by ``$`` tokens.
    For every such chunk the numeric part is dropped, the leading verb part
    is kept, and the text before it is re-chunked to find a subject + verb
    clause around which the question is assembled.

    Args:
        segment_set: Sequence of text segments; ``segment_set[num]`` is the
            one analysed, and every later segment is appended to each
            question, separated by commas.
        num: Index of the segment to analyse.
        ner: Not used by this function.

    Returns:
        A list of question strings (after ``parser.postprocess``); empty
        when no chunk or no clause is found.
    """
    amount_grammar = r"""chunk:{<MD>?<VB|VBD|VBG|VBP|VBN|VBZ>+<IN|TO>?<PRP|PRP\$|NN.?>?<\$>*<CD>+}"""
    verb_grammar = r"""chunk:{<MD>?<VB|VBD|VBG|VBP|VBN|VBZ>+<IN|TO>?<PRP|PRP\$|NN.?>?}"""
    clause_grammar = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*<VB.?|MD|RP>+}"""

    def parse_with(grammar, text):
        # Tokenize, POS-tag and chunk `text` with a one-rule grammar.
        tagged = nltk.pos_tag(nltk.word_tokenize(text))
        return nltk.RegexpParser(grammar).parse(tagged)

    def render(tree, hits, positions):
        # Concatenate the nodes at `positions`: nodes listed in `hits` go
        # through get_chunk, every other node contributes its word + " ".
        return "".join(
            get_chunk(tree[i]) if i in hits else tree[i][0] + " "
            for i in positions
        )

    sentence = segment_set[num]
    chunked = parse_with(amount_grammar, sentence)
    # NOTE(review): chunk_search is assumed to return the indices of the
    # chunk subtrees inside `chunked` -- confirm against parser.
    amount_hits = parser.chunk_search(sentence, chunked)
    questions = []

    if len(amount_hits) == 0:
        return questions

    for j in range(len(chunked)):
        if j not in amount_hits:
            continue

        prefix = render(chunked, amount_hits, range(j))
        suffix = render(chunked, amount_hits, range(j + 1, len(chunked)))

        # Re-chunk the matched text without the <$>*<CD>+ tail and keep only
        # the first node of that parse (presumably the verb group).
        amount_text = get_chunk(chunked[j])
        verb_tree = parse_with(verb_grammar, amount_text)
        prefix += " " + get_chunk(verb_tree[0])

        clause_tree = parse_with(clause_grammar, prefix)
        clause_hits = parser.chunk_search(prefix, clause_tree)
        if len(clause_hits) == 0:
            continue

        # The last clause chunk is the pivot the question is built around.
        pivot = clause_hits[len(clause_hits) - 1]
        verb_phrase = parser.verbphrase_identify(get_chunk(clause_tree[pivot]))
        head = render(clause_tree, clause_hits, range(pivot))
        tail = render(
            clause_tree, clause_hits, range(pivot + 1, len(clause_tree))
        )

        question = head + ' how much ' + verb_phrase + tail + suffix
        question += "".join(
            "," + segment_set[i] for i in range(num + 1, len(segment_set))
        )
        question += '?'
        questions.append(parser.postprocess(question))

    return questions
Пример #2
0
def what_to_do(segment_set, num, ner):
    """Build wh-questions from infinitive ("to <verb> ...") phrases.

    The segment at ``segment_set[num]`` is POS-tagged and chunked for
    ``to`` + verb groups with an optional noun-phrase object. For each such
    chunk the object is replaced by a question word chosen from the NER
    label of the chunk's first noun, and the text before the chunk is
    re-chunked to find the subject + verb clause the question is built
    around.

    Args:
        segment_set: Sequence of text segments; ``segment_set[num]`` is the
            one analysed, and every later segment is appended to each
            question, separated by commas.
        num: Index of the segment to analyse.
        ner: Sequence of ``(word, label)`` pairs; labels ``PERSON``,
            ``LOC``/``ORG``/``GPE`` and ``TIME``/``DATE`` select "whom",
            "where" and "when" respectively.

    Returns:
        A list of question strings (after ``parser.postprocess``); empty
        when no chunk or no clause is found.
    """
    infinitive_grammar = r"""chunk:{<TO>+<VB|VBP|RP>+<DT>?<RB.?>*<JJ.?>*<NN.?|PRP|PRP\$|POS|VBG|DT>*}"""
    object_grammar = r"""chunk:{<DT>?<RB.?>*<JJ.?>*<NN.?|PRP|PRP\$|POS|VBG|DT>+}"""
    clause_grammar = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*<VB.?|MD|RP>+}"""
    noun_tags = ("NNP", "NNPS", "NNS", "NN")

    def parse_with(grammar, text):
        # Tokenize, POS-tag and chunk `text` with a one-rule grammar.
        tagged = nltk.pos_tag(nltk.word_tokenize(text))
        return nltk.RegexpParser(grammar).parse(tagged)

    def render(tree, hits, positions):
        # Concatenate the nodes at `positions`: nodes listed in `hits` go
        # through get_chunk, every other node contributes its word + " ".
        return "".join(
            get_chunk(tree[i]) if i in hits else tree[i][0] + " "
            for i in positions
        )

    sentence = segment_set[num]
    chunked = parse_with(infinitive_grammar, sentence)
    # NOTE(review): chunk_search is assumed to return the indices of the
    # chunk subtrees inside `chunked` -- confirm against parser.
    list1 = parser.chunk_search(sentence, chunked)
    questions = []

    if len(list1) == 0:
        return questions

    for j in range(len(chunked)):
        if j not in list1:
            continue

        before = render(chunked, list1, range(j))
        after = render(chunked, list1, range(j + 1, len(chunked)))

        # Keep the part of the infinitive phrase that precedes its last
        # noun-phrase object; fall back to " to do " when it has no object.
        infinitive = get_chunk(chunked[j])
        chunked2 = parse_with(object_grammar, infinitive)
        object_hits = parser.chunk_search(infinitive, chunked2)
        if len(object_hits) != 0:
            last_object = get_chunk(chunked2[object_hits[-1]])
            # NOTE(review): if the object text is not found, find() returns
            # -1 and the slice drops only the final character.
            cut = infinitive.find(last_object)
            kept = " " + infinitive[0:cut]
        else:
            kept = " to do "

        chunked1 = parse_with(clause_grammar, before)
        list2 = parser.chunk_search(before, chunked1)
        if len(list2) == 0:
            continue

        # The last clause chunk is the pivot the question is built around.
        pivot = list2[-1]
        verb_phrase = parser.verbphrase_identify(get_chunk(chunked1[pivot]))
        head = render(chunked1, list2, range(pivot))
        tail = render(chunked1, list2, range(pivot + 1, len(chunked1)))

        # Default question word. Previously the variable was left unbound
        # (UnboundLocalError) or kept the previous iteration's value when no
        # NER entry matched the chunk's noun.
        wh_word = " what "

        # NOTE(review): `j` indexes `chunked`, not `chunked2`, so this lookup
        # can run out of range; the expression is kept as written but no
        # longer crashes. Confirm which node was meant.
        try:
            second_is_pronoun = chunked2[j][1][1] == 'PRP'
        except IndexError:
            second_is_pronoun = False

        if second_is_pronoun:
            wh_word = " whom "
        else:
            target = chunked[j]
            # First noun in the chunk, or its last token when there is none.
            noun_at = len(target) - 1
            for pos, leaf in enumerate(target):
                if leaf[1] in noun_tags:
                    noun_at = pos
                    break

            # No break: when several NER entries match, the last one wins.
            for entity in ner:
                if entity[0] != target[noun_at][0]:
                    continue
                label = entity[1]
                if label == "PERSON":
                    wh_word = " whom "
                elif label in ("LOC", "ORG", "GPE"):
                    wh_word = " where "
                elif label in ("TIME", "DATE"):
                    wh_word = " when "
                else:
                    wh_word = " what "

        question = head + wh_word + verb_phrase + kept + tail + after
        question += "".join(
            "," + segment_set[i] for i in range(num + 1, len(segment_set))
        )
        question += '?'
        questions.append(parser.postprocess(question))

    return questions