def howmuch_3(segment_set, num, ner):
    """Build "how much" questions from one segment of a sentence.

    ``segment_set[num]`` is tokenized and POS-tagged with NLTK, then searched
    for chunks shaped like: optional modal, one or more verbs, optional
    preposition/``to``, optional pronoun/noun, any number of ``$`` tokens and
    one or more cardinal numbers.  For each such chunk a question is
    assembled as::

        <text before the clause> + ' how much ' + <rewritten clause>
        + <rest of the text before the amount> + <text after the chunk>
        + ',' + each following segment + '?'

    and passed through ``parser.postprocess``.

    Args:
        segment_set: Sequence of segment strings.
        num: Index of the segment to turn into questions.
        ner: Not used by this function.

    Returns:
        List of question strings; empty when nothing usable is found.
    """
    amount_grammar = r"""chunk:{<MD>?<VB|VBD|VBG|VBP|VBN|VBZ>+<IN|TO>?<PRP|PRP\$|NN.?>?<\$>*<CD>+}"""
    verb_grammar = r"""chunk:{<MD>?<VB|VBD|VBG|VBP|VBN|VBZ>+<IN|TO>?<PRP|PRP\$|NN.?>?}"""
    clause_grammar = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*<VB.?|MD|RP>+}"""

    def _parse(text, grammar):
        # Tokenize, tag, then chunk `text` with a parser built from `grammar`.
        tokens = nltk.word_tokenize(text)
        tagged = nltk.pos_tag(tokens)
        return nltk.RegexpParser(grammar).parse(tagged)

    def _stitch(tree, positions, chunk_ids):
        # Rebuild text for `positions`: chunk nodes go through get_chunk,
        # every other node contributes its word followed by a space.
        return "".join(
            get_chunk(tree[k]) if k in chunk_ids else (tree[k][0] + " ")
            for k in positions
        )

    source = segment_set[num]
    sentence_tree = _parse(source, amount_grammar)
    # The result is used below as a collection of positions in the parse
    # (presumably those of the chunk subtrees -- see parser.chunk_search).
    amount_ids = parser.chunk_search(source, sentence_tree)

    questions = []
    if len(amount_ids) == 0:
        return questions

    for pos in range(len(sentence_tree)):
        if pos not in amount_ids:
            continue

        before = _stitch(sentence_tree, range(pos), amount_ids)
        after = _stitch(
            sentence_tree, range(pos + 1, len(sentence_tree)), amount_ids
        )

        # Re-parse the chunk with a grammar that has no "$"/number tail and
        # keep only the first node of that parse.
        amount_text = get_chunk(sentence_tree[pos])
        verb_tree = _parse(amount_text, verb_grammar)
        before += " " + get_chunk(verb_tree[0])

        clause_tree = _parse(before, clause_grammar)
        clause_ids = parser.chunk_search(before, clause_tree)
        if len(clause_ids) == 0:
            continue

        # The last clause found is the one the question is built around.
        pivot = clause_ids[-1]
        verb_phrase = parser.verbphrase_identify(get_chunk(clause_tree[pivot]))
        head = _stitch(clause_tree, range(pivot), clause_ids)
        tail = _stitch(clause_tree, range(pivot + 1, len(clause_tree)), clause_ids)

        question = head + ' how much ' + verb_phrase + tail + after
        question += "".join(
            "," + segment_set[later]
            for later in range(num + 1, len(segment_set))
        )
        question += '?'
        questions.append(parser.postprocess(question))

    return questions
def what_to_do(segment_set, num, ner):
    """Build questions about an infinitive ("to <verb> ...") phrase.

    ``segment_set[num]`` is tokenized and POS-tagged with NLTK, then searched
    for chunks made of ``to`` + verb/particle, optionally followed by a noun
    phrase.  For each such chunk the text before it is searched for a
    clause; the last one found is rewritten by
    ``parser.verbphrase_identify`` and the question is assembled as::

        <text before the clause> + <question word> + <rewritten clause>
        + <infinitive stub> + <rest of the text before the chunk>
        + <text after the chunk> + ',' + each following segment + '?'

    and passed through ``parser.postprocess``.

    The infinitive stub is the chunk text up to its last noun phrase, or
    ``" to do "`` when the chunk contains no noun phrase.

    The question word is ``" whom "`` when the pronoun test holds.  Otherwise
    it is chosen from the entity label of the chunk's first noun (or its
    last token when it has no noun): PERSON -> whom, LOC/ORG/GPE -> where,
    TIME/DATE -> when, any other label -> what.  When ``ner`` has no entry
    for that token the word falls back to ``" what "``.

    Args:
        segment_set: Sequence of segment strings.
        num: Index of the segment to turn into questions.
        ner: Sequence of entries where ``entry[0]`` is a token text and
            ``entry[1]`` its entity label.

    Returns:
        List of question strings; empty when nothing usable is found.
    """
    infinitive_grammar = r"""chunk:{<TO>+<VB|VBP|RP>+<DT>?<RB.?>*<JJ.?>*<NN.?|PRP|PRP\$|POS|VBG|DT>*}"""
    noun_grammar = r"""chunk:{<DT>?<RB.?>*<JJ.?>*<NN.?|PRP|PRP\$|POS|VBG|DT>+}"""
    clause_grammar = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*<VB.?|MD|RP>+}"""
    noun_tags = ("NNP", "NNPS", "NNS", "NN")

    def _tag_and_parse(chunker, text):
        # Tokenize and POS-tag `text`, then chunk it with `chunker`.
        return chunker.parse(nltk.pos_tag(nltk.word_tokenize(text)))

    def _stitch(tree, positions, chunk_ids):
        # Rebuild text for `positions`: chunk nodes go through get_chunk,
        # every other node contributes its word followed by a space.
        return "".join(
            get_chunk(tree[k]) if k in chunk_ids else (tree[k][0] + " ")
            for k in positions
        )

    source = segment_set[num]
    sentence_tree = _tag_and_parse(nltk.RegexpParser(infinitive_grammar), source)
    # The result is used below as a collection of positions in the parse
    # (presumably those of the chunk subtrees -- see parser.chunk_search).
    infinitive_ids = parser.chunk_search(source, sentence_tree)
    if not infinitive_ids:
        return []

    # The grammars never change, so build these parsers once per call
    # instead of once per chunk.
    noun_chunker = nltk.RegexpParser(noun_grammar)
    clause_chunker = nltk.RegexpParser(clause_grammar)

    questions = []
    for pos in range(len(sentence_tree)):
        if pos not in infinitive_ids:
            continue

        infinitive = sentence_tree[pos]
        before = _stitch(sentence_tree, range(pos), infinitive_ids)
        after = _stitch(
            sentence_tree, range(pos + 1, len(sentence_tree)), infinitive_ids
        )

        # Keep the part of the chunk that precedes its last noun phrase.
        infinitive_text = get_chunk(infinitive)
        noun_tree = _tag_and_parse(noun_chunker, infinitive_text)
        noun_ids = parser.chunk_search(infinitive_text, noun_tree)
        if noun_ids:
            last_noun_text = get_chunk(noun_tree[noun_ids[-1]])
            cut = infinitive_text.find(last_noun_text)
            stub = " " + infinitive_text[0:cut]
        else:
            stub = " to do "

        clause_tree = _tag_and_parse(clause_chunker, before)
        clause_ids = parser.chunk_search(before, clause_tree)
        if not clause_ids:
            continue

        # The last clause found is the one the question is built around.
        pivot = clause_ids[-1]
        verb_phrase = parser.verbphrase_identify(get_chunk(clause_tree[pivot]))
        head = _stitch(clause_tree, range(pivot), clause_ids)
        tail = _stitch(clause_tree, range(pivot + 1, len(clause_tree)), clause_ids)

        # NOTE(review): `pos` is a position in the sentence parse, yet it
        # indexes the parse of the chunk text here.  The expression is kept
        # as written because the intended index is unclear; it is only
        # guarded so that a position outside the shorter parse no longer
        # aborts the whole call with IndexError.
        try:
            object_is_pronoun = noun_tree[pos][1][1] == 'PRP'
        except IndexError:
            object_is_pronoun = False

        if object_is_pronoun:
            wh_word = " whom "
        else:
            # First noun of the chunk, or its last token when there is none.
            noun_at = 0
            for noun_at in range(len(infinitive)):
                if infinitive[noun_at][1] in noun_tags:
                    break
            key_token = infinitive[noun_at][0]

            # Reset per chunk: previously an unmatched token left the
            # question word unbound (UnboundLocalError) or carried over the
            # word chosen for an earlier chunk.
            wh_word = " what "
            # No break on purpose: as before, the last matching entry wins.
            for entry in ner:
                if entry[0] == key_token:
                    label = entry[1]
                    if label == "PERSON":
                        wh_word = " whom "
                    elif label in ("LOC", "ORG", "GPE"):
                        wh_word = " where "
                    elif label in ("TIME", "DATE"):
                        wh_word = " when "
                    else:
                        wh_word = " what "

        question = head + wh_word + verb_phrase + stub + tail + after
        question += "".join(
            "," + segment_set[later]
            for later in range(num + 1, len(segment_set))
        )
        question += '?'
        questions.append(parser.postprocess(question))

    return questions