def whom_2(segment_set, num, ner):
    """Generate wh-questions (whom / where / when / what) from one segment.

    Chunks segment_set[num] for prepositional noun phrases, replaces each
    such chunk with a question word chosen from the NER tag of its head
    noun, fronts the verb phrase found in the remaining left context, and
    returns the resulting question strings.

    Parameters
    ----------
    segment_set : sequence of sentence segments (strings)
    num : index of the segment to transform
    ner : iterable of (token, entity_label) pairs used to pick the
          question word (PERSON -> whom, LOC/ORG/GPE -> where,
          TIME/DATE -> when, otherwise -> what)

    Returns a list of question strings (empty if no candidate chunk found).
    """
    tokens = nltk.word_tokenize(segment_set[num])
    tagged = nltk.pos_tag(tokens)
    # Pass 1: prepositional phrases ending in a nominal — question targets.
    grammar = r"""chunk:{<IN>+<DT>?<RB.?>*<JJ.?>*<NN.?|PRP|PRP\$|POS|VBG|DT|CD|VBN>+}"""
    parser = nltk.RegexpParser(grammar)
    chunked = parser.parse(tagged)
    target_idxs = identification.chunk_search(segment_set[num], chunked)
    questions = []
    if not target_idxs:
        return questions
    for j in range(len(chunked)):
        left_text = ""
        qword = ""
        right_text = ""
        if j not in target_idxs:
            continue
        # Rebuild the text to the left and right of the target chunk.
        for k in range(j):
            left_text += nonClause.get_chunk(chunked[k]) if k in target_idxs else chunked[k][0] + " "
        for k in range(j + 1, len(chunked)):
            right_text += nonClause.get_chunk(chunked[k]) if k in target_idxs else chunked[k][0] + " "
        # Choose the question word from the chunk's head noun / pronoun.
        if chunked[j][1][1] == 'PRP':
            qword = " " + chunked[j][0][0] + " whom "
        else:
            # Find the first noun inside the chunk; if none matches, x is
            # left at the last position (original behavior preserved).
            for x in range(len(chunked[j])):
                if chunked[j][x][1] in ("NNP", "NNPS", "NNS", "NN"):
                    break
            for x1 in range(len(ner)):
                if ner[x1][0] != chunked[j][x][0]:
                    continue
                if ner[x1][1] == "PERSON":
                    qword = " " + chunked[j][0][0] + " whom "
                elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                    qword = " where "
                elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                    qword = " when "
                else:
                    qword = " " + chunked[j][0][0] + " what "
        # Pass 2: locate the verb phrase in the left context so it can be
        # moved in front of the question word.
        tokens = nltk.word_tokenize(left_text)
        tagged = nltk.pos_tag(tokens)
        grammar = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*<VB.?|MD|RP>+}"""
        parser = nltk.RegexpParser(grammar)
        vp_chunked = parser.parse(tagged)
        vp_idxs = identification.chunk_search(left_text, vp_chunked)
        if not vp_idxs:
            continue
        m = vp_idxs[-1]
        verb_phrase = nonClause.get_chunk(vp_chunked[m])
        verb_phrase = identification.verbphrase_identify(verb_phrase)
        before_vp = ""
        after_vp = ""
        for k in range(m):
            before_vp += nonClause.get_chunk(vp_chunked[k]) if k in vp_idxs else vp_chunked[k][0] + " "
        for k in range(m + 1, len(vp_chunked)):
            after_vp += nonClause.get_chunk(vp_chunked[k]) if k in vp_idxs else vp_chunked[k][0] + " "
        question = before_vp + qword + verb_phrase + after_vp + right_text
        # Re-attach the trailing segments of the original sentence.
        for l in range(num + 1, len(segment_set)):
            question += "," + segment_set[l]
        question += '?'
        question = identification.postprocess(question)
        # question = 'Q.' + question
        questions.append(question)
    return questions
def howmuch_3(segment_set, num, ner):
    """Generate "how much" questions from one segment.

    Chunks segment_set[num] for verb phrases that end in a numeric
    quantity (optionally preceded by '$'), replaces the quantity with
    "how much", fronts the verb phrase found in the remaining left
    context, and returns the resulting question strings.

    Parameters
    ----------
    segment_set : sequence of sentence segments (strings)
    num : index of the segment to transform
    ner : accepted for signature parity with the sibling generators;
          not consulted by this function

    Returns a list of question strings (empty if no candidate chunk found).
    """
    tokens = nltk.word_tokenize(segment_set[num])
    tagged = nltk.pos_tag(tokens)
    # Pass 1: verb + (optional prep/object) + optional '$' + number.
    grammar = r"""chunk:{<MD>?<VB|VBD|VBG|VBP|VBN|VBZ>+<IN|TO>?<PRP|PRP\$|NN.?>?<\$>*<CD>+}"""
    parser = nltk.RegexpParser(grammar)
    chunked = parser.parse(tagged)
    target_idxs = identification.chunk_search(segment_set[num], chunked)
    questions = []
    if not target_idxs:
        return questions
    for j in range(len(chunked)):
        left_text = ""
        qword = ""
        right_text = ""
        if j not in target_idxs:
            continue
        # Rebuild the text to the left and right of the target chunk.
        for k in range(j):
            left_text += nonClause.get_chunk(chunked[k]) if k in target_idxs else chunked[k][0] + " "
        for k in range(j + 1, len(chunked)):
            right_text += nonClause.get_chunk(chunked[k]) if k in target_idxs else chunked[k][0] + " "
        # Keep the verb part of the target chunk (drop the '$'/number) by
        # re-chunking it without the quantity tail.
        chunk_text = nonClause.get_chunk(chunked[j])
        tokens = nltk.word_tokenize(chunk_text)
        tagged = nltk.pos_tag(tokens)
        grammar = r"""chunk:{<MD>?<VB|VBD|VBG|VBP|VBN|VBZ>+<IN|TO>?<PRP|PRP\$|NN.?>?}"""
        parser = nltk.RegexpParser(grammar)
        verb_only = parser.parse(tagged)
        chunk_text = nonClause.get_chunk(verb_only[0])
        left_text += " " + chunk_text
        qword = ' how much '
        # Pass 2: locate the verb phrase in the left context so it can be
        # moved in front of the question word.
        tokens = nltk.word_tokenize(left_text)
        tagged = nltk.pos_tag(tokens)
        grammar = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*<VB.?|MD|RP>+}"""
        parser = nltk.RegexpParser(grammar)
        vp_chunked = parser.parse(tagged)
        vp_idxs = identification.chunk_search(left_text, vp_chunked)
        if not vp_idxs:
            continue
        m = vp_idxs[-1]
        verb_phrase = nonClause.get_chunk(vp_chunked[m])
        verb_phrase = identification.verbphrase_identify(verb_phrase)
        before_vp = ""
        after_vp = ""
        for k in range(m):
            before_vp += nonClause.get_chunk(vp_chunked[k]) if k in vp_idxs else vp_chunked[k][0] + " "
        for k in range(m + 1, len(vp_chunked)):
            after_vp += nonClause.get_chunk(vp_chunked[k]) if k in vp_idxs else vp_chunked[k][0] + " "
        question = before_vp + qword + verb_phrase + after_vp + right_text
        # Re-attach the trailing segments of the original sentence.
        for l in range(num + 1, len(segment_set)):
            question += "," + segment_set[l]
        question += '?'
        question = identification.postprocess(question)
        # question = 'Q.' + question
        questions.append(question)
    return questions