from nltk.tokenize.treebank import TreebankWordDetokenizer


def detokenize(tokens):
    # Rejoin the tokens, then tighten the spacing the detokenizer leaves
    # around punctuation and curly quotes.
    output = TreebankWordDetokenizer().detokenize(tokens)
    output = output.replace(" , ", ", ")
    output = output.replace(" ' ", "'")
    output = output.replace(" ’ ", "’")
    output = output.replace(" ’", "’")
    output = output.replace(" . ", ". ")
    output = output.replace(" : ", ": ")
    output = output.replace(" ; ", "; ")
    return output
from nltk.tokenize.treebank import TreebankWordDetokenizer


def detokenize(tokens):
    # Detokenize the tokens back into a sentence.
    detokenized_text = TreebankWordDetokenizer().detokenize(tokens)
    detokenized_text = detokenized_text.replace(" .", ".")  # remove the space before a period
    detokenized_text = detokenized_text.replace(" ,", ",")  # remove the space before a comma
    detokenized_text = detokenized_text.replace(" :", ":")  # remove the space before a colon
    return detokenized_text
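# A minimal usage sketch for the detokenize helpers above; the sample
# token list is invented for illustration.
if __name__ == "__main__":
    tokens = ["Hello", ",", "world", ":", "this", "works", "."]
    print(detokenize(tokens))  # e.g. "Hello, world: this works."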
import string

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize.treebank import TreebankWordDetokenizer


def clean(features):
    cleaned = []
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    transform = str.maketrans('', '', string.punctuation)
    for review in features:
        words = nltk.tokenize.word_tokenize(review)
        words = [word.lower() for word in words]
        # remove punctuation
        words = [word.translate(transform) for word in words]
        # remove non-alphabetical words
        words = [word for word in words if word.isalpha()]
        # remove overused stopwords like a, and, the
        words = [word for word in words if word not in stop_words]
        # remove line-break tags (<br> is left as a bare 'br' token once
        # the punctuation has been stripped)
        words = [word for word in words if word != 'br']
        # convert words to their root forms (verbs, then adjectives)
        words = [lemmatizer.lemmatize(word, pos='v') for word in words]
        words = [lemmatizer.lemmatize(word, pos='a') for word in words]
        # combine the list of words back into a review
        review = TreebankWordDetokenizer().detokenize(words)
        cleaned.append(review)
    return cleaned
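# A quick, hypothetical call of clean(); the review strings are made up,
# and the nltk data it needs (punkt, stopwords, wordnet) must be downloaded.
if __name__ == "__main__":
    reviews = ["This movie was GREAT!<br />I loved the acting.",
               "The plot is a mess and the pacing is even worse."]
    print(clean(reviews))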
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer


def remove_stopwords(text):
    """
    Remove all the stopwords in ``text`` (the, an, a, in, etc.)
    using the nltk list of stopwords.

    Args:
        text (str): raw text

    Returns:
        str
    """
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(text)
    filtered_words = [w for w in word_tokens if w not in stop_words]
    s = TreebankWordDetokenizer().detokenize(filtered_words)
    s = s.replace("``", ' "').replace("''", '"')
    while " 's" in s:
        s = s.replace(" 's", "'s")
    return s.replace(" .", ".")
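# Example call for remove_stopwords(); the input sentence is invented.
if __name__ == "__main__":
    print(remove_stopwords("This is a sentence with the usual stopwords in it."))
    # e.g. "This sentence usual stopwords."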
from nltk.tokenize.treebank import TreebankWordDetokenizer


def make_readable_sample(samp):
    # Wrap the subject span in <e1>...</e1> and the object span in
    # <e2>...</e2>, then rebuild the sentence.
    s = samp['token'].copy()
    s[samp['subj_start']] = '<e1>' + s[samp['subj_start']]
    s[samp['subj_end']] = s[samp['subj_end']] + '</e1>'
    s[samp['obj_start']] = '<e2>' + s[samp['obj_start']]
    s[samp['obj_end']] = s[samp['obj_end']] + '</e2>'
    s_detokenized = TreebankWordDetokenizer().detokenize(s)
    # collapse doubled angle brackets left around adjacent tags
    s_detokenized = s_detokenized.replace(">>", ">")
    s_detokenized = s_detokenized.replace("<<", "<")
    return {'id': samp['id'],
            'relation': samp['relation'],
            'token': s_detokenized,
            'subj_type': samp['subj_type'],
            'obj_type': samp['obj_type']}
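# A usage sketch with a hand-made sample dict; the field values are
# invented, but the layout matches what the function expects.
if __name__ == "__main__":
    samp = {'id': 'e001', 'relation': 'per:employee_of',
            'token': ['John', 'works', 'at', 'Acme', '.'],
            'subj_start': 0, 'subj_end': 0, 'obj_start': 3, 'obj_end': 3,
            'subj_type': 'PERSON', 'obj_type': 'ORGANIZATION'}
    print(make_readable_sample(samp)['token'])
    # e.g. "<e1>John</e1> works at <e2>Acme</e2>."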
from nltk.tokenize.treebank import TreebankWordDetokenizer


def remove_duplicate_tokens(input_string):
    """Removes duplicate tokens from input string, unless permitted

    :param input_string:
    :return: output string without duplicate tokens unless allowed
    :rtype: str
    """
    refined_phrase_list = []
    new_phrase_list = input_string.split(' ')
    for token in new_phrase_list:
        if token not in refined_phrase_list:
            refined_phrase_list.append(token)
    refined_string = TreebankWordDetokenizer().detokenize(refined_phrase_list)
    refined_string = refined_string.strip()
    # Permitted duplicate tokens are restored (more such tokens could in
    # future be handled by storing them in pre-defined resources)
    if "gallus gallus" in input_string \
            and "gallus gallus" not in refined_string:
        refined_string = refined_string.replace("gallus", "gallus gallus")
    return refined_string
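# Example: duplicate tokens collapse, but the permitted "gallus gallus"
# bigram is restored afterwards.
if __name__ == "__main__":
    print(remove_duplicate_tokens("the the gallus gallus genome"))
    # -> "the gallus gallus genome"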
import copy
import random

from nltk import tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer


# change_rolename swaps character names in longtext for randomly chosen
# opposite-gender names; malenames, femalenames, boy, girl, surname and
# unsex are name lists defined elsewhere in the module.
def change_rolename(longtext, role, delet):
    originalnames = []
    newnames = []
    new_role = copy.deepcopy(role)
    # First pass: replace two-word names whose first name is in delet.
    for x in role:
        name_list = []
        if len(x.split()) == 2:
            name_list.append(x.split()[0])
            name_list.append(x.split()[1])
            for y in delet:
                if y in x.split():
                    if y.capitalize() in malenames:
                        number1 = random.randint(0, len(girl) - 1)
                        number2 = random.randint(0, len(surname) - 1)
                        name_list.append(girl[number1])
                        name_list.append(surname[number2])
                        print(name_list)
                        originalname = name_list[0] + ' ' + name_list[1]
                        newname = name_list[2] + ' ' + name_list[3]
                        originalnames.append(originalname)
                        newnames.append(newname)
                        if x in new_role:
                            new_role.remove(x)
                        longtext = longtext.replace(originalname, newname)
                    elif y.capitalize() in femalenames:
                        number1 = random.randint(0, len(boy) - 1)
                        number2 = random.randint(0, len(surname) - 1)
                        name_list.append(boy[number1])
                        name_list.append(surname[number2])
                        print(name_list)
                        originalname = name_list[0] + ' ' + name_list[1]
                        newname = name_list[2] + ' ' + name_list[3]
                        originalnames.append(originalname)
                        newnames.append(newname)
                        if x in new_role:
                            new_role.remove(x)
                        longtext = longtext.replace(originalname, newname)
    print(new_role)
    # Second pass: replace standalone first names with the new first name.
    for y in delet:
        for z1, z2 in zip(originalnames, newnames):
            if y in z1:
                firstname = z2.split()[0]
                token = tokenize.word_tokenize(longtext)
                for i, k in enumerate(token):
                    if k == y:
                        token[i] = firstname
                longtext = TreebankWordDetokenizer().detokenize(token)
    # Third pass: swap whatever names are left in new_role.
    for rest in new_role:
        if len(rest.split()) == 1:
            if rest in malenames:
                number = random.randint(0, len(girl) - 1)
                longtext = longtext.replace(rest, girl[number])
            elif rest in femalenames:
                number = random.randint(0, len(boy) - 1)
                longtext = longtext.replace(rest, boy[number])
            elif rest in unsex:
                number = random.randint(0, len(unsex) - 1)
                longtext = longtext.replace(rest, unsex[number])
        elif len(rest.split()) > 1:
            for part in rest.split():
                if part in malenames:
                    number1 = random.randint(0, len(girl) - 1)
                    number2 = random.randint(0, len(surname) - 1)
                    newname = girl[number1] + ' ' + surname[number2]
                    longtext = longtext.replace(rest, newname)
                    break
                elif part in femalenames:
                    number1 = random.randint(0, len(boy) - 1)
                    number2 = random.randint(0, len(surname) - 1)
                    newname = boy[number1] + ' ' + surname[number2]
                    longtext = longtext.replace(rest, newname)
                    break
                elif part in unsex:
                    number1 = random.randint(0, len(boy) - 1)
                    number2 = random.randint(0, len(surname) - 1)
                    newname = boy[number1] + ' ' + surname[number2]
                    longtext = longtext.replace(rest, newname)
                    break
    print(longtext)
    print(new_role)
    return longtext
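# A tiny, invented setup just to show the call shape of change_rolename();
# the real name lists are loaded elsewhere.
if __name__ == "__main__":
    malenames = ["John"]
    femalenames = ["Mary"]
    boy = ["Tom"]
    girl = ["Anna"]
    surname = ["Smith"]
    unsex = ["Alex"]
    text = "John Smith met Mary. John smiled."
    print(change_rolename(text, ["John Smith", "Mary"], ["John"]))
    # e.g. "Anna Smith met Tom. Anna smiled."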
from nltk.tokenize.treebank import TreebankWordDetokenizer

# Fragment: s is the token list of one book and t its title; tokenizer,
# deromanize (Roman numeral -> int) and the nl accumulator come from the
# surrounding script.
c_indices = []
indices = []
for i, tok in enumerate(s):
    if tok == "CHAPTER" or tok == "Chapter":
        c_indices.append(True)
        indices.append(i + 1)
# Walk the indices in reverse so each deletion doesn't shift the positions
# of the headings still to be removed.
for i in sorted(indices, reverse=True):
    if isinstance(deromanize(s[i]), int) or s[i].isdigit():
        del s[i]      # remove the chapter number
        del s[i - 1]  # remove the "CHAPTER" token
n = TreebankWordDetokenizer().detokenize(s)
# n = n.replace("CHAPTER", "")
# n = n.replace("Chapter", "")
n = n.replace("--", " - ")
n = n.replace("_", "")
n = n.strip()
# n = " ".join(n.split())
nl.append(n)

tok_title = tokenizer.tokenize(t.lower())
output_title = []
for i, v in enumerate(tok_title):
    output_title.append(v)
output_title = "".join(output_title)
output_title = output_title.replace(" ", "_")
print(output_title)

with open(output_title, "w", encoding="UTF-8") as f:
    for l in nl:
        f.write(l + "\n")  # assumed continuation; the source breaks off here
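# A self-contained sketch of the chapter-stripping step above, with a
# stand-in deromanize (the original's version is defined elsewhere):
from nltk.tokenize.treebank import TreebankWordDetokenizer

def deromanize(tok):
    # stand-in: maps a few Roman numerals to ints, None otherwise
    return {"I": 1, "II": 2, "III": 3, "IV": 4, "V": 5}.get(tok)

s = ["CHAPTER", "II", "It", "was", "a", "dark", "night", "."]
if isinstance(deromanize(s[1]), int) or s[1].isdigit():
    del s[1]  # the numeral
    del s[0]  # the "CHAPTER" token
print(TreebankWordDetokenizer().detokenize(s))  # e.g. "It was a dark night."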
from nltk.tokenize import word_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer
from termcolor import colored


def extro_intro_switch(lineList):
    """Rewrite each Italian line and append it to the intro/extro output
    file according to the pattern it opens or closes with."""
    outF_intro = open("myOutFile_intro.txt", "a")
    outF_extro = open("myOutFile_extro.txt", "a")
    del lineList[:4]  # delete the first 4 lines of the topic text
    for line in lineList:  # each line is the input
        # Tokenize the sentence
        tokens = word_tokenize(line)
        # save the info needed for the topic
        tok_0 = tokens[0]
        tok_1 = tokens[1]
        last_tok = tokens[-2:]
        # delete the topic info so the sentence itself can be analyzed
        del tokens[0]
        del tokens[0]
        del tokens[-1]
        del tokens[-1]
        # detokenize the sentence (tok_0 and tok_1 are single strings, so
        # detokenize joins their characters; the spaces are stripped below)
        sent_2 = TreebankWordDetokenizer().detokenize(tokens)
        tok_0_det = TreebankWordDetokenizer().detokenize(tok_0)
        tok_1_det = TreebankWordDetokenizer().detokenize(tok_1)
        last_tok_det = TreebankWordDetokenizer().detokenize(last_tok)
        # delete the space between the characters
        tok_0_det = tok_0_det.replace(" ", "")
        tok_1_det = tok_1_det.replace(" ", "")
        last_tok_det = last_tok_det.replace(" ", "")
        # convert to list
        l = list(sent_2)
        try:
            l.append(l[1])  # to avoid list IndexError
            if tokens[0] == "Posso" and tokens[1] == "chiederti" \
                    and tokens[2] == "di" and tokens[3] == "nuovo":
                l[:24] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(line)
                print(colored("extro sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Posso" and tokens[1] == "chiederti":
                l[:25] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(line)
                print(colored("extro sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "So" and tokens[1] == "che":
                l[:6] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(line)
                l2 = list(complete_sent)
                del l2[-3]
                complete_sent2 = "".join(l2)
                print(colored("extro sentence: ", "blue"))
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Sei" and tokens[1] == "molto" \
                    and tokens[2] == "paziente":
                l[:20] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(line)
                l2 = list(complete_sent)
                del l2[-3]
                complete_sent2 = "".join(l2)
                print(colored("extro sentence: ", "blue"))
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[-1] == "paziente" and tokens[-2] == "davvero" \
                    and tokens[-3] == "sei":
                l[-23:] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Per" and tokens[1] == "favore" \
                    and tokens[2] == "," and tokens[3] == "parlami":
                l[:11] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Dai," + sent_new + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Per" and tokens[1] == "favore" \
                    and tokens[2] == "," and tokens[3] == "dimmi":
                l[:11] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Dai," + sent_new + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Lo" and tokens[1] == "vedo":
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "Lo vedo amico mio" + last_tok_det
                print(line)
                print(colored('extro-sentence: ', 'red'))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Dovresti":
                l[:8] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Forse dovresti" + sent_new + last_tok_det
                print(line)
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "E'" and tokens[1] == "molto" \
                    and tokens[2] == "interessante":
                l[:27] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "E' molto interessante" + sent_new + last_tok_det
                print(colored('extro-sentence: ', 'red'))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Va" and tokens[1] == "bene" and tokens[3] == ",":
                l[:8] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Va bene amico," + sent_new + last_tok_det
                print(colored('extro-sentence: ', 'blue'))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif l[-1] == "?" and tokens[0] != "Posso":
                complete_sent = tok_0_det + tok_1_det + "Posso farti una domanda?" + sent_2 + last_tok_det
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Mi" and tokens[1] == "puoi" and tokens[2] == "dire":
                l[:12] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Potresti dirmi" + sent_new + last_tok_det
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            else:
                print(colored("Not classifiable: ", 'green'))
                print(line)
                print("_____________________________")
        except IndexError:
            continue
    outF_extro.close()
    outF_intro.close()
from random import randint

from nltk.tokenize import word_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer
from termcolor import colored


def extro_intro_switch(lineList):
    """Rewrite each line and append it to the intro/extro output file
    according to the pattern it opens or closes with."""
    outF_intro = open("myOutFile_intro.txt", "a")
    outF_extro = open("myOutFile_extro.txt", "a")
    del lineList[:4]  # delete the first 4 lines of the topic text
    for line in lineList:  # each line is the input
        # Tokenize the sentence
        tokens = word_tokenize(line)
        # save the info needed for the topic
        tok_0 = tokens[0]
        tok_1 = tokens[1]
        last_tok = tokens[-2:]
        # delete the topic info so the sentence itself can be analyzed
        del tokens[0]
        del tokens[0]
        del tokens[-1]
        del tokens[-1]
        # detokenize the sentence (tok_0 and tok_1 are single strings, so
        # detokenize joins their characters; the spaces are stripped below)
        sent_2 = TreebankWordDetokenizer().detokenize(tokens)
        tok_0_det = TreebankWordDetokenizer().detokenize(tok_0)
        tok_1_det = TreebankWordDetokenizer().detokenize(tok_1)
        last_tok_det = TreebankWordDetokenizer().detokenize(last_tok)
        # delete the space between the characters
        tok_0_det = tok_0_det.replace(" ", "")
        tok_1_det = tok_1_det.replace(" ", "")
        last_tok_det = last_tok_det.replace(" ", "")
        # convert to list
        l = list(sent_2)
        try:
            l.append(l[1])  # to avoid list IndexError
            if tokens[0] == "I" and (tokens[1] == "want" or tokens[1] == "need"):
                l[:6] = []  # delete I want/need
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "I would like to have" + sent_new + last_tok_det
                print(line)
                print(colored("intro need-sentence: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "see":
                complete_sent = tok_0_det + tok_1_det + "I see my friend" + last_tok_det
                print(line)
                print(colored('extro-sentence: ', 'red'))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "would" and tokens[2] == "like" \
                    and tokens[3] == "to" and tokens[4] == "have":
                l[:20] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "I want " + sent_new + last_tok_det
                print(line)
                print(colored("extro need-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "come" and tokens[1] == "on" \
                    and tokens[2] == "tell" and tokens[3] == "me":
                l[:15] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "I would like to know " + sent_new + last_tok_det
                print(line)
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "People" and tokens[1] == "tell" \
                    and tokens[2] == "me" and tokens[3] == "that":
                l[:20] = []
                l[-1:] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "I know that " + sent_new + last_tok_det
                print(line)
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "It" and tokens[1] == "'s" and tokens[2] == "good" \
                    and tokens[3] == "to" and tokens[4] == "have":
                l[:9] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "I like " + sent_new + last_tok_det
                print(line)
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[-1] == "nice" and tokens[-2] == "be" \
                    and tokens[-3] == "would" and tokens[-4] == "It":
                l[-16:] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + "It could be nice" + last_tok_det
                print(line)
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "think" and tokens[2] == "that":
                l[:13] = []
                sent_new = "".join(l)
                print(line)
                print(colored("extro-sentence: ", "blue"))
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "know" and tokens[2] == "that":
                l[:12] = []  # delete I know that
                sent_new = "".join(l)  # string
                tokens_new = word_tokenize(sent_new)  # tokenize the string
                # drop every remaining "that"; filtering avoids the index
                # errors of deleting from a list while iterating over it
                tokens_new = [tok for tok in tokens_new if tok != "that"]
                sent_new = TreebankWordDetokenizer().detokenize(tokens_new)
                print(line)
                print(colored("extro-sentence: ", "blue"))
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "am" \
                    and tokens[2] == "aware" and tokens[3] == "that":
                l[:15] = []  # delete I am aware that
                sent_new = "".join(l)
                print(line)
                print(colored("extro-sentence: ", "blue"))
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "would" and tokens[2] == "like" \
                    and tokens[3] == "to" and tokens[4] == "know":
                l[:20] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "come on, tell me " + sent_new + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Please" and tokens[1] == "," \
                    and tokens[2] == "tell" and tokens[3] == "me":
                l[:15] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "come on, tell me " + sent_new + last_tok_det
                print(line)
                l2 = list(complete_sent)
                del l2[-3]
                complete_sent2 = "".join(l2)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Please" and tokens[1] == "tell" and tokens[2] == "me":
                l[:14] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "come on, tell me " + sent_new + last_tok_det
                print(line)
                l2 = list(complete_sent)
                del l2[-3]
                complete_sent2 = "".join(l2)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "understand" and tokens[2] == ",":
                l[:14] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "OK " + sent_new + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "understand" and tokens[2] == "that":
                l[:12] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "It's clear " + sent_new + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "You" and tokens[1] == "are" \
                    and tokens[2] == "very" and tokens[3] == "patient":
                l[:22] = []
                sent_new = "".join(l)
                print(line)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                l2 = list(complete_sent)
                del l2[-3]
                complete_sent2 = "".join(l2)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[-1] == "patient" and tokens[-2] == "very" \
                    and tokens[-3] == "are" and tokens[-4] == "you":
                l[-23:] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "love" and tokens[2] == "how":
                l[:10] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "It's nice how" + sent_new + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and (tokens[1] == "like" or tokens[1] == "love"):
                # generate a random number to choose between two adjectives
                number = randint(1, 2)
                l[:6] = []  # delete I like/love
                last_element = l[-1]  # save the last element
                sent_new = "".join(l)
                if number == 1:
                    if last_element == "s":
                        complete_sent = tok_0_det + tok_1_det + sent_new + " are really good" + last_tok_det
                    else:
                        complete_sent = tok_0_det + tok_1_det + sent_new + " is really good" + last_tok_det
                else:
                    if last_element == "s":
                        complete_sent = tok_0_det + tok_1_det + sent_new + " are really nice" + last_tok_det
                    else:
                        complete_sent = tok_0_det + tok_1_det + sent_new + " is really nice" + last_tok_det
                print(line)
                print(colored("extro-sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "will" \
                    and tokens[2] == "try" and tokens[3] == "to":
                l[:13] = []  # delete I will try to
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "I will" + sent_new + last_tok_det
                print(line)
                print(colored("intro-future sentence: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "I" and tokens[1] == "will" and tokens[2] != "try":
                l[:6] = []  # delete I will
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "I will try to" + sent_new + last_tok_det
                print(line)
                print(colored("extro-future sentence: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif l[-1] == "?" and tokens[0] != "Can" and tokens[0] != "Could":
                # the sentence is a question
                complete_sent = tok_0_det + tok_1_det + "Can I ask you a question? " + sent_2 + last_tok_det
                print(line)
                print(colored("intro-question: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Do":  # the sentence is a question
                complete_sent = tok_0_det + tok_1_det + "Can I ask you a question? " + sent_2 + last_tok_det
                print(line)
                print(colored("intro-question: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif l[-1] == "?":  # the sentence is a question
                complete_sent = tok_0_det + tok_1_det + "Can I ask you a question? " + sent_2 + last_tok_det
                print(line)
                print(colored("intro-question: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Can" and tokens[1] == "I" and tokens[2] == "ask" \
                    and tokens[3] == "you" and tokens[4] == "a" and tokens[5] == "question":
                l[:25] = []  # delete the elements (can I ...)
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(line)
                print(colored("extro-question: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Please" and tokens[1] == "," and tokens[2] != "tell":
                complete_sent = tok_0_det + tok_1_det + "Could you " + sent_2 + "?" + last_tok_det
                print(line)
                print(colored("intro-request", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif (tokens[0] == "Could" or tokens[0] == "could") and tokens[1] == "you":
                l[:9] = []  # delete could you
                l.pop()  # drop the last element
                sent_new = "".join(l)  # convert to string
                if tokens[2] != "please":
                    complete_sent = tok_0_det + tok_1_det + "Please" + sent_new + last_tok_det
                else:
                    complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(line)
                print(colored("extro-request: ", "blue"))
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Can" and tokens[1] == "we" \
                    and tokens[2] == "talk" and tokens[3] == "about":
                l[:3] = []
                l.pop()  # drop the last element
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "It could be nice if" + sent_new + ". If you like the idea" + last_tok_det
                print(line)
                print(colored("intro-suggestion: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif (tokens[0] == "Why" or tokens[0] == "why") \
                    and tokens[1] == "don't" and tokens[2] == "we":
                l[:9] = []  # delete why don't we
                l.pop()  # drop the last element
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "It could be nice if" + sent_new + ". If you like the idea" + last_tok_det
                print(line)
                print(colored("intro-suggestion: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "You" and tokens[1] == "should":
                l[:10] = []  # delete you should
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "Maybe it's better if you " + sent_new + last_tok_det
                print(line)
                l = list(complete_sent)
                l[-1:] = []
                complete_sent = "".join(l)
                print(colored("intro-suggestion: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif (tokens[0] == "How" or tokens[0] == "how") and tokens[1] == "about":
                l[:9] = []  # delete how about
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "May I suggest" + sent_new + "?" + last_tok_det
                print(line)
                print(colored("intro-suggestion: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[0] == "Let" and tokens[1] == "'s" and tokens[2] == "talk":
                l[:10] = []  # delete let's talk
                sent_new = "".join(l)  # convert to string
                complete_sent = tok_0_det + tok_1_det + "May we talk" + sent_new + "?" + last_tok_det
                print(line)
                print(colored("intro-suggestion: ", "red"))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[1] == "is" or tokens[1] == "are":
                tokens[1] = []  # blank out the copula
                tokens_2 = [x for x in tokens if x]
                sent_new = TreebankWordDetokenizer().detokenize(tokens_2)
                complete_sent = tok_0_det + tok_1_det + "I find " + sent_new + last_tok_det
                print(line)
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            elif tokens[2] == "is" or tokens[2] == "are":
                tokens[2] = []  # blank out the copula
                tokens_2 = [x for x in tokens if x]
                sent_new = TreebankWordDetokenizer().detokenize(tokens_2)
                complete_sent = tok_0_det + tok_1_det + "I find " + sent_new + last_tok_det
                print(line)
                print(colored('intro-sentence: ', 'red'))
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")
            else:
                print(colored("Not classifiable: ", 'green'))
                print(line)
                print("_____________________________")
        except IndexError:
            continue
    outF_extro.close()
    outF_intro.close()
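# Call shape for either extro_intro_switch() version above; the input file
# name is invented. Each line must carry two topic tokens at the start and
# two at the end, which the function strips and re-attaches.
if __name__ == "__main__":
    with open("topic_sentences.txt", encoding="utf-8") as f:
        extro_intro_switch(f.read().splitlines())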