Example #1
from nltk.tokenize.treebank import TreebankWordDetokenizer

def detokenize(input):
    # Join the tokens into a string, then tidy up leftover spaces around punctuation.
    output = TreebankWordDetokenizer().detokenize(input)
    output = output.replace(" , ", ", ")
    output = output.replace(" ' ", "'")
    output = output.replace(" ’ ", "’")
    output = output.replace(" ’", "’")
    output = output.replace(" . ", ". ")
    output = output.replace(" : ", ": ")
    output = output.replace(" ; ", "; ")
    return output
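Not part of the original snippet, but a minimal round-trip sketch of how a function like this is typically called (assuming NLTK and its punkt tokenizer data are installed):

from nltk.tokenize import word_tokenize

tokens = word_tokenize("Hello, it's just a test.")   # ['Hello', ',', 'it', "'s", 'just', 'a', 'test', '.']
print(detokenize(tokens))                            # roughly: Hello, it's just a test.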
Example #2
from nltk.tokenize.treebank import TreebankWordDetokenizer

def detokenize(tokens):

    #Detokenizing tokens into sentence
    detokenized_text = TreebankWordDetokenizer().detokenize(tokens)
    detokenized_text = detokenized_text.replace(
        " .", ".")  #Remove the space before period
    detokenized_text = detokenized_text.replace(
        " ,", ",")  #Remove the space before comma
    detokenized_text = detokenized_text.replace(
        " :", ":")  #Remove the space before colon
    return detokenized_text
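For comparison, a quick illustration (not from the original code) of what the bare Treebank detokenizer returns before any of the extra replace() calls:

from nltk.tokenize.treebank import TreebankWordDetokenizer

print(TreebankWordDetokenizer().detokenize(['Hello', ',', 'world', '.']))   # usually: Hello, world.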
Example #3
import string

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize.treebank import TreebankWordDetokenizer

def clean(features):
    Features = []
    for review in features:
        words = nltk.tokenize.word_tokenize(review)

        words = [word.lower() for word in words]

        # remove punctuations
        transform = str.maketrans('', '', string.punctuation)
        words = [word.translate(transform) for word in words]

        # remove non-alphabetical words
        words = [word for word in words if word.isalpha()]

        # remove common English stopwords such as "a", "and", "the"
        stop_words = set(stopwords.words('english'))
        words = [word for word in words if word not in stop_words]

        # remove "br" tokens left over from HTML line-break tags
        words = [word for word in words if word != 'br']

        # convert words to their root form (verbs first, then adjectives)
        lemmatizer = WordNetLemmatizer()
        words = [lemmatizer.lemmatize(word, pos='v') for word in words]
        words = [lemmatizer.lemmatize(word, pos='a') for word in words]

        # combine the list of words back into a review string
        review = TreebankWordDetokenizer().detokenize(words)

        Features.append(review)

    return Features
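A hypothetical driver for clean() above; the review strings are invented, and it assumes the NLTK punkt, stopwords and wordnet corpora have been downloaded:

# one-time data setup, if needed:
# nltk.download('punkt'); nltk.download('stopwords'); nltk.download('wordnet')

reviews = [
    "This movie was not great.<br />The acting felt flat, honestly.",
    "An absolutely wonderful film; I loved every minute of it!",
]
print(clean(reviews))   # a list of lowercased, stopword-free, lemmatized review strings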
Example #4
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer

def remove_stopwords(text):
    """
    Remove all stopwords (e.g. "the", "an", "a", "in") from ``text`` using NLTK's English stopword list.
    Args:
        text (str): raw text
    Returns:
        str
    """
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(text)

    sentences = [w for w in word_tokens if w not in stop_words]
    s = TreebankWordDetokenizer().detokenize(sentences)
    s = s.replace("``", ' "').replace("''", '"')
    while " 's" in s:
        s = s.replace(" 's", "'s")
    return s.replace(" .", ".")
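An illustrative call (the sentence is made up; assumes the punkt and stopwords data are available). Note that the capitalised "The" survives because the stopword check is case-sensitive:

print(remove_stopwords("The cat's owner said that it was sitting on the mat."))
# roughly: The cat's owner said sitting mat.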
Example #5
from nltk.tokenize.treebank import TreebankWordDetokenizer

def make_readable_sampl(samp):
    s = samp['token'].copy()

    s[samp['subj_start']] = '<e1>' + s[samp['subj_start']]
    s[samp['subj_end']] = s[samp['subj_end']] + '</e1>'

    s[samp['obj_start']] = '<e2>' + s[samp['obj_start']]
    s[samp['obj_end']] = s[samp['obj_end']] + '</e2>'

    s_detokenize = TreebankWordDetokenizer().detokenize(s)

    s_detokenize = s_detokenize.replace(">>", ">")
    s_detokenize = s_detokenize.replace("<<", "<")

    return {'id': samp['id'],'relation': samp['relation'], 'token': s_detokenize, 'subj_type':samp['subj_type'], 'obj_type':samp['obj_type']}
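To show what make_readable_sampl() produces, here is a made-up relation-extraction sample; the field names mirror those used in the function, the values are invented:

samp = {
    'id': 'e001', 'relation': 'per:employee_of',
    'token': ['Bill', 'Gates', 'founded', 'Microsoft', '.'],
    'subj_start': 0, 'subj_end': 1, 'obj_start': 3, 'obj_end': 3,
    'subj_type': 'PERSON', 'obj_type': 'ORGANIZATION',
}
print(make_readable_sampl(samp)['token'])
# roughly: <e1>Bill Gates</e1> founded <e2>Microsoft</e2>.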
Example #6
from nltk.tokenize.treebank import TreebankWordDetokenizer

def remove_duplicate_tokens(input_string):
    """Removes duplicate tokens from input string, unless permitted

    :param input_string:
    :return: output string without duplicate tokens unless allowed
    :rtype: str
    """

    refined_phrase_list = []
    new_phrase_list = input_string.split(' ')
    for token in new_phrase_list:
        if token not in refined_phrase_list:
            refined_phrase_list.append(token)
    refined_string = TreebankWordDetokenizer().detokenize(refined_phrase_list)
    refined_string = refined_string.strip()

    # Permitted duplicate tokens restored (for more such tokens, in
    # future it can be dealt by storing in pre-defined resources)
    if "gallus gallus" in input_string \
            and "gallus gallus" not in refined_string:
        refined_string = refined_string.replace("gallus", "gallus gallus")

    return refined_string
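An illustrative call (input string invented) showing both the de-duplication and the restoration of the permitted "gallus gallus" duplicate:

print(remove_duplicate_tokens("expression of gene expression in gallus gallus"))
# roughly: expression of gene in gallus gallus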
Example #7
# Note: this function relies on module-level name lists (malenames, femalenames,
# girl, boy, surname, unsex) defined elsewhere in the original project.
def change_rolename(longtext, role, delet):
    originalnames = []
    name_pair = []
    newnames = []
    new_role = copy.deepcopy(role)
    for x in role:
        name_list = []
        if len(x.split()) == 2:

            name_list.append(x.split()[0])
            name_list.append(x.split()[1])
            for y in delet:
                if y in x.split():
                    if y.capitalize() in malenames:
                        number1 = random.randint(0,len(girl)-1)
                        number2 = random.randint(0,len(surname)-1)
                        name_list.append(girl[number1])
                        name_list.append(surname[number2])
                        print(name_list)
                        originalname = name_list[0] + ' ' + name_list[1]
                        newname = name_list[2] + ' ' + name_list[3]
                        originalnames.append(originalname)
                        newnames.append(newname)
                        if x in new_role:
                            new_role.remove(x)
                        longtext = longtext.replace(originalname, newname)
                    elif y.capitalize() in femalenames:
                        number1 = random.randint(0, len(boy) - 1)
                        number2 = random.randint(0, len(surname) - 1)
                        name_list.append(boy[number1])
                        name_list.append(surname[number2])
                        print(name_list)
                        originalname = name_list[0] + ' ' + name_list[1]
                        newname = name_list[2] + ' ' + name_list[3]
                        originalnames.append(originalname)
                        newnames.append(newname)
                        if x in new_role:
                            new_role.remove(x)
                        longtext = longtext.replace(originalname, newname)
    print(new_role)

    for y in delet:
        for z1,z2 in zip(originalnames,newnames):
            if y in z1:
                firstname = z2.split()[0]
                token = tokenize.word_tokenize(longtext)
                for i, k in enumerate(token):
                    if k == y:
                        token[i] = firstname
                longtext = TreebankWordDetokenizer().detokenize(token)

    for rest in new_role:
        if len(rest.split()) == 1:
            if rest in malenames:
                number = random.randint(0, len(girl)-1)
                longtext = longtext.replace(rest, girl[number])
            elif rest in femalenames:
                number = random.randint(0, len(boy)-1)
                longtext = longtext.replace(rest, boy[number])
            elif rest in unsex:
                number = random.randint(0, len(unsex)-1)
                longtext = longtext.replace(rest, unsex[number])
        elif len(rest.split()) > 1:

            for part in rest.split():
                if part in malenames:
                    number1 = random.randint(0, len(girl) - 1)
                    number2 = random.randint(0, len(surname) - 1)
                    newname = girl[number1] + ' ' + surname[number2]
                    longtext = longtext.replace(rest, newname)
                    break
                elif part in femalenames:
                    number1 = random.randint(0, len(boy) - 1)
                    number2 = random.randint(0, len(surname) - 1)
                    newname = boy[number1] + ' '+surname[number2]
                    longtext = longtext.replace(rest, newname)
                    break
                elif part in unsex:
                    number1 = random.randint(0, len(boy) - 1)
                    number2 = random.randint(0, len(surname) - 1)
                    newname = boy[number1] + ' ' + surname[number2]
                    longtext = longtext.replace(rest, newname)
                    break
    print(longtext)
    print(new_role)
    return longtext
Example #8
            c_indices = []
            indices = []
            for i, tok in enumerate(s):
                if tok == "CHAPTER" or tok == "Chapter":
                    c_indices.append(True)
                    indices.append(i + 1)
            # iterate in reverse so earlier deletions do not shift later indices
            for i in reversed(indices):
                if isinstance(deromanize(s[i]), int) or s[i].isdigit():
                    del s[i - 1:i + 1]  # remove the word "Chapter" and the chapter number

            n = TreebankWordDetokenizer().detokenize(s)
        #n = n.replace("CHAPTER", "")
        #n = n.replace("Chapter", "")
        n = n.replace("--", " - ")
        n = n.replace("_", "")
        n = n.strip()

        #n = " ".join(n.split())
        nl.append(n)

    tok_title = tokenizer.tokenize(t.lower())
    output_title = []
    for i, v in enumerate(tok_title):
        output_title.append(v)
    output_title = "".join(output_title)
    output_title = output_title.replace(" ", "_")
    print(output_title)
    with open(output_title, "w", encoding="UTF-8") as f:
        for l in nl:
Example #9
def extro_intro_switch(lineList):

    reload(sys)
    sys.setdefaultencoding("utf-8")

    i = 0
    outF_intro = open("myOutFile_intro.txt", "a")
    outF_extro = open("myOutFile_extro.txt", "a")
    del lineList[:4]  #delete the first 4 lines of the topic text

    for i in lineList:
        input = i  #each line is the input

        #Tokenize the sentence
        tokens = word_tokenize(input)

        #save info needed for the topic
        tok_0 = tokens[0]
        tok_1 = tokens[1]
        last_tok = tokens[-2:]

        #delete the info needed for the topic in order to analyze the sentence
        del tokens[0]
        del tokens[0]
        del tokens[-1]
        del tokens[-1]

        #detokenize the sentence
        sent_2 = TreebankWordDetokenizer().detokenize(tokens)

        tok_0_det = TreebankWordDetokenizer().detokenize(tok_0)
        tok_1_det = TreebankWordDetokenizer().detokenize(tok_1)
        last_tok_det = TreebankWordDetokenizer().detokenize(last_tok)

        #delete the space between the characters
        tok_0_det = tok_0_det.replace(" ", "")
        tok_1_det = tok_1_det.replace(" ", "")
        last_tok_det = last_tok_det.replace(" ", "")

        #convert to list
        l = list(sent_2)

        try:
            l.append(l[1])  #to avoid list IndexError

            if tokens[0] == "Posso" and tokens[1] == "chiederti" and tokens[
                    2] == "di" and tokens[3] == "nuovo":
                l[:24] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(input)
                print colored("extro sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Posso" and tokens[1] == "chiederti":
                l[:25] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(input)
                print colored("extro sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "So" and tokens[1] == "che":
                l[:6] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(input)

                l2 = list(complete_sent)
                del (l2[-3])
                complete_sent2 = "".join(l2)

                print colored("extro sentence: ", "blue")
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Sei" and tokens[1] == "molto" and tokens[
                    2] == "paziente":
                l[:20] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(input)

                l2 = list(complete_sent)
                del (l2[-3])
                complete_sent2 = "".join(l2)

                print colored("extro sentence: ", "blue")
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[-1] == "paziente" and tokens[
                    -2] == "davvero" and tokens[-3] == "sei":
                l[-23:] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Per" and tokens[1] == "favore" and tokens[
                    2] == "," and tokens[3] == "parlami":
                l[:11] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Dai," + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Per" and tokens[1] == "favore" and tokens[
                    2] == "," and tokens[3] == "dimmi":
                l[:11] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Dai," + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Per" and tokens[1] == "favore" and tokens[
                    2] == "," and tokens[3] == "dimmi":
                l[:11] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Dai," + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Lo" and tokens[1] == "vedo":
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "Lo vedo amico mio" + last_tok_det
                print(input)
                print colored('extro-sentence: ', 'red')
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Dovresti":
                l[:8] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Forse dovresti" + sent_new + last_tok_det
                print(input)
                print colored('intro-sentence: ', 'red')
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "E'" and tokens[1] == "molto" and tokens[
                    2] == "interessante":
                l[:27] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "E' molto interessante" + sent_new + last_tok_det
                print colored('extro-sentence: ', 'red')
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Va" and tokens[1] == "bene" and tokens[3] == ",":
                l[:8] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Va bene amico," + sent_new + last_tok_det
                print colored('extro-sentence: ', 'blue')
                print complete_sent
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif l[-1] == "?" and tokens[0] != "Posso":
                complete_sent = tok_0_det + tok_1_det + "Posso farti una domanda?" + sent_2 + last_tok_det
                print colored('intro-sentence: ', 'red')
                print complete_sent
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Mi" and tokens[1] == "puoi" and tokens[
                    2] == "dire":
                l[:12] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "Potresti dirmi" + sent_new + last_tok_det
                print colored('intro-sentence: ', 'red')
                print complete_sent
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            else:
                print colored("Not classifiable: ", 'green')
                print input
                print("_____________________________")
        except IndexError:
            pass

    outF_extro.close()
    outF_intro.close()
Example #10
def extro_intro_switch(lineList):

    i = 0
    outF_intro = open("myOutFile_intro.txt", "a")
    outF_extro = open("myOutFile_extro.txt", "a")
    del lineList[:4]  #delete the first 4 lines of the topic text

    for i in lineList:
        input = i  #each line is the input

        #Tokenize the sentence
        tokens = word_tokenize(input)

        #save info needed for the topic
        tok_0 = tokens[0]
        tok_1 = tokens[1]
        last_tok = tokens[-2:]

        #delete the info needed for the topic in order to analyze the sentence
        del tokens[0]
        del tokens[0]
        del tokens[-1]
        del tokens[-1]

        #detokenize the sentence
        sent_2 = TreebankWordDetokenizer().detokenize(tokens)

        tok_0_det = TreebankWordDetokenizer().detokenize(tok_0)
        tok_1_det = TreebankWordDetokenizer().detokenize(tok_1)
        last_tok_det = TreebankWordDetokenizer().detokenize(last_tok)

        #delete the space between the characters
        tok_0_det = tok_0_det.replace(" ", "")
        tok_1_det = tok_1_det.replace(" ", "")
        last_tok_det = last_tok_det.replace(" ", "")

        #convert to list
        l = list(sent_2)

        try:
            l.append(l[1])  #to avoid list IndexError

            if tokens[0] == "I" and tokens[1] == "want" or tokens[1] == "need":
                l[:6] = []  #delete I want/need
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "I would like to have" + sent_new + last_tok_det
                print(input)
                print colored("intro need-sentence: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "see":
                complete_sent = tok_0_det + tok_1_det + "I see my friend" + last_tok_det
                print(input)
                print colored('extro-sentence: ', 'red')
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "would" and tokens[
                    2] == "like" and tokens[3] == "to" and tokens[4] == "have":
                l[:20] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "I want " + sent_new + last_tok_det
                print(input)
                print colored("extro need-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "come" and tokens[1] == "on" and tokens[
                    2] == "tell" and tokens[3] == "me":
                l[:15] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "I would like to know " + sent_new + last_tok_det
                print(input)
                print colored('intro-sentence: ', 'red')
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "People" and tokens[1] == "tell" and tokens[
                    2] == "me" and tokens[3] == "that":
                l[:20] = []
                l[-1:] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "I know that " + sent_new + last_tok_det
                print(input)
                print colored('intro-sentence: ', 'red')
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "It" and tokens[1] == "'s" and tokens[
                    2] == "good" and tokens[3] == "to" and tokens[4] == "have":
                l[:9] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "I like " + sent_new + last_tok_det
                print(input)
                print colored('intro-sentence: ', 'red')
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[-1] == "nice" and tokens[-2] == "be" and tokens[
                    -3] == "would" and tokens[-4] == "It":
                l[-16:] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + "It could be nice" + last_tok_det
                print(input)
                print colored('intro-sentence: ', 'red')
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "think" and tokens[
                    2] == "that":
                l[:13] = []
                sent_new = "".join(l)
                print(input)
                print colored("extro-sentence: ", "blue")
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            # elif tokens[0] == "I" and tokens[1] == "know" and tokens[2] == "that":
            #     l[:12] = []
            #     sent_new = "".join(l)
            #     print(input)
            #     print colored("extro-sentence: ", "blue")
            #     complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
            #     print(complete_sent)
            #     outF_extro.write(complete_sent)
            #     outF_extro.write("\n")
            #     print("_____________________________")

            if tokens[0] == "I" and tokens[1] == "know" and tokens[2] == "that":
                l[:12] = []  #delete I know that
                sent_new = "".join(l)  #string

                tokens_new = word_tokenize(sent_new)  #tokenize the string

                for i in range(0, len(tokens_new)):
                    if tokens_new[i] == "that":
                        del tokens_new[i]
                        sent_new = TreebankWordDetokenizer().detokenize(
                            tokens_new)
                print(input)
                print colored("extro-sentence: ", "blue")
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "am" and tokens[
                    2] == "aware" and tokens[3] == "that":
                l[:15] = []  #delete I am aware that
                sent_new = "".join(l)
                print(input)
                print colored("extro-sentence: ", "blue")
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "would" and tokens[
                    2] == "like" and tokens[3] == "to" and tokens[4] == "know":
                l[:20] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "come on, tell me " + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Please" and tokens[1] == "," and tokens[
                    2] == "tell" and tokens[3] == "me":
                l[:15] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "come on, tell me " + sent_new + last_tok_det
                print(input)

                l2 = list(complete_sent)
                del (l2[-3])
                complete_sent2 = "".join(l2)

                print colored("extro-sentence: ", "blue")
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Please" and tokens[1] == "tell" and tokens[
                    2] == "me":
                l[:14] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "come on, tell me " + sent_new + last_tok_det
                print(input)

                l2 = list(complete_sent)
                del (l2[-3])
                complete_sent2 = "".join(l2)

                print colored("extro-sentence: ", "blue")
                print(complete_sent2)
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "understand" and tokens[
                    2] == ",":
                l[:14] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "OK " + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "understand" and tokens[
                    2] == "that":
                l[:12] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "It's clear " + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "You" and tokens[1] == "are" and tokens[
                    2] == "very" and tokens[3] == "patient":
                l[:22] = []
                sent_new = "".join(l)
                print(input)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det

                l2 = list(complete_sent)
                del (l2[-3])
                complete_sent2 = "".join(l2)

                print colored("extro-sentence: ", "blue")
                print complete_sent2
                outF_extro.write(complete_sent2)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[-1] == "patient" and tokens[-2] == "very" and tokens[
                    -3] == "are" and tokens[-4] == "you":
                l[-23:] = []
                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "love" and tokens[
                    2] == "how":
                l[:10] = []

                sent_new = "".join(l)
                complete_sent = tok_0_det + tok_1_det + "It's nice how" + sent_new + last_tok_det
                print(input)
                print colored("extro-sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "like" or tokens[
                    1] == "love":
                number = randint(
                    1, 2
                )  #generate a random number to choose between two different adj
                l[:6] = []
                last_element = l[-1]  #save the last element
                if number == 1:
                    if last_element == "s":
                        sent_new = "".join(l)
                        complete_sent = tok_0_det + tok_1_det + sent_new + " are really good" + last_tok_det
                        print(input)
                        print colored("extro-sentence: ", "blue")
                        print(complete_sent)
                        outF_extro.write(complete_sent)
                        outF_extro.write("\n")
                        print("_____________________________")
                    else:
                        sent_new = "".join(l)
                        complete_sent = tok_0_det + tok_1_det + sent_new + " is really good" + last_tok_det
                        print(input)
                        print colored("extro-sentence: ", "blue")
                        print(complete_sent)
                        outF_extro.write(complete_sent)
                        outF_extro.write("\n")
                        print("_____________________________")
                if number == 2:
                    if last_element == "s":
                        sent_new = "".join(l)
                        complete_sent = tok_0_det + tok_1_det + sent_new + " are really nice" + last_tok_det
                        print(input)
                        print colored("extro-sentence: ", "blue")
                        print(complete_sent)
                        outF_extro.write(complete_sent)
                        outF_extro.write("\n")
                        print("_____________________________")
                    else:
                        sent_new = "".join(l)
                        complete_sent = tok_0_det + tok_1_det + sent_new + " is really nice" + last_tok_det
                        print(input)
                        print colored("extro-sentence: ", "blue")
                        print(complete_sent)
                        outF_extro.write(complete_sent)
                        outF_extro.write("\n")
                        print("_____________________________")

            elif tokens[0] == "I" and tokens[1] == "will" and tokens[
                    2] == "try" and tokens[3] == "to":
                l[:13] = []  #delete I will try to
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "I will" + sent_new + last_tok_det
                print(input)
                print colored("intro-future sentence: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "I" and tokens[
                    1] == "will" and tokens[3] != "try":
                l[:6] = []  #delete I will
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "I will try to" + sent_new + last_tok_det
                print(input)
                print colored("extro-future sentence: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif l[-1] == "?" and tokens[0] != "Can" and tokens[
                    0] != "Could":  #check if the sentence is a question
                complete_sent = tok_0_det + tok_1_det + "Can I ask you a question? " + sent_2 + last_tok_det
                print(input)
                print colored("intro-question: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Do":  #check if the sentence is a question
                complete_sent = tok_0_det + tok_1_det + "Can I ask you a question? " + sent_2 + last_tok_det
                print(input)
                print colored("intro-question: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif l[-1] == "?":  #check if the sentence is a question
                complete_sent = tok_0_det + tok_1_det + "Can I ask you a question? " + sent_2 + last_tok_det
                print(input)
                print colored("intro-question: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Can" and tokens[1] == "I" and tokens[
                    2] == "ask" and tokens[3] == "you" and tokens[
                        4] == "a" and tokens[5] == "question":
                l[:25] = []  #delete the elements (can I ...)
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(input)
                print colored("extro-question: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Please" and tokens[
                    1] == "," and tokens[2] != "tell":
                complete_sent = tok_0_det + tok_1_det + "Could you " + sent_2 + "?" + last_tok_det
                print(input)
                print colored("intro-request", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Could" or tokens[0] == "could" and tokens[
                    1] == "you":
                l[:9] = []  #delete could you
                l.pop()  #extract the last element
                sent_new = "".join(l)  #convert to string
                if tokens[2] != "please":
                    complete_sent = tok_0_det + tok_1_det + "Please" + sent_new + last_tok_det
                else:
                    complete_sent = tok_0_det + tok_1_det + sent_new + last_tok_det
                print(input)
                print colored("extro-request: ", "blue")
                print(complete_sent)
                outF_extro.write(complete_sent)
                outF_extro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Can" and tokens[1] == "we" and tokens[
                    2] == "talk" and tokens[3] == "about":
                l[:3] = []
                l.pop()  #extract the last element
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "It could be nice if" + sent_new + ". If you like the idea" + last_tok_det
                print(input)
                print colored("intro-suggestion: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Why" or tokens[0] == "why" and tokens[
                    1] == "don't" and tokens[2] == "we":
                l[:9] = []  #delete why don't we
                l.pop()  #extract the last element
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "It could be nice if" + sent_new + ". If you like the idea" + last_tok_det
                print(input)
                print colored("intro-suggestion: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "You" and tokens[1] == "should":
                l[:10] = []  #delete you should
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "Maybe it's better if you " + sent_new + last_tok_det
                print(input)

                l = list(complete_sent)
                l[-1:] = []
                complete_sent = "".join(l)

                print colored("intro-suggestion: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif (tokens[0] == "How" or tokens[0] == "how") and tokens[1] == "about":
                l[:9] = []  #delete how about
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "May I suggest" + sent_new + "?" + last_tok_det
                print(input)
                print colored("intro-suggestion: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[0] == "Let" and tokens[1] == "'s" and tokens[
                    2] == "talk":
                l[:10] = []  #delete may I suggest
                sent_new = "".join(l)  #convert to string
                complete_sent = tok_0_det + tok_1_det + "May we talk" + sent_new + "?" + last_tok_det
                print(input)
                print colored("intro-suggestion: ", "red")
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[1] == "is" or tokens[1] == "are":
                tokens[1] = []  #delete the elements from the first to "if"
                tokens_2 = [x for x in tokens if x]
                sent_new = TreebankWordDetokenizer().detokenize(
                    tokens_2)  #detokenize the sentence
                complete_sent = tok_0_det + tok_1_det + "I find " + sent_new + last_tok_det
                print(input)
                print colored('intro-sentence: ', 'red')
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            elif tokens[2] == "is" or tokens[2] == "are":
                tokens[2] = []  #delete the elements from the first to "if"
                tokens_2 = [x for x in tokens if x]
                sent_new = TreebankWordDetokenizer().detokenize(
                    tokens_2)  #detokenize the sentence
                complete_sent = tok_0_det + tok_1_det + "I find " + sent_new + last_tok_det
                print(input)
                print colored('intro-sentence: ', 'red')
                print(complete_sent)
                outF_intro.write(complete_sent)
                outF_intro.write("\n")
                print("_____________________________")

            else:
                print colored("Not classifiable: ", 'green')
                print input
                print("_____________________________")
        except IndexError:
            pass

    outF_extro.close()
    outF_intro.close()