Пример #1
0
    def __alter_repeat_for_dont_think_SV(fixed_df):
        """Rephrase the clause that follows "think" as a hedged "would"/"wouldnt" sentence.

        The words after the first "think" (matched on ``base_form``) are
        analysed for a subject and a verb.  The result is assembled as
        ``<head word><subject> would|wouldnt <verb base form> <rest>``, where
        the head word is picked at random from a fixed list.

        Args:
            fixed_df: token table; the "base_form" and "word" columns are read
                here.  It is also handed to the ``Nlp_util`` list builders.

        Returns:
            A one-element list holding the generated sentence, or an empty
            list when any step fails (the failure is logged).
        """
        try:
            # TODO see if its neccesary to care about should and cant
            idx_of_think = Nlp_util.get_idx_list_of_word("think", fixed_df["base_form"])[0]
            # Re-number the rows so that positions are relative to the clause after "think".
            df_after_think = fixed_df.loc[idx_of_think + 1:, :].reset_index(drop=True)
            verb_list = Nlp_util.make_verb_list(df_after_think, type="normal")
            noun_list = Nlp_util.make_noun_list(df_after_think)
            # possibly bug happen here since amount of verbs are different in cant do/dont do
            is_negative_form = Df_util.anything_isin(["not", "never"], df_after_think.loc[:, "base_form"])
            # can add possibly or likely(when its negative)
            head_words = ["so ", "so probably ", "probably ", "so maybe ", "maybe "]
            head_word = head_words[randint(0, len(head_words) - 1)]
            # The first noun of the clause is used as the subject in both branches.
            subj = noun_list["word"].iloc[0]

            if is_negative_form:
                # Take the subject first, then the auxiliary, then the verb's
                # base form, and keep the remaining words as they are.
                # The clause already carries a negation, so a plain "would" is used.
                auxiliary_verb = " would "
                idx_of_not = Nlp_util.get_idx_list_of_word_list(["not", "never"], df_after_think.loc[:, "base_form"])[0]
                # First verb whose index label is at or after the negation word.
                verb_row = verb_list.loc[idx_of_not:, :].iloc[0]
                verb_label = verb_row.name
                verb = verb_row.base_form + " "
            else:
                auxiliary_verb = " wouldnt "
                verb_label = verb_list.index[0]
                verb = verb_list["base_form"].iloc[0] + " "

            after_verb = WordFormatter.Series2Str(df_after_think.loc[verb_label + 1:, "word"])
            return [head_word + subj + auxiliary_verb + verb + after_verb]
        except Exception:
            # Best effort: any analysis failure yields "no alternative sentence".
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
            logging.exception('')
            return []
Пример #2
0
    def __is_no_idea(cls, df):
        """Return whether the message says the user has no idea what to do or asks for advice.

        Adverbs (rows whose ``pos`` is in ``Nlp_util.pos_ADVERBs``) are dropped
        before the phrase checks; the noun/verb lists are built from the full
        ``df``.

        The result is truthy when any of the following holds:
          * the first subject is "i" and the text either combines a
            "do not know"/"not sure"/"have no idea" phrase with a
            "what/how to do|deal" phrase, or asks for advice;
          * the first verb has no subject and the text asks for advice;
          * "what", "should", "i" appear in that order.

        Args:
            df: token table; the "pos", "base_form" and "word" columns are read here.
        """
        df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
        noun_list = Nlp_util.make_noun_list(df)
        verb_list = Nlp_util.make_verb_list(df, type="normal")
        is_subj_himself = Nlp_util.is_first_subject_in(["i"], noun_list,
                                                       verb_list)
        exist_subj_for_first_verb = Nlp_util.exist_subj_for_first_verb(
            noun_list, verb_list)

        base_forms = df_ex_adverb["base_form"]

        is_idk_what_to_do = Df_util.anything_isin(
            {"do not know", "not sure", "have no idea"},
            base_forms) and Df_util.anything_isin(
                {"what to do", "how to do", "what to deal", "how to deal"},
                base_forms)

        # The original computed two predicates ("want advice" and
        # "give me advice") from identical word sets and columns; the check is
        # now evaluated once and shared.
        # NOTE(review): the second predicate was presumably meant to differ
        # (e.g. only "give me") - confirm the intended word set.
        asks_for_advice = Df_util.anything_isin(
            {"want", "need", "give me"},
            base_forms) and Df_util.anything_isin(
                {"advice", "suggestion"}, df_ex_adverb["word"])

        what_should_i_do = Nlp_util.are_words1_words2_words3_in_order(
            df_ex_adverb, ["what"], ["should"], ["i"])

        return (is_subj_himself and (is_idk_what_to_do or asks_for_advice)) or (
            not exist_subj_for_first_verb
            and asks_for_advice) or what_should_i_do
Пример #3
0
    def __exists_SV_around_cc(cls, df, cc_in_message, cc):
        """Check that the text on both sides of ``cc`` passes ``__is_complete_sent``.

        The sentence containing ``cc`` is split into the part before ``cc`` and
        the part from ``cc`` up to the next entry of ``cc_in_message`` in the
        same sentence (or up to the sentence length when there is none).  A
        trailing question mark is dropped from the second part.

        Args:
            df: token table with "sidx" and "widx" columns.
            cc_in_message: table with "sidx" and "widx" columns
                (presumably every conjunction found in the message - confirm
                against the caller).
            cc: the entry being examined; its ``sidx`` and ``widx`` are read.

        Returns:
            The combined ``__is_complete_sent`` result for both halves, or
            ``False`` when anything fails (the failure is logged).
        """
        try:
            sent_with_cc = df[df.sidx == cc.sidx]

            nouns_in_sent = Nlp_util.make_noun_list(sent_with_cc)
            verbs_in_sent = Nlp_util.make_verb_list(sent_with_cc, type="normal")

            # Everything strictly before the conjunction.
            first_half_of_sent = sent_with_cc[sent_with_cc.widx < cc.widx]
            nouns_in_first_half = nouns_in_sent[(nouns_in_sent.sidx == cc.sidx) & (nouns_in_sent.widx < cc.widx)]
            verbs_in_first_half = verbs_in_sent[(verbs_in_sent.sidx == cc.sidx) & (verbs_in_sent.widx < cc.widx)]

            # Is there a later entry of cc_in_message in the same sentence?
            other_cc_sidx = cc_in_message[(cc_in_message.sidx == cc.sidx) & (cc_in_message.widx > cc.widx)].sidx
            exists_other_cc_in_same_sentence = any(i == cc.sidx for i in other_cc_sidx)

            if exists_other_cc_in_same_sentence:
                # NOTE(review): this compares the row index (not the "widx"
                # column) with cc.widx and does not filter by sentence -
                # confirm that the index of cc_in_message matches widx here.
                other_cc_widx = cc_in_message[cc_in_message.index > cc.widx].index[0]
            else:
                other_cc_widx = len(sent_with_cc)

            # From the conjunction itself up to (excluding) the next boundary.
            second_half_of_sent = sent_with_cc[(sent_with_cc.widx >= cc.widx) & (sent_with_cc.widx < other_cc_widx)]

            if cls.__is_finishing_with_question_mark(second_half_of_sent):
                second_half_of_sent = second_half_of_sent.iloc[:-1]

            nouns_in_second_half = cls.__get_second_half_words(nouns_in_sent, cc, other_cc_widx)
            verbs_in_second_half = cls.__get_second_half_words(verbs_in_sent, cc, other_cc_widx)

            return cls.__is_complete_sent(nouns_in_first_half, verbs_in_first_half, first_half_of_sent) and \
                   cls.__is_complete_sent(nouns_in_second_half, verbs_in_second_half, second_half_of_sent)
        except Exception:
            # Best effort: an analysis failure counts as "no complete clauses".
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
            logging.exception('')
            return False
Пример #4
0
    def __is_complaint_or_dissing(df):
        """Tell whether the message complains that "you" do not listen, or insults the listener.

        Args:
            df: token table with a "base_form" column; it is also passed to
                the ``Nlp_util`` noun/verb list builders.

        Returns:
            A truthy value when the first subject is "you" and a
            "not listen"/"never listen" phrase is found in the base forms, or
            when "f**k you"/"hate you" is found there.
        """
        nouns = Nlp_util.make_noun_list(df)
        verbs = Nlp_util.make_verb_list(df, type="normal")

        # "You don't listen" style complaint: subject check first, phrase check second.
        subject_is_you = Nlp_util.is_first_subject_in(["you"], nouns, verbs)
        complains_about_listening = subject_is_you and Df_util.anything_isin(
            ["not listen", "never listen"], df["base_form"])

        # Direct insult, independent of who the subject is.
        insults_listener = Df_util.anything_isin(
            ["f**k you", "hate you"], df["base_form"])

        return complains_about_listening or insults_listener
Пример #5
0
    def __is_despising_himself(df):
        """Tell whether the message has the shape "I am <self-despising adjective>".

        Three checks are combined: the first subject is "i", the first verb's
        base form is "be", and the first adjective after the first subject is
        one of the words listed in ``WORDS_DESPITE_HIMSELF.word``.

        Args:
            df: token table handed to the ``Nlp_util`` noun/verb/adjective
                list builders.

        Returns:
            The conjunction of the three checks.
        """
        nouns = Nlp_util.make_noun_list(df)
        verbs = Nlp_util.make_verb_list(df, type="normal")
        adjectives = Nlp_util.make_adj_list(df)

        subject_is_i = Nlp_util.is_first_subject_in(["i"], nouns, verbs)
        verb_is_be = Nlp_util.is_first_verb_in(
            ["be"], nouns, verbs, column_name="base_form")

        despising_words = WORDS_DESPITE_HIMSELF.word.tolist()
        adjective_is_despising = Nlp_util.is_first_adj_after_first_sub_in(
            despising_words, nouns, adjectives)

        return subject_is_i and verb_is_be and adjective_is_despising
Пример #6
0
    def get_sidx_of_not_basic_svo_sent(text_df):
        """Collect the sentence ids whose sentence is not a plain subject-verb sentence.

        A sentence is reported when a verb comes before its first noun
        (e.g. "Dont judge me"), or when no verb is found for its first noun
        (e.g. "the situation horrible as like he said").

        Args:
            text_df: token table with a "sidx" column identifying the sentence
                of each row.

        Returns:
            List of the ``sidx`` values of the reported sentences.
        """
        def lacks_basic_svo(sentence_df):
            # True for imperative-like sentences or sentences missing a verb
            # for the first noun.
            nouns = Nlp_util.make_noun_list(sentence_df)
            verbs = Nlp_util.make_verb_list(sentence_df, type="normal")
            return (Nlp_util.is_any_verb_before_first_noun(nouns, verbs)
                    or not Nlp_util.is_any_verb_for_first_noun(nouns, verbs))

        return [
            sentence_id
            for sentence_id in set(text_df.sidx.values)
            if lacks_basic_svo(text_df[text_df.sidx == sentence_id])
        ]
Пример #7
0
    def __exists_want_to(cls, df):
        """Tell whether the message contains a usable "i want to ..." statement.

        The idiom "i want to" is searched in the base forms with adverbs
        removed.  It only counts when more than one row of ``df`` remains from
        two positions after the idiom's first index, when
        ``__exists_word_after_want_to`` accepts ``df``, and when the first
        subject is "i".

        Args:
            df: token table; the "pos" and "base_form" columns are read here.

        Returns:
            ``True`` when all conditions hold, ``False`` otherwise.
        """
        adverb_free = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]

        nouns = Nlp_util.make_noun_list(df)
        verbs = Nlp_util.make_verb_list(df, type="basic")

        idiom_positions = Nlp_util.get_idx_list_of_idiom(
            "i want to", adverb_free.base_form)

        # No "i want to" at all.
        if len(idiom_positions) == 0:
            return False

        # Rows from two positions after the idiom's first index onwards; more
        # than one row is needed.
        rows_from_idiom_end = df.loc[idiom_positions[0] + 2:, :]
        if len(rows_from_idiom_end) <= 1:
            return False

        if not cls.__exists_word_after_want_to(df):
            return False

        if Nlp_util.is_first_subject_in({"i"}, nouns, verbs):
            return True
        return False