Пример #1
0
    def __alter_repeat_for_dont_think_SV(fixed_df):
        """Build a one-sentence paraphrase of the clause that follows "think".

        The rows of ``fixed_df`` after the first row whose "base_form" is
        "think" are treated as the embedded clause.  The reply has the shape
        ``<head word><subject> would|wouldnt <verb base form> <rest>``:

        * if the clause contains "not"/"never", the reply uses "would";
        * otherwise the reply uses "wouldnt".

        Args:
            fixed_df: DataFrame with at least "word" and "base_form" columns.
                Assumes a default 0..n-1 integer index, since ``idx + 1`` is
                used as a row label -- TODO confirm against callers.

        Returns:
            A single-element list holding the generated sentence, or an empty
            list when anything goes wrong (no "think", no noun, no verb, ...).
            The failure is logged with its traceback.
        """
        try:
            # TODO see if it's necessary to care about "should" and "cant"
            idx_of_think = Nlp_util.get_idx_list_of_word("think", fixed_df["base_form"])[0]
            df_after_think = fixed_df.loc[idx_of_think + 1:, :].reset_index(drop=True)
            verb_list = Nlp_util.make_verb_list(df_after_think, type="normal")
            noun_list = Nlp_util.make_noun_list(df_after_think)
            # A bug may occur here, since the number of verbs differs between
            # "cant do" and "dont do".
            is_negative_form = Df_util.anything_isin(["not", "never"], df_after_think.loc[:, "base_form"])
            # Could also add "possibly" or "likely" (when it's negative).
            head_words = ["so ", "so probably ", "probably ", "so maybe ", "maybe "]
            head_word = head_words[randint(0, len(head_words) - 1)]
            # The first noun of the clause is used as the subject of the reply.
            subj = noun_list["word"].iloc[0]

            if is_negative_form:
                # Take the subject, then the auxiliary, then the base form of
                # the first verb at or after the negation, then keep the rest
                # of the clause unchanged.
                auxiliary_verb = " would "
                idx_of_not = Nlp_util.get_idx_list_of_word_list(["not", "never"], df_after_think.loc[:, "base_form"])[0]
                verb_row = verb_list.loc[idx_of_not:, :].iloc[0]
                verb = verb_row.base_form + " "
                # verb_row.name is the row label of that verb in the clause.
                idx_of_verb = verb_row.name
            else:
                auxiliary_verb = " wouldnt "
                verb = verb_list["base_form"].iloc[0] + " "
                idx_of_verb = verb_list.index[0]

            after_verb = WordFormatter.Series2Str(df_after_think.loc[idx_of_verb + 1:, "word"])
            return [head_word + subj + auxiliary_verb + verb + after_verb]
        except Exception:
            # Best effort: any failure means "no paraphrase available".
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
            logging.exception('')
            return []
Пример #2
0
    def __correct_pos_tag(df, dic):
        """Patch the "pos" column of ``df`` with a set of hand-written rules.

        Rules, applied in this order:

        1. Every word that is a key of ``dic`` gets ``dic[word]`` as its tag.
        2. "that" / "it" / "this" become "NN" when they are the last row or
           when the next row's tag is neither a pronoun nor a noun tag.
        3. "like" / "care" / "guess" / "need" become "VB" when the previous
           row's tag is a noun or pronoun tag.
        4. "work" becomes "VB" when the previous word is "this".

        Args:
            df: DataFrame with "word" and "pos" columns.  It is modified in
                place.  Assumes a default 0..n-1 integer index, since
                ``idx - 1`` / ``idx + 1`` are used as row labels and
                ``len(df) - 1`` as the last label -- TODO confirm.
            dic: Mapping from word to the tag that should override "pos".

        Returns:
            The same ``df`` object, after modification.
        """
        # Rule 1: dictionary overrides.  The guard is needed because a
        # row-wise apply on an empty selection does not yield a Series.
        in_dic = df["word"].isin(dic.keys())
        if any(in_dic):
            df.loc[in_dic, "pos"] = df[in_dic].apply(
                lambda row: dic[row["word"]], axis=1)

        # Tags treated as "noun-like" by rules 2 and 3 (loop invariant).
        noun_like_tags = Nlp_util.pos_PRPs + Nlp_util.pos_NOUNs

        # Rule 2: demonstratives / "it" that are not followed by a noun.
        demonstratives = ["that", "it", "this"]
        if any(df["word"].isin(demonstratives)):
            last_idx = len(df) - 1
            for idx_of_kw in Nlp_util.get_idx_list_of_word_list(
                    demonstratives, df["word"]):
                # The last-row check must come first: looking up the row after
                # the final one raises KeyError, so the lookup is only done
                # when a next row exists.
                if (idx_of_kw == last_idx
                        or df.loc[idx_of_kw + 1, "pos"] not in noun_like_tags):
                    df.loc[idx_of_kw, "pos"] = "NN"

        # Rule 3: ambiguous words are verbs when preceded by a noun/pronoun.
        ambiguous_verbs = ["like", "care", "guess", "need"]
        if Df_util.anything_isin(ambiguous_verbs, df["word"]):
            for idx_of_like in Nlp_util.get_idx_list_of_word_list(
                    ambiguous_verbs, df["word"]):
                if (idx_of_like != 0
                        and df.loc[idx_of_like - 1, "pos"] in noun_like_tags):
                    df.loc[idx_of_like, "pos"] = "VB"

        # Rule 4: "this work ..." -> "work" is a verb.
        if Df_util.anything_isin(["work"], df["word"]):
            for idx_of_work in Nlp_util.get_idx_list_of_word_list(
                    ["work"], df["word"]):
                if (idx_of_work != 0
                        and df.loc[idx_of_work - 1, "word"] in ["this"]):
                    df.loc[idx_of_work, "pos"] = "VB"

        return df
Пример #3
0
    def __alter_repeat_for_make_S_feel_ADJ(df):
        """Rephrase a "make <someone> (feel) <adjective>" sentence.

        Looks at the first row whose "base_form" is "make", converts the word
        right after it with ``Nlp_util.change_object_pronoun_to_pronoun``, and
        picks the first adjective-tagged word among the rows labelled
        ``make + 2`` .. ``make + 4``.

        Args:
            df: DataFrame with "word", "base_form" and "pos" columns.

        Returns:
            A single-element list with one randomly chosen phrasing.

        Raises:
            IndexError: if there is no "make" or no adjective in the window
                (nothing is caught here).
        """
        make_at = Nlp_util.get_idx_list_of_word_list(["make"], df["base_form"])[0]

        # The word directly after "make" is the person being affected.
        object_word = df.loc[make_at + 1, "word"]
        subj = Nlp_util.change_object_pronoun_to_pronoun(object_word)

        # Search the next few rows for the adjective.
        window = df.loc[make_at + 2:make_at + 4, :]
        is_adjective = window["pos"].isin(Nlp_util.pos_ADJECTIVEs)
        adj = window.loc[is_adjective, "word"].iloc[0]

        templates = (
            "{0[0]} feel {0[1]} because of that",
            "thats getting {0[0]} feel {0[1]}",
            "thats the moment {0[0]} feel {0[1]}",
        )
        chosen = templates[randint(0, len(templates) - 1)]
        return [chosen.format([subj, adj])]
Пример #4
0
 def __exists_make_S_feel_ADJ(df):
     """Tell whether ``df`` contains "make <pronoun> (feel) <adjective>".

     Only the first row whose "base_form" is "make" is inspected.  The
     pattern matches when the next row has a pronoun tag and either

     * the row after that has an adjective tag, or
     * the row after that has base form "feel" and an adjective tag occurs
       among the rows labelled ``make + 2`` .. ``make + 4``.

     Args:
         df: DataFrame with "base_form" and "pos" columns.  Assumes a default
             0..n-1 integer index, since ``idx + 1`` / ``idx + 2`` are used as
             row labels -- TODO confirm.

     Returns:
         A truthy value when the pattern is present, a falsy one otherwise.
         Any error (e.g. "make" being too close to the end of the sentence)
         is logged and reported as ``False``.
     """
     try:
         idx_list_of_make = Nlp_util.get_idx_list_of_word_list(["make"], df["base_form"])
         # Explicit length check: the helper's return type is not visible
         # here, and array-like objects have no unambiguous truth value.
         if len(idx_list_of_make) == 0:
             return False

         idx_of_make = idx_list_of_make[0]
         if df.loc[idx_of_make + 1, "pos"] not in Nlp_util.pos_PRPs:
             return False

         # "make me sad"
         if df.loc[idx_of_make + 2, "pos"] in Nlp_util.pos_ADJECTIVEs:
             return True

         # "make me feel (so) sad"
         return df.loc[idx_of_make + 2, "base_form"] == "feel" and any(
             df.loc[idx_of_make + 2:idx_of_make + 4, "pos"].isin(
                 Nlp_util.pos_ADJECTIVEs))
     except Exception:
         # Best effort: a failed lookup means the pattern is not there.
         # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
         logging.exception('')
         return False