Пример #1
0
    def repetitive_group_recognition(self, show=False, clean_attrs=True):
        """Recognise repeated components and store their group labels.

        Runs the non-text and text repetition recognisers from ``rep`` and
        left-merges both results into ``self.compos_dataframe`` (pandas
        merges on the columns the frames share). The ``alignment`` column is
        renamed to ``alignment_in_group``. The merged frame replaces
        ``self.compos_dataframe``; nothing is returned.

        Args:
            show: Forwarded unchanged to both recognisers.
            clean_attrs: When true, drop every column whose name contains
                ``'cluster'``, replace missing values with ``-1`` and build
                a ``group`` column holding ``'nt-<id>'`` (taken from
                ``group_nontext``) or otherwise ``'t-<id>'`` (taken from
                ``group_text``). Rows in neither group, and labels that
                occur on a single row only, are set to ``-1``.
        """
        df_nontext = rep.recog_repetition_nontext(self, show)
        df_text = rep.recog_repetition_text(self, show)

        df = self.compos_dataframe.merge(df_nontext, how='left')
        if 'alignment' not in df.columns:
            # Neither merged frame had an 'alignment' column: start from an
            # all-missing one so the fill below cannot raise KeyError.
            df['alignment'] = float('nan')

        # Rows still lacking an alignment take the text recogniser's value;
        # the assignment is aligned on index labels.
        df.loc[df['alignment'].isna(), 'alignment'] = df_text['alignment']
        df = df.merge(df_text, how='left')
        df = df.rename(columns={'alignment': 'alignment_in_group'})

        if clean_attrs:
            df = df.drop(columns=list(df.filter(like='cluster')))
            df = df.fillna(-1)

            # After fillna, -1 marks "not in a group". A non-text group
            # takes precedence over a text group on the same row.
            in_nontext = df['group_nontext'] != -1
            in_text = ~in_nontext & (df['group_text'] != -1)
            nontext_labels = (
                'nt-' + df['group_nontext'].astype(int).astype(str))
            text_labels = 't-' + df['group_text'].astype(int).astype(str)
            labels = nontext_labels.where(
                in_nontext, text_labels.where(in_text))
            # Object dtype: the column mixes string labels with the
            # integer sentinel -1.
            labels = labels.astype(object)
            if 'group' in df.columns:
                # Ungrouped rows keep whatever label was already present.
                labels = labels.where(labels.notna(), df['group'])
            # Always create the column, even when no row is grouped, so the
            # pruning below never hits a missing 'group' key.
            df['group'] = labels

            # A label seen on one row only is not a repetition. A lone
            # missing value is also flagged here, which is harmless because
            # every missing value becomes -1 on the next line anyway.
            singleton = ~df['group'].duplicated(keep=False)
            df.loc[singleton, 'group'] = -1
            df['group'] = df['group'].fillna(-1)

        self.compos_dataframe = df
Пример #2
0
    def repetitive_group_recognition(self,
                                     show=False,
                                     clean_attrs=True):  # call from notebook
        """Recognise repeated components and store their group labels.

        Runs the non-text and text repetition recognisers from ``rep`` and
        left-merges both results into ``self.compos_dataframe`` (pandas
        merges on the columns the frames share). The ``alignment`` column is
        renamed to ``alignment_in_group``. The merged frame replaces
        ``self.compos_dataframe``; nothing is returned.

        Args:
            show: Forwarded unchanged to both recognisers.
            clean_attrs: When true, drop every column whose name contains
                ``'cluster'``, replace missing values with ``-1`` and build
                a ``group`` column holding ``'nt-<id>'`` (taken from
                ``group_nontext``) or otherwise ``'t-<id>'`` (taken from
                ``group_text``). Rows in neither group, and labels that
                occur on a single row only, are set to ``-1``.
        """
        df_nontext = rep.recog_repetition_nontext(self, show)
        df_text = rep.recog_repetition_text(self, show)

        # Start from the original component frame, merged with the
        # non-text result.
        df = self.compos_dataframe.merge(df_nontext, how='left')
        if 'alignment' not in df.columns:
            # Neither merged frame had an 'alignment' column: start from an
            # all-missing one so the fill below cannot raise KeyError.
            df['alignment'] = np.nan

        # Rows still lacking an alignment take the text recogniser's value;
        # the assignment is aligned on index labels.
        df.loc[df['alignment'].isna(), 'alignment'] = df_text['alignment']
        df = df.merge(df_text, how='left')
        df = df.rename(columns={'alignment': 'alignment_in_group'})

        if clean_attrs:
            df = df.drop(columns=list(df.filter(like='cluster')))
            df = df.fillna(-1)

            # After fillna, -1 marks "not in a group". A non-text group
            # takes precedence over a text group on the same row.
            in_nontext = df['group_nontext'] != -1
            in_text = ~in_nontext & (df['group_text'] != -1)
            nontext_labels = (
                'nt-' + df['group_nontext'].astype(int).astype(str))
            text_labels = 't-' + df['group_text'].astype(int).astype(str)
            labels = nontext_labels.where(
                in_nontext, text_labels.where(in_text))
            # Object dtype: the column mixes string labels with the
            # integer sentinel -1.
            labels = labels.astype(object)
            if 'group' in df.columns:
                # Ungrouped rows keep whatever label was already present.
                labels = labels.where(labels.notna(), df['group'])
            # Always create the column, even when no row is grouped, so the
            # pruning below never hits a missing 'group' key.
            df['group'] = labels

            # A label seen on one row only is not a repetition. A lone
            # missing value is also flagged here, which is harmless because
            # every missing value becomes -1 on the next line anyway.
            singleton = ~df['group'].duplicated(keep=False)
            df.loc[singleton, 'group'] = -1
            df['group'] = df['group'].fillna(-1)

        self.compos_dataframe = df
Пример #3
0
    def repetitive_group_recognition(self, show=False, clean_attrs=True):
        """Recognise repeated components and store their group labels.

        Runs the non-text and text repetition recognisers from ``rep`` and
        left-merges both results into ``self.compos_dataframe`` (pandas
        merges on the columns the frames share). The merged frame replaces
        ``self.compos_dataframe``; nothing is returned.

        Args:
            show: Forwarded unchanged to both recognisers.
            clean_attrs: When true, drop every column whose name contains
                ``'cluster'``, replace missing values with ``-1`` and build
                a ``group`` column holding ``'nt-<id>'`` (taken from
                ``group_nontext``) or otherwise ``'t-<id>'`` (taken from
                ``group_text``). Rows in neither group are left missing in
                ``group``; the column is created even when no row is
                grouped.
        """
        df_nontext = rep.recog_repetition_nontext(self, show)
        df_text = rep.recog_repetition_text(self, show)

        df = self.compos_dataframe.merge(df_nontext, how='left')
        if 'alignment' not in df.columns:
            # Neither merged frame had an 'alignment' column: start from an
            # all-missing one so the fill below cannot raise KeyError.
            df['alignment'] = float('nan')

        # Rows still lacking an alignment take the text recogniser's value;
        # the assignment is aligned on index labels.
        df.loc[df['alignment'].isna(), 'alignment'] = df_text['alignment']
        df = df.merge(df_text, how='left')

        if clean_attrs:
            df = df.drop(columns=list(df.filter(like='cluster')))
            df = df.fillna(-1)

            # After fillna, -1 marks "not in a group". A non-text group
            # takes precedence over a text group on the same row.
            in_nontext = df['group_nontext'] != -1
            in_text = ~in_nontext & (df['group_text'] != -1)
            nontext_labels = (
                'nt-' + df['group_nontext'].astype(int).astype(str))
            text_labels = 't-' + df['group_text'].astype(int).astype(str)
            labels = nontext_labels.where(
                in_nontext, text_labels.where(in_text))
            # Object dtype keeps the string labels next to missing markers.
            labels = labels.astype(object)
            if 'group' in df.columns:
                # Ungrouped rows keep whatever label was already present.
                labels = labels.where(labels.notna(), df['group'])
            df['group'] = labels

        self.compos_dataframe = df