Пример #1
0
def data_cleaning(data, type=TYPE):
    """Strip digits from the description column and run the PreProcessor.

    The input is copied first, so the caller's object is not modified by
    the column assignments made here.

    Args:
        data: Table-like object supporting ``copy()``, ``in`` and column
            get/set (presumably a pandas DataFrame -- confirm with callers).
        type: Kind of records in ``data``. ``'tickets'`` is passed to the
            preprocessor as-is; for any other value ``SOURCE_COL`` and
            ``SUBJECT_COL`` are added/overwritten and the preprocessor is
            built with ``desc_col='body'`` and ``note_flag=True``. The name
            shadows the builtin but is kept because it is part of the
            public signature.

    Returns:
        Whatever ``PreProcessor._cleaning`` returns for the prepared copy.
    """
    df = data.copy()

    if DESC_COL in df:
        # Compile once instead of re-resolving the pattern for every row.
        digits = re.compile(r'[0-9]+')
        # str() so that non-string cells (numbers, missing values) do not
        # make the substitution raise.
        df[DESC_COL] = df[DESC_COL].apply(lambda x: digits.sub('', str(x)))

    # Single-argument print(...) emits the same text on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print('class instantiation')
    if type == 'tickets':
        preprocessor = PreProcessor(email_cleaning=True,
                                    custom_cleaning=True,
                                    note_flag=False)
    else:
        print('include source and subject columns in data')
        # Constant placeholder values for the two extra columns.
        df[SOURCE_COL] = 1
        df[SUBJECT_COL] = ''
        # NOTE(review): the description column is hard-coded to 'body' here
        # while the digit stripping above uses DESC_COL -- confirm the two
        # are meant to refer to the same column.
        preprocessor = PreProcessor(email_cleaning=True,
                                    custom_cleaning=True,
                                    desc_col='body',
                                    note_flag=True)

    # NOTE(review): _cleaning is a non-public method of PreProcessor;
    # kept because no public entry point is visible from this file.
    return preprocessor._cleaning(df)