def preparing_data(data):
    """Run the text-preparation pipeline on a job-postings table.

    Keeps only the ``Level``, ``Type_of_Job`` and ``Description`` columns,
    drops rows whose ``Level`` equals ``'Not Applicable'``, and then passes
    the result through the ``md`` helpers in this order: regular-expression
    cleaning of ``Description``, stop-word removal, lemmatization of the
    ``token_no_stop_words`` column, and categorization.

    Args:
        data: Table indexable by a list of column names and by a boolean
            mask (presumably a pandas DataFrame -- confirm against callers).

    Returns:
        Whatever ``md.data_categorization`` returns for the processed table.
    """
    wanted_columns = ['Level', 'Type_of_Job', 'Description']
    selected = data[wanted_columns]

    # Rows without a usable level are excluded before any text processing.
    has_level = selected.Level != 'Not Applicable'
    filtered = selected[has_level]

    regex_cleaned = md.cleaning_text_regular_exp(filtered, 'Description')
    without_stop_words = md.removing_stop_words(regex_cleaned)

    # NOTE: a stemming step used to sit here but is disabled; only
    # lemmatization is applied. The "token_no_stop_words" column is
    # presumably created by md.removing_stop_words -- TODO confirm.
    lemmatized = md.lemmatizing(without_stop_words, "token_no_stop_words")

    return md.data_categorization(lemmatized)
def preparing_data(data):
    """Prepare the job-postings table for modelling.

    Steps, in order:
      1. Keep the ``Level``, ``Type_of_Job`` and ``Description`` columns.
      2. Drop rows whose ``Level`` is ``'Not Applicable'``.
      3. Clean ``Description`` with ``md.cleaning_text_regular_exp``.
      4. Remove stop words with ``md.removing_stop_words``.
      5. Lemmatize the ``Description`` column with ``md.lemmatizing``.
      6. Categorize the result with ``md.data_categorization``.

    Args:
        data: Table indexable by a list of column names and by a boolean
            mask (presumably a pandas DataFrame -- confirm against callers).

    Returns:
        The value produced by ``md.data_categorization``.
    """
    frame = data[['Level', 'Type_of_Job', 'Description']]
    frame = frame[frame.Level != 'Not Applicable']

    # Each helper receives the previous step's output.
    frame = md.cleaning_text_regular_exp(frame, 'Description')
    frame = md.removing_stop_words(frame)
    # NOTE: stemming is disabled here; lemmatization is used on its own.
    frame = md.lemmatizing(frame, "Description")

    return md.data_categorization(frame)