def apply_grammar_check(
    heading: str,
    new_dataframe: pd.DataFrame,
    text_column: str,
    parallelisation_method: str = DEFAULT_PARALLEL_METHOD,
) -> None:
    """Run the grammar-check feature steps via ``generate_features``.

    Builds an ordered list of step tuples and forwards it, together with
    the arguments received here, to ``generate_features``. This function
    itself returns nothing.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``
            (presumably extended in place with the new columns -- confirm
            against ``generate_features``).
        text_column: Occupies the same tuple slot as the ``*_COL``
            constants, so presumably the name of the column holding the
            raw text -- confirm against ``generate_features``.
        parallelisation_method: Forwarded unchanged; defaults to
            ``DEFAULT_PARALLEL_METHOD``.
    """
    # Each step appears to be (target column, source column, function).
    # The second step names the first step's target as its source, so the
    # order of the list is presumably significant.
    grammar_checks_steps = [
        (GRAMMAR_CHECK_SCORE_COL, text_column, grammar_check_score),
        (GRAMMAR_CHECK_COL, GRAMMAR_CHECK_SCORE_COL, grammar_quality),
    ]
    generate_features(
        heading, grammar_checks_steps, new_dataframe, parallelisation_method
    )
def apply_spelling_check(
    heading: str,
    new_dataframe: pd.DataFrame,
    text_column: str,
    parallelisation_method: str = DEFAULT_PARALLEL_METHOD,
) -> None:
    """Run the spelling-quality feature steps via ``generate_features``.

    Builds an ordered list of step tuples and forwards it, together with
    the arguments received here, to ``generate_features``. This function
    itself returns nothing.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``
            (presumably extended in place with the new columns -- confirm
            against ``generate_features``).
        text_column: Occupies the same tuple slot as the ``*_COL``
            constants, so presumably the name of the column holding the
            raw text -- confirm against ``generate_features``.
        parallelisation_method: Forwarded unchanged; defaults to
            ``DEFAULT_PARALLEL_METHOD``.
    """
    # Each step appears to be (target column, source column, function).
    # Every step after the first names the previous step's target as its
    # source (score -> quality -> summarised), so order is presumably
    # significant.
    spelling_checks_steps = [
        (SPELLING_QUALITY_SCORE_COL, text_column, spelling_quality_score),
        (SPELLING_QUALITY_COL, SPELLING_QUALITY_SCORE_COL, spelling_quality),
        (
            SPELLING_QUALITY_SUMMARISED_COL,
            SPELLING_QUALITY_COL,
            spelling_quality_summarised,
        ),
    ]
    generate_features(
        heading, spelling_checks_steps, new_dataframe, parallelisation_method
    )
def apply_ease_of_reading_check(
    heading: str,
    new_dataframe: pd.DataFrame,
    text_column: str,
    parallelisation_method: str = DEFAULT_PARALLEL_METHOD,
) -> None:
    """Run the ease-of-reading feature steps via ``generate_features``.

    Builds an ordered list of step tuples and forwards it, together with
    the arguments received here, to ``generate_features``. This function
    itself returns nothing.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``
            (presumably extended in place with the new columns -- confirm
            against ``generate_features``).
        text_column: Occupies the same tuple slot as the ``*_COL``
            constants, so presumably the name of the column holding the
            raw text -- confirm against ``generate_features``.
        parallelisation_method: Forwarded unchanged; defaults to
            ``DEFAULT_PARALLEL_METHOD``.
    """
    # Each step appears to be (target column, source column, function).
    # Every step after the first names the previous step's target as its
    # source (score -> label -> summarised), so order is presumably
    # significant.
    ease_of_reading_steps = [
        (EASE_OF_READING_SCORE_COL, text_column, ease_of_reading_score),
        (EASE_OF_READING_COL, EASE_OF_READING_SCORE_COL, ease_of_reading),
        (
            EASE_OF_READING_SUMMARISED_COL,
            EASE_OF_READING_COL,
            ease_of_reading_summarised,
        ),
    ]
    generate_features(
        heading, ease_of_reading_steps, new_dataframe, parallelisation_method
    )
def apply_high_level_features(
    heading: str,
    new_dataframe: pd.DataFrame,
    text_column: str,
    parallelisation_method: str = DEFAULT_PARALLEL_METHOD,
) -> None:
    """Run the sentiment polarity and subjectivity steps via ``generate_features``.

    Builds an ordered list of step tuples (three for polarity, then three
    for subjectivity) and forwards it, together with the arguments
    received here, to ``generate_features``. This function itself returns
    nothing.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``
            (presumably extended in place with the new columns -- confirm
            against ``generate_features``).
        text_column: Occupies the same tuple slot as the ``*_COL``
            constants, so presumably the name of the column holding the
            raw text -- confirm against ``generate_features``.
        parallelisation_method: Forwarded unchanged; defaults to
            ``DEFAULT_PARALLEL_METHOD``.
    """
    # Each step appears to be (target column, source column, function).
    # Within each group the later steps name the earlier step's target as
    # their source (score -> label -> summarised), so order is presumably
    # significant.
    high_level_features_steps = [
        # Polarity chain, starting from the raw text column.
        (SENTIMENT_POLARITY_SCORE_COL, text_column, sentiment_polarity_score),
        (
            SENTIMENT_POLARITY_COL,
            SENTIMENT_POLARITY_SCORE_COL,
            sentiment_polarity,
        ),
        (
            SENTIMENT_POLARITY_SUMMARISED_COL,
            SENTIMENT_POLARITY_COL,
            sentiment_polarity_summarised,
        ),
        # Subjectivity chain, also starting from the raw text column.
        (
            SENTIMENT_SUBJECTIVITY_SCORE_COL,
            text_column,
            sentiment_subjectivity_score,
        ),
        (
            SENTIMENT_SUBJECTIVITY_COL,
            SENTIMENT_SUBJECTIVITY_SCORE_COL,
            sentiment_subjectivity,
        ),
        (
            SENTIMENT_SUBJECTIVITY_SUMMARISED_COL,
            SENTIMENT_SUBJECTIVITY_COL,
            sentiment_subjectivity_summarised,
        ),
    ]
    generate_features(
        heading,
        high_level_features_steps,
        new_dataframe,
        parallelisation_method,
    )
def apply_granular_features(
    heading: str,
    new_dataframe: pd.DataFrame,
    text_column: str,
    parallelisation_method: str = DEFAULT_PARALLEL_METHOD,
) -> None:
    """Run the granular (count-based) feature steps via ``generate_features``.

    Builds a list of step tuples, every one of which uses ``text_column``
    as its second element, and forwards it together with the arguments
    received here to ``generate_features``. This function itself returns
    nothing.

    NOTE(review): a second ``apply_granular_features`` with a longer step
    list is defined after this one. If both definitions live in the same
    module, the later one replaces this one at import time -- confirm
    which is intended and remove the other.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``
            (presumably extended in place with the new columns -- confirm
            against ``generate_features``).
        text_column: Presumably the name of the column holding the raw
            text -- confirm against ``generate_features``.
        parallelisation_method: Forwarded unchanged; defaults to
            ``DEFAULT_PARALLEL_METHOD``.
    """
    # Each step appears to be (target column, source column, function);
    # here every step reads from text_column, never from another step.
    granular_features_steps = [
        (SENTENCES_COUNT_COL, text_column, count_sentences),
        (CHARACTERS_COUNT_COL, text_column, count_chars),
        (SPACES_COUNT_COL, text_column, count_spaces),
        (COUNT_WORDS_COL, text_column, count_words),
        (DUPLICATES_COUNT_COL, text_column, count_duplicates),
        (
            CHARS_EXCL_SPACES_COUNT_COL,
            text_column,
            count_characters_excluding_spaces,
        ),
        (EMOJI_COUNT_COL, text_column, count_emojis),
        (WHOLE_NUMBERS_COUNT_COL, text_column, count_whole_numbers),
        (ALPHA_NUMERIC_COUNT_COL, text_column, count_alpha_numeric),
        (NON_ALPHA_NUMERIC_COUNT_COL, text_column, count_non_alpha_numeric),
        (PUNCTUATIONS_COUNT_COL, text_column, count_punctuations),
        (STOP_WORDS_COUNT_COL, text_column, count_stop_words),
        (DATES_COUNT_COL, text_column, count_dates),
    ]
    generate_features(
        heading, granular_features_steps, new_dataframe, parallelisation_method
    )
def apply_granular_features(
    heading: str,
    new_dataframe: pd.DataFrame,
    text_column: str,
    parallelisation_method: str = DEFAULT_PARALLEL_METHOD,
):
    """Hand the granular (count-based) feature steps to ``generate_features``.

    A table of (target column, function) pairs is expanded into 3-tuples
    with ``text_column`` in the middle slot, preserving the table order,
    and the resulting list is passed to ``generate_features`` along with
    ``heading``, ``new_dataframe`` and ``parallelisation_method``. Nothing
    is returned from this function.
    """
    # (target column, function) pairs; every step reads from text_column,
    # so the shared middle element is filled in by the comprehension below.
    column_and_function = (
        (SENTENCES_COUNT_COL, count_sentences),
        (CHARACTERS_COUNT_COL, count_chars),
        (REPEATED_LETTERS_COUNT_COL, count_repeated_letters),
        (SPACES_COUNT_COL, count_spaces),
        (CHARS_EXCL_SPACES_COUNT_COL, count_characters_excluding_spaces),
        (REPEATED_SPACES_COUNT_COL, count_repeated_spaces),
        (WHITESPACES_COUNT_COL, count_whitespaces),
        (
            CHARS_EXCL_WHITESPACES_COUNT_COL,
            count_characters_excluding_whitespaces,
        ),
        (REPEATED_WHITESPACES_COUNT_COL, count_repeated_whitespaces),
        (COUNT_WORDS_COL, count_words),
        (DUPLICATES_COUNT_COL, count_duplicates),
        (EMOJI_COUNT_COL, count_emojis),
        (REPEATED_DIGITS_COUNT_COL, count_repeated_digits),
        (WHOLE_NUMBERS_COUNT_COL, count_whole_numbers),
        (ALPHA_NUMERIC_COUNT_COL, count_alpha_numeric),
        (NON_ALPHA_NUMERIC_COUNT_COL, count_non_alpha_numeric),
        (PUNCTUATIONS_COUNT_COL, count_punctuations),
        (REPEATED_PUNCTUATIONS_COUNT_COL, count_repeated_punctuations),
        (STOP_WORDS_COUNT_COL, count_stop_words),
        (DATES_COUNT_COL, count_dates),
        (NOUN_PHASE_COUNT_COL, count_noun_phase),
        (ENGLISH_CHARACTERS_COUNT_COL, count_english_chars),
        (NON_ENGLISH_CHARACTERS_COUNT_COL, count_non_english_chars),
    )
    granular_steps = [
        (target_col, text_column, feature_fn)
        for target_col, feature_fn in column_and_function
    ]
    generate_features(
        heading, granular_steps, new_dataframe, parallelisation_method
    )