Пример #1
0
def apply_grammar_check(heading: str,
                        new_dataframe: pd.DataFrame,
                        text_column: str,
                        parallelisation_method: str = DEFAULT_PARALLEL_METHOD) -> None:
    """Build the grammar-check steps and pass them to ``generate_features``.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``.
        text_column: Column reference used as the input of the first step.
            It sits in the same tuple slot as the column-name constants,
            so it is annotated as ``str`` rather than ``dict``.
        parallelisation_method: Forwarded unchanged to
            ``generate_features``; defaults to ``DEFAULT_PARALLEL_METHOD``.

    Returns:
        None. Any result of ``generate_features`` is discarded.
    """
    # Each step is a 3-tuple. Judging by the names it reads as
    # (output column, input column, transformation) -- TODO confirm against
    # generate_features. The second step takes as input the column named by
    # the first step's first element.
    grammar_checks_steps = [
        (GRAMMAR_CHECK_SCORE_COL, text_column, grammar_check_score),
        (GRAMMAR_CHECK_COL, GRAMMAR_CHECK_SCORE_COL, grammar_quality),
    ]
    generate_features(heading, grammar_checks_steps, new_dataframe,
                      parallelisation_method)
Пример #2
0
def apply_spelling_check(
        heading: str,
        new_dataframe: pd.DataFrame,
        text_column: str,
        parallelisation_method: str = DEFAULT_PARALLEL_METHOD) -> None:
    """Build the spelling-check steps and pass them to ``generate_features``.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``.
        text_column: Column reference used as the input of the first step.
            It sits in the same tuple slot as the column-name constants,
            so it is annotated as ``str`` rather than ``dict``.
        parallelisation_method: Forwarded unchanged to
            ``generate_features``; defaults to ``DEFAULT_PARALLEL_METHOD``.

    Returns:
        None. Any result of ``generate_features`` is discarded.
    """
    # Each step is a 3-tuple. Judging by the names it reads as
    # (output column, input column, transformation) -- TODO confirm against
    # generate_features. The steps form a chain: score -> quality ->
    # summarised, each one reading the column named by the previous step.
    spelling_checks_steps = [
        (SPELLING_QUALITY_SCORE_COL, text_column, spelling_quality_score),
        (SPELLING_QUALITY_COL, SPELLING_QUALITY_SCORE_COL, spelling_quality),
        (SPELLING_QUALITY_SUMMARISED_COL, SPELLING_QUALITY_COL,
         spelling_quality_summarised),
    ]
    generate_features(heading, spelling_checks_steps, new_dataframe,
                      parallelisation_method)
Пример #3
0
def apply_ease_of_reading_check(heading: str,
                                new_dataframe: pd.DataFrame,
                                text_column: str,
                                parallelisation_method: str = DEFAULT_PARALLEL_METHOD) -> None:
    """Build the ease-of-reading steps and pass them to ``generate_features``.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``.
        text_column: Column reference used as the input of the first step.
            It sits in the same tuple slot as the column-name constants,
            so it is annotated as ``str`` rather than ``dict``.
        parallelisation_method: Forwarded unchanged to
            ``generate_features``; defaults to ``DEFAULT_PARALLEL_METHOD``.

    Returns:
        None. Any result of ``generate_features`` is discarded.
    """
    # Each step is a 3-tuple. Judging by the names it reads as
    # (output column, input column, transformation) -- TODO confirm against
    # generate_features. The steps form a chain: score -> ease of reading ->
    # summarised, each one reading the column named by the previous step.
    ease_of_reading_steps = [
        (EASE_OF_READING_SCORE_COL, text_column, ease_of_reading_score),
        (EASE_OF_READING_COL, EASE_OF_READING_SCORE_COL, ease_of_reading),
        (EASE_OF_READING_SUMMARISED_COL, EASE_OF_READING_COL, ease_of_reading_summarised),
    ]
    generate_features(
        heading, ease_of_reading_steps,
        new_dataframe, parallelisation_method
    )
Пример #4
0
def apply_high_level_features(
        heading: str,
        new_dataframe: pd.DataFrame,
        text_column: str,
        parallelisation_method: str = DEFAULT_PARALLEL_METHOD) -> None:
    """Build the sentiment steps and pass them to ``generate_features``.

    Two chains are listed: one for sentiment polarity and one for sentiment
    subjectivity. Each chain starts from ``text_column``.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``.
        text_column: Column reference used as the input of the first step
            of each chain. It sits in the same tuple slot as the
            column-name constants, so it is annotated as ``str`` rather
            than ``dict``.
        parallelisation_method: Forwarded unchanged to
            ``generate_features``; defaults to ``DEFAULT_PARALLEL_METHOD``.

    Returns:
        None. Any result of ``generate_features`` is discarded.
    """
    # Each step is a 3-tuple. Judging by the names it reads as
    # (output column, input column, transformation) -- TODO confirm against
    # generate_features.
    high_level_features_steps = [
        # Polarity chain: score -> polarity -> summarised.
        (SENTIMENT_POLARITY_SCORE_COL, text_column, sentiment_polarity_score),
        (SENTIMENT_POLARITY_COL, SENTIMENT_POLARITY_SCORE_COL,
         sentiment_polarity),
        (SENTIMENT_POLARITY_SUMMARISED_COL, SENTIMENT_POLARITY_COL,
         sentiment_polarity_summarised),
        # Subjectivity chain: score -> subjectivity -> summarised.
        (SENTIMENT_SUBJECTIVITY_SCORE_COL, text_column,
         sentiment_subjectivity_score),
        (SENTIMENT_SUBJECTIVITY_COL, SENTIMENT_SUBJECTIVITY_SCORE_COL,
         sentiment_subjectivity),
        (SENTIMENT_SUBJECTIVITY_SUMMARISED_COL, SENTIMENT_SUBJECTIVITY_COL,
         sentiment_subjectivity_summarised),
    ]
    generate_features(heading, high_level_features_steps, new_dataframe,
                      parallelisation_method)
Пример #5
0
def apply_granular_features(
        heading: str,
        new_dataframe: pd.DataFrame,
        text_column: str,
        parallelisation_method: str = DEFAULT_PARALLEL_METHOD) -> None:
    """Build the granular count steps and pass them to ``generate_features``.

    Every step listed here uses ``text_column`` as its second element; no
    step reads a column named by another step.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``.
        text_column: Column reference used as the input of every step.
            It sits in the same tuple slot that other feature groups fill
            with column-name constants, so it is annotated as ``str``
            rather than ``dict``.
        parallelisation_method: Forwarded unchanged to
            ``generate_features``; defaults to ``DEFAULT_PARALLEL_METHOD``.

    Returns:
        None. Any result of ``generate_features`` is discarded.
    """
    # Each step is a 3-tuple. Judging by the names it reads as
    # (output column, input column, counting function) -- TODO confirm
    # against generate_features.
    granular_features_steps = [
        (SENTENCES_COUNT_COL, text_column, count_sentences),
        (CHARACTERS_COUNT_COL, text_column, count_chars),
        (SPACES_COUNT_COL, text_column, count_spaces),
        (COUNT_WORDS_COL, text_column, count_words),
        (DUPLICATES_COUNT_COL, text_column, count_duplicates),
        (CHARS_EXCL_SPACES_COUNT_COL, text_column,
         count_characters_excluding_spaces),
        (EMOJI_COUNT_COL, text_column, count_emojis),
        (WHOLE_NUMBERS_COUNT_COL, text_column, count_whole_numbers),
        (ALPHA_NUMERIC_COUNT_COL, text_column, count_alpha_numeric),
        (NON_ALPHA_NUMERIC_COUNT_COL, text_column, count_non_alpha_numeric),
        (PUNCTUATIONS_COUNT_COL, text_column, count_punctuations),
        (STOP_WORDS_COUNT_COL, text_column, count_stop_words),
        (DATES_COUNT_COL, text_column, count_dates),
    ]
    generate_features(heading, granular_features_steps, new_dataframe,
                      parallelisation_method)
Пример #6
0
def apply_granular_features(
        heading: str,
        new_dataframe: pd.DataFrame,
        text_column: str,
        parallelisation_method: str = DEFAULT_PARALLEL_METHOD):
    """Assemble the granular count steps and pass them to ``generate_features``.

    Every step uses ``text_column`` as its middle element, so the steps are
    declared as (column constant, function) pairs and expanded into 3-tuples
    in the same order before the call.

    Args:
        heading: Forwarded unchanged to ``generate_features``.
        new_dataframe: Dataframe forwarded to ``generate_features``.
        text_column: Column reference placed in the middle slot of every step.
        parallelisation_method: Forwarded unchanged to
            ``generate_features``; defaults to ``DEFAULT_PARALLEL_METHOD``.
    """
    # Pair order is significant: it fixes the order of the generated steps.
    column_and_function_pairs = (
        (SENTENCES_COUNT_COL, count_sentences),
        (CHARACTERS_COUNT_COL, count_chars),
        (REPEATED_LETTERS_COUNT_COL, count_repeated_letters),
        (SPACES_COUNT_COL, count_spaces),
        (CHARS_EXCL_SPACES_COUNT_COL, count_characters_excluding_spaces),
        (REPEATED_SPACES_COUNT_COL, count_repeated_spaces),
        (WHITESPACES_COUNT_COL, count_whitespaces),
        (CHARS_EXCL_WHITESPACES_COUNT_COL,
         count_characters_excluding_whitespaces),
        (REPEATED_WHITESPACES_COUNT_COL, count_repeated_whitespaces),
        (COUNT_WORDS_COL, count_words),
        (DUPLICATES_COUNT_COL, count_duplicates),
        (EMOJI_COUNT_COL, count_emojis),
        (REPEATED_DIGITS_COUNT_COL, count_repeated_digits),
        (WHOLE_NUMBERS_COUNT_COL, count_whole_numbers),
        (ALPHA_NUMERIC_COUNT_COL, count_alpha_numeric),
        (NON_ALPHA_NUMERIC_COUNT_COL, count_non_alpha_numeric),
        (PUNCTUATIONS_COUNT_COL, count_punctuations),
        (REPEATED_PUNCTUATIONS_COUNT_COL, count_repeated_punctuations),
        (STOP_WORDS_COUNT_COL, count_stop_words),
        (DATES_COUNT_COL, count_dates),
        (NOUN_PHASE_COUNT_COL, count_noun_phase),
        (ENGLISH_CHARACTERS_COUNT_COL, count_english_chars),
        (NON_ENGLISH_CHARACTERS_COUNT_COL, count_non_english_chars),
    )
    # Expand each pair into the 3-tuple shape used by the step list.
    steps_for_features = [
        (feature_column, text_column, feature_function)
        for feature_column, feature_function in column_and_function_pairs
    ]
    generate_features(
        heading,
        steps_for_features,
        new_dataframe,
        parallelisation_method,
    )