示例#1
0
    def test_if_first_column_is_non_numeric_then_raise_error(self):
        """Expect ``TypeError`` when the first column holds integers.

        NOTE(review): the method name says "non_numeric", yet the first
        column built here (``non_text_column``) is numeric -- confirm which
        wording is intended.
        """
        columns = {"non_text_column": [8000, 200], "description": ["spam", "ham"]}
        numeric_first_df = pd.DataFrame(columns)

        with pytest.raises(TypeError):
            ngram_analysis.clean_input_data(numeric_first_df)
示例#2
0
    def test_for_special_keywords(self):
        """Check cleaning of ad copy with template placeholders and symbols.

        The inputs contain ``{=...}`` / ``{KeyWord:...}`` placeholders, a
        currency amount, ``&``, ``|``, an embedded newline and a trailing
        apostrophe. The expected strings pin the exact ``cleaned_text``
        output, including the trailing space on the second and third rows.
        """
        test_df = pd.DataFrame(
            {
                "description": [
                    "Stress Free Planning For Your Big Day! Customized & All Inclusive Packages. |  no_description2",
                    "{=VenuePrice.Venue} Venue Packages Starting At {=VenuePrice.Start_price} Per Person.\nLearn More!",
                    "{KeyWord:Exceptional Services}. $90,000 |  Views of Vegas Strip Skyline & Sunrise Mountain Range!",
                    "Tie The Knot At One Of The Best Weddings'",
                ]
            }
        )
        result_df = ngram_analysis.clean_input_data(test_df)
        result_series = result_df["cleaned_text"]

        assert_series = pd.Series(
            [
                "stress free planning for your big day customized & all inclusive packages | no_description2",
                "{=venuepricevenue} venue packages starting at {=venuepricestart_price} per person learn more ",
                "{keywordexceptionalservices} $90000 | views of vegas strip skyline & sunrise mountain range ",
                "tie the knot at one of the best weddings'",
            ],
            name="cleaned_text",
        )

        # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
        # pandas.testing is the supported public home of the assert helpers.
        pd.testing.assert_series_equal(result_series, assert_series)
示例#3
0
    def test_digits_after_cleaning_being_togheter(self):
        """Check that digit groups end up joined in ``cleaned_text``.

        ``"1 800 800"`` is expected to become ``"1800800"`` and
        ``"$200,000.45"`` to become ``"$20000045"``.
        """
        test_df = pd.DataFrame({"description": ["Number is 1 800 800", "$200,000.45"]})
        result_df = ngram_analysis.clean_input_data(test_df)
        result_series = result_df["cleaned_text"]

        assert_series = pd.Series(
            ["number is 1800800", "$20000045"], name="cleaned_text"
        )

        # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
        # pandas.testing is the supported public home of the assert helpers.
        pd.testing.assert_series_equal(result_series, assert_series)
示例#4
0
    def test_for_no_multiple_spaces_present(self):
        """Check that runs of dots do not leave repeated spaces behind.

        ``"Num.ber ... is 1 800 800"`` is expected to clean to
        ``"num ber is 1800800"`` (single spaces only), and
        ``"$200,000...45"`` to ``"$20000045"``.
        """
        test_df = pd.DataFrame(
            {"description": ["Num.ber ... is 1 800 800", "$200,000...45"]}
        )
        result_df = ngram_analysis.clean_input_data(test_df)
        # Named result_series for consistency with the sibling tests.
        result_series = result_df["cleaned_text"]

        assert_series = pd.Series(
            ["num ber is 1800800", "$20000045"], name="cleaned_text"
        )

        # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
        # pandas.testing is the supported public home of the assert helpers.
        pd.testing.assert_series_equal(result_series, assert_series)
示例#5
0
    def test_lemmatization_support(self):
        """Check ``clean_input_data(..., lemmatize=True)`` against fixed lemmas.

        The expected strings pin lemmatized forms such as ``getting`` ->
        ``get`` and ``packages`` -> ``package``, with emoji kept in place.

        NOTE(review): the ``-PRON-`` placeholders look like spaCy 2.x pronoun
        lemmas -- confirm which lemmatizer version this expectation targets.
        """
        test_df = pd.DataFrame(
            {
                "description": [
                    "Who's getting popcorn?",
                    "Lost your card? Freeze it in seconds to keep it safe ❄️\nDefrost it if you find it again 🔥",
                    "stress free planning for your big day customized & all inclusive packages",
                ]
            }
        )
        result_df = ngram_analysis.clean_input_data(test_df, lemmatize=True)
        result_series = result_df["cleaned_text"]

        assert_series = pd.Series(
            [
                "who's get popcorn",
                "lose -PRON- card freeze -PRON- in second to keep -PRON- safe ❄️ defrost -PRON- if -PRON- find -PRON- again 🔥",
                "stress free planning for -PRON- big day customize & all inclusive package",
            ],
            name="cleaned_text",
        )

        # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
        # pandas.testing is the supported public home of the assert helpers.
        pd.testing.assert_series_equal(result_series, assert_series)
示例#6
0
    def test_for_emoji_support(self):
        """Check that emoji survive cleaning.

        Each description ends with, or contains, an emoji. The expected
        ``cleaned_text`` keeps those emoji while the text is lowercased,
        ``,`` and ``!`` are dropped, and the embedded newline becomes a
        single space.
        """
        test_df = pd.DataFrame(
            {
                "description": [
                    "Top marks for customer communication and our test_company app 🤓",
                    "Get great exchange rates abroad, and no fees on card payments! 🙌",
                    "Freeze it in seconds to keep it safe ❄️\nDefrost it if you find it again 🔥",
                ]
            }
        )
        result_df = ngram_analysis.clean_input_data(test_df)
        result_series = result_df["cleaned_text"]

        assert_series = pd.Series(
            [
                "top marks for customer communication and our test_company app 🤓",
                "get great exchange rates abroad and no fees on card payments 🙌",
                "freeze it in seconds to keep it safe ❄️ defrost it if you find it again 🔥",
            ],
            name="cleaned_text",
        )

        # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
        # pandas.testing is the supported public home of the assert helpers.
        pd.testing.assert_series_equal(result_series, assert_series)