Exemplo n.º 1
0
    def test_merge_most_common_chars_count(self):
        """Verify ``vocab_count`` of a merged profile under several top-k setups.

        Two ``TextProfiler`` instances are fed fixed samples and added
        together; ``_top_k_chars`` is then varied on the operands and the
        merged ``vocab_count`` is compared against hand-counted expectations.
        """
        left = TextProfiler("Name")
        left.update(pd.Series(["this is test,", " this is a test sentence"]))

        right = TextProfiler("Name")
        right.update(pd.Series(["this is", "this"]))

        # Character frequencies over all four sample strings combined
        # (10 distinct characters).
        every_char = {
            's': 10,
            't': 9,
            ' ': 8,
            'i': 7,
            'e': 5,
            'h': 4,
            'n': 2,
            ',': 1,
            'a': 1,
            'c': 1
        }
        # The three most frequent characters of the combined samples.
        three_most_frequent = {'s': 10, 't': 9, ' ': 8}

        # Scenario 1: neither operand has a top-k limit set, so the merged
        # vocab_count is expected to hold every character.
        merged = left + right
        self.assertDictEqual(every_char, merged.profile["vocab_count"])

        # Scenario 2: only the left operand has a limit; the merged result is
        # still expected to be unrestricted (all 10 characters).
        left._top_k_chars = 3
        merged = left + right
        self.assertDictEqual(every_char, merged.profile["vocab_count"])

        # Scenario 3: both operands share the same limit of 3.
        left._top_k_chars = 3
        right._top_k_chars = 3
        merged = left + right
        self.assertDictEqual(
            three_most_frequent, merged.profile["vocab_count"])

        # Scenario 4: limits differ (2 vs 3); three entries are expected,
        # i.e. the result matches the larger of the two limits.
        left._top_k_chars = 2
        right._top_k_chars = 3
        merged = left + right
        self.assertDictEqual(
            three_most_frequent, merged.profile["vocab_count"])
Exemplo n.º 2
0
    def test_merge_most_common_chars_count(self):
        """Check the merged ``vocab_count`` for different ``_top_k_chars`` values.

        Builds two ``TextProfiler`` objects from fixed samples, merges them
        with ``+`` after each change to ``_top_k_chars``, and asserts the
        resulting ``vocab_count`` dictionary.
        """
        profiler_a = TextProfiler("Name")
        data_a = pd.Series(["this is test,", " this is a test sentence"])
        profiler_a.update(data_a)

        profiler_b = TextProfiler("Name")
        data_b = pd.Series(["this is", "this"])
        profiler_b.update(data_b)

        def merged_vocab_count():
            # Merge anew on every call so the current _top_k_chars settings
            # of both operands are the ones in effect.
            combined = profiler_a + profiler_b
            report = combined.profile
            return report["vocab_count"]

        # Counts of all 10 distinct characters across both samples.
        unrestricted = {
            "s": 10,
            "t": 9,
            " ": 8,
            "i": 7,
            "e": 5,
            "h": 4,
            "n": 2,
            ",": 1,
            "a": 1,
            "c": 1,
        }
        # Only the three highest counts.
        limited_to_three = {"s": 10, "t": 9, " ": 8}

        # Both profiles keep their default top-k setting: every character
        # is expected in the merged result.
        self.assertDictEqual(unrestricted, merged_vocab_count())

        # One profile has a limit, the other keeps the default: the merged
        # result is still expected to contain every character.
        profiler_a._top_k_chars = 3
        self.assertDictEqual(unrestricted, merged_vocab_count())

        # Same limit on both profiles.
        profiler_a._top_k_chars = 3
        profiler_b._top_k_chars = 3
        self.assertDictEqual(limited_to_three, merged_vocab_count())

        # Different limits (2 and 3): the expectation has three entries,
        # matching the larger limit.
        profiler_a._top_k_chars = 2
        profiler_b._top_k_chars = 3
        self.assertDictEqual(limited_to_three, merged_vocab_count())