def test_merge_most_common_chars_count(self):
    """Check ``vocab_count`` of a merged profile for several ``_top_k_chars`` setups.

    Two TextProfilers are updated with small samples and then added together
    four times, changing ``_top_k_chars`` on the operands between merges:

    1. neither operand has ``_top_k_chars`` assigned by this test,
    2. only the first operand has it set (to 3),
    3. both operands have it set to 3,
    4. the operands have it set to 2 and 3 respectively.
    """
    first = TextProfiler("Name")
    first.update(pd.Series(["this is test,", " this is a test sentence"]))

    second = TextProfiler("Name")
    second.update(pd.Series(["this is", "this"]))

    # Expected counts when the merged result is not truncated: all ten
    # distinct characters seen across both samples.
    untruncated = {
        's': 10,
        't': 9,
        ' ': 8,
        'i': 7,
        'e': 5,
        'h': 4,
        'n': 2,
        ',': 1,
        'a': 1,
        'c': 1,
    }
    # Expected counts when only the three most frequent characters are kept.
    three_most_common = {'s': 10, 't': 9, ' ': 8}

    # 1) Defaults on both sides: the full merged vocabulary is expected.
    merged_vocab = (first + second).profile["vocab_count"]
    self.assertDictEqual(untruncated, merged_vocab)

    # 2) Limit set on the first profile only: the expectation is still the
    #    full merged vocabulary (all ten characters).
    first._top_k_chars = 3
    merged_vocab = (first + second).profile["vocab_count"]
    self.assertDictEqual(untruncated, merged_vocab)

    # 3) Same limit on both profiles: only the top three characters remain.
    first._top_k_chars = 3
    second._top_k_chars = 3
    merged_vocab = (first + second).profile["vocab_count"]
    self.assertDictEqual(three_most_common, merged_vocab)

    # 4) Different limits (2 and 3): three characters are expected, i.e. the
    #    result matches the larger of the two limits.
    first._top_k_chars = 2
    second._top_k_chars = 3
    merged_vocab = (first + second).profile["vocab_count"]
    self.assertDictEqual(three_most_common, merged_vocab)
def test_merge_most_common_chars_count(self):
    """Verify the merged ``vocab_count`` under different ``_top_k_chars`` values.

    Builds two TextProfilers from fixed samples, then merges them with ``+``
    after each change to the operands' ``_top_k_chars`` and compares the
    resulting ``profile["vocab_count"]`` with the expected character counts.
    """
    profiler_a = TextProfiler("Name")
    data_a = pd.Series(["this is test,", " this is a test sentence"])
    profiler_a.update(data_a)

    profiler_b = TextProfiler("Name")
    data_b = pd.Series(["this is", "this"])
    profiler_b.update(data_b)

    def check_merged_vocab(expected):
        # Merge the two profilers in their current state and compare the
        # reported character counts with the expectation.
        combined = profiler_a + profiler_b
        self.assertDictEqual(expected, combined.profile["vocab_count"])

    # All ten distinct characters across both samples, with summed counts.
    every_char = {
        "s": 10,
        "t": 9,
        " ": 8,
        "i": 7,
        "e": 5,
        "h": 4,
        "n": 2,
        ",": 1,
        "a": 1,
        "c": 1,
    }
    top_three = {"s": 10, "t": 9, " ": 8}

    ### neither profile has its most-common-chars limit changed:
    ### the complete merged vocabulary (10 entries) is expected
    check_merged_vocab(every_char)

    ### one profile keeps its default, the other has the limit set:
    ### the complete merged vocabulary is still expected
    profiler_a._top_k_chars = 3
    check_merged_vocab(every_char)

    ### equal number of most common chars on both profiles
    profiler_a._top_k_chars = 3
    profiler_b._top_k_chars = 3
    check_merged_vocab(top_three)

    ### different number of most common chars (2 vs 3):
    ### three entries are expected in the merged result
    profiler_a._top_k_chars = 2
    profiler_b._top_k_chars = 3
    check_merged_vocab(top_three)