def test_options_most_common_chars_count(self):
    """Verify that ``top_k_chars`` caps the entries reported in ``vocab_count``.

    The same four-string sample is profiled under several ``top_k_chars``
    settings: ``None``, ``3``, ``2`` and ``300`` (more than the number of
    distinct characters in the sample). For each setting the resulting
    ``profile["vocab_count"]`` is compared against the expected mapping.
    """
    rows = ["this is test,", " this is a test sentence", "this is", "this"]

    # Expected counts when no truncation takes place.
    all_counts = {
        's': 10, 't': 9, ' ': 8, 'i': 7, 'e': 5,
        'h': 4, 'n': 2, ',': 1, 'a': 1, 'c': 1
    }

    # (top_k_chars setting, expected vocab_count)
    scenarios = [
        (None, all_counts),                   # None value for number of chars
        (3, {'s': 10, 't': 9, ' ': 8}),       # keep the three most common
        (2, {'s': 10, 't': 9}),               # change number of common chars
        (300, all_counts),                    # larger than the vocabulary
    ]

    # A single options object is mutated and handed to a brand-new profiler
    # for every scenario.
    opts = TextProfilerOptions()
    for top_k, wanted in scenarios:
        opts.top_k_chars = top_k
        profiler = TextProfiler("Name", options=opts)
        profiler.update(pd.Series(rows))
        self.assertDictEqual(wanted, profiler.profile["vocab_count"])
def test_options_most_common_chars_count(self):
    """Check ``vocab_count`` for different ``top_k_chars`` option values.

    Covers ``None``, two small limits (``3`` and ``2``) and a limit of
    ``300``, which exceeds the number of distinct characters in the sample.
    """
    options = TextProfilerOptions()
    texts = ["this is test,", " this is a test sentence", "this is", "this"]

    def vocab_count_with(limit):
        # The shared options object is updated in place, then a fresh
        # profiler is built from it and fed the sample.
        options.top_k_chars = limit
        profiler = TextProfiler("Name", options=options)
        profiler.update(pd.Series(texts))
        return profiler.profile["vocab_count"]

    # Expected counts for every character in the sample.
    every_char = {
        "s": 10,
        "t": 9,
        " ": 8,
        "i": 7,
        "e": 5,
        "h": 4,
        "n": 2,
        ",": 1,
        "a": 1,
        "c": 1,
    }

    # None value for number of common chars
    self.assertDictEqual(every_char, vocab_count_with(None))

    # set number of common chars to 3
    self.assertDictEqual({"s": 10, "t": 9, " ": 8}, vocab_count_with(3))

    # change number of common chars
    self.assertDictEqual({"s": 10, "t": 9}, vocab_count_with(2))

    # change number of common chars greater than length of vocab_counts list
    self.assertDictEqual(every_char, vocab_count_with(300))