def test_eq(self):
    """Maps built from the same data compare equal; a different grouping does not."""
    regrouped = {'a': ['hello', 'me'], 'b': ['its', 'world']}

    subject = CategoryMap(hello_world_its_me)
    same = CategoryMap(hello_world_its_me)
    different = CategoryMap(regrouped)

    self.assertEqual(subject, same)
    self.assertNotEqual(subject, different)
def test_with_realistic_data(self):
    """Categorize statement-like descriptions and compare with the expected map.

    The Categorizer is seeded with a few known keywords; the remaining ones
    are expected to end up in the result as listed in ``expected``.
    """
    statement_lines = [
        "ach debit xxxxx0987 electricity ",
        "ach debit 1-1abcdef gas ",
        "debit card purchase xxxxx1234 apples #118 city st ",
        "ach webrecur xxxxx6161 school ",
        "debit card purchase xxxxx4321 coffee city st ",
        "debit card purchase xxxxx1234 razor blades m5789 city st ",
        "debit card purchase xxxxx4321 sandwich meat city st ",
        "recurring debit card xxxxx5678 water xxxxx9622 st ",
        "debit card purchase xxxxx4321 pipe tobacco xxxxx4000 st ",
        "debit card purchase xxxxx1234 bar soap xxxxx9100 st ",
        "debit card purchase xxxxx5678 shoes city st ",
    ]
    seed = {
        'food': ['apples', 'coffee'],
        'bills': ['electricity', 'gas'],
        'home': ['bar soap'],
    }
    expected = CategoryMap({
        'food': ['apples', 'coffee', 'sandwich meat'],
        'bills': ['electricity', 'gas', 'school', 'water'],
        'home': ['bar soap', 'razor blades', 'pipe tobacco', 'shoes'],
    })

    # NOTE(review): InputFake presumably supplies answers taken from
    # ``expected`` in place of real user input -- confirm against its definition.
    categorizer = Categorizer(CategoryMap(seed), InputFake(expected))
    result = categorizer.categorize(statement_lines)

    self.assertEqual(expected, result)
def test_should_ignore_duplicate_descriptions(self):
    """Descriptions repeating an already-seen keyword must not add new entries.

    'honey xyz', 'your xyz' and 'ralph xyz' repeat earlier keywords, and the
    expected map contains each of those keywords only once.
    """
    descriptions = [
        'hello', 'honey', 'its', 'me', 'your', 'husband', 'ralph',
        'honey xyz', 'your xyz', 'ralph xyz',
    ]
    seed = {
        'a': ['hello'],
        'b': ['its'],
        'c': ['husband'],
    }
    expected = CategoryMap({
        'a': ['hello', 'honey'],
        'b': ['its', 'me'],
        'c': ['your', 'husband'],
        'd': ['ralph'],
    })

    categorizer = Categorizer(CategoryMap(seed), InputFake(expected))
    result = categorizer.categorize(descriptions)

    self.assertEqual(expected, result)
def test_keyword_exists(self):
    """keyword_exists is true for text holding a known keyword, false otherwise."""
    category_map = CategoryMap(hello_world_its_me)

    with_keyword = 'world on fire'
    without_keyword = 'rabbit punch'

    self.assertTrue(category_map.keyword_exists(with_keyword))
    self.assertFalse(category_map.keyword_exists(without_keyword))
def test_learns_new_keywords(self):
    """A keyword is unknown before ``add`` and recognised right after it."""
    category_map = CategoryMap(hello_world_its_me)

    # Each pair is (category, keyword); the second keyword contains a space.
    additions = [('b', 'bonesaw'), ('c', 'is ready')]
    for category, keyword in additions:
        self.assertFalse(category_map.keyword_exists(keyword))
        category_map.add(category, keyword)
        self.assertTrue(category_map.keyword_exists(keyword))
def test_add_and_get(self):
    """Keywords appended with ``add`` are returned by both ``get`` and indexing."""
    category_map = CategoryMap({'a': ['hello'], 'b': ['its']})
    category_map.add('a', 'world')
    category_map.add('b', 'me')

    wanted = hello_world_its_me
    categories = ('a', 'b')

    # Method-style access first, then subscript access.
    for name in categories:
        self.assertListEqual(category_map.get(name), wanted[name])
    for name in categories:
        self.assertListEqual(category_map[name], wanted[name])
def test_ensures_intializing_dict_enforces_case(self):
    """Assert that upper-case keywords passed to the constructor read back lower-case."""
    upper_cased = {
        'A': ['HELLO', 'HONEY'],
        'B': ['ITS', 'ME'],
        'C': ['YOUR', 'HUSBAND'],
        'D': ['RALPH'],
    }
    category_map = CategoryMap(upper_cased)

    # NOTE(review): category names are not checked here; confirm whether the
    # constructor is meant to lower-case them as well.
    for category in category_map:
        for keyword in category_map[category]:
            self.assertTrue(keyword.islower())
def test_should_ignore_known_descriptions(self):
    """Descriptions that already contain a known keyword add nothing new.

    Only the third description lacks a seeded keyword, so the expected map
    gains a single new entry ('world' under 'c').
    """
    descriptions = [
        'bing bang boom hello',
        'its me your husband ralph',
        'the world is so very big',
    ]
    seed = {
        'a': ['hello'],
        'b': ['husband'],
    }
    expected = CategoryMap({
        'a': ['hello'],
        'b': ['husband'],
        'c': ['world'],
    })

    categorizer = Categorizer(CategoryMap(seed), InputFake(expected))
    result = categorizer.categorize(descriptions)

    self.assertEqual(expected, result)
def test_maintains_keywords_as_lowercase(self):
    """Assert that keywords added in upper case read back lower-case."""
    category_map = CategoryMap()
    upper_cased_pairs = [
        ('A', 'HELLO'),
        ('A', 'HONEY'),
        ('B', 'ITS'),
        ('B', 'ME'),
        ('C', 'YOUR'),
        ('C', 'HUSBAND'),
        ('D', 'RALPH'),
    ]
    for category, keyword in upper_cased_pairs:
        category_map.add(category, keyword)

    for category in category_map:
        for keyword in category_map[category]:
            self.assertTrue(keyword.islower())
def test_maintains_categories_as_lowercase(self):
    """Assert that category names added in upper case iterate back lower-case."""
    category_map = CategoryMap()
    upper_cased_pairs = [
        ('A', 'HELLO'),
        ('A', 'HONEY'),
        ('B', 'ITS'),
        ('B', 'ME'),
        ('C', 'YOUR'),
        ('C', 'HUSBAND'),
        ('D', 'RALPH'),
    ]
    for category, keyword in upper_cased_pairs:
        category_map.add(category, keyword)

    for name in category_map:
        self.assertTrue(name.islower())
def test_finds_keyword_among_noise(self):
    """A keyword glued to punctuation and other tokens is still detected."""
    category_map = CategoryMap(hello_world_its_me)
    # 'hello' sits between a '.' and a '-' inside a longer token.
    noisy_text = 'zim zam BORKBIRK.hello-XXXX9990'
    found = category_map.keyword_exists(noisy_text)
    self.assertTrue(found)
def test_contains(self):
    """The ``in`` operator reports a category that the map was built with."""
    c = CategoryMap({'a': ['hello', 'world'], 'b': ['its', 'me']})
    # assertIn performs the same membership test as ``'a' in c`` but names
    # the missing member and the container when it fails.
    self.assertIn('a', c)
def test_len(self):
    """``len`` of a map built with two (empty) categories is 2."""
    two_empty_categories = {'a': [], 'b': []}
    category_map = CategoryMap(two_empty_categories)
    size = len(category_map)
    self.assertEqual(2, size)
def test_iterates(self):
    """Each category yielded by iteration maps to the keyword list it was built from."""
    source = hello_world_its_me
    category_map = CategoryMap(source)
    for name in category_map:
        stored = category_map[name]
        wanted = source[name]
        self.assertListEqual(stored, wanted)
def get_categories(self):
    """Return a CategoryMap built from what try_get_json yields for the 'categories' path."""
    categories_path = self.paths['categories']
    raw_categories = try_get_json(categories_path)
    return CategoryMap(raw_categories)