def test_eq(self):
        """Two maps built from the same data are equal; one built from other data is not."""
        baseline = CategoryMap(hello_world_its_me)
        twin = CategoryMap(hello_world_its_me)
        other_data = {'a': ['hello', 'me'], 'b': ['its', 'world']}
        different = CategoryMap(other_data)

        self.assertEqual(baseline, twin)
        self.assertNotEqual(baseline, different)
示例#2
0
    def test_with_realistic_data(self):
        """Categorize bank-statement style descriptions against a small seed map."""
        # Raw descriptions, including account-number and location noise.
        statement_lines = [
            "ach debit       xxxxx0987 electricity ",
            "ach debit       1-1abcdef gas ",
            "debit card purchase   xxxxx1234 apples #118            city st ",
            "ach webrecur   xxxxx6161 school ",
            "debit card purchase   xxxxx4321 coffee    city st ",
            "debit card purchase   xxxxx1234 razor blades m5789          city      st ",
            "debit card purchase   xxxxx4321 sandwich meat             city st ",
            "recurring debit card  xxxxx5678 water             xxxxx9622 st ",
            "debit card purchase   xxxxx4321 pipe tobacco           xxxxx4000  st ",
            "debit card purchase   xxxxx1234 bar soap       xxxxx9100 st ",
            "debit card purchase   xxxxx5678 shoes    city      st "
        ]

        # Keywords the categorizer starts out knowing.
        seed = {
            'food': ['apples', 'coffee'],
            'bills': ['electricity', 'gas'],
            'home': ['bar soap']
        }

        # The full map expected back; it also feeds the fake input source.
        expected = CategoryMap({
            'food': ['apples', 'coffee', 'sandwich meat'],
            'bills': ['electricity', 'gas', 'school', 'water'],
            'home': ['bar soap', 'razor blades', 'pipe tobacco', 'shoes']
        })

        seed_map = CategoryMap(seed)
        fake_input = InputFake(expected)
        categorizer = Categorizer(seed_map, fake_input)
        actual = categorizer.categorize(statement_lines)

        self.assertEqual(expected, actual)
示例#3
0
    def test_should_ignore_duplicate_descriptions(self):
        """The '... xyz' descriptions must not add keywords beyond the expected map."""
        descriptions = [
            'hello',
            'honey',
            'its',
            'me',
            'your',
            'husband',
            'ralph',
            'honey xyz',
            'your xyz',
            'ralph xyz'
        ]

        seed = {
            'a': ['hello'],
            'b': ['its'],
            'c': ['husband']
        }

        # Also handed to InputFake, which stands in for the real input source.
        expected = CategoryMap({
            'a': ['hello', 'honey'],
            'b': ['its', 'me'],
            'c': ['your', 'husband'],
            'd': ['ralph']
        })

        seed_map = CategoryMap(seed)
        fake_input = InputFake(expected)
        categorizer = Categorizer(seed_map, fake_input)
        actual = categorizer.categorize(descriptions)

        self.assertEqual(expected, actual)
    def test_keyword_exists(self):
        """keyword_exists is truthy for one sample description and falsy for another."""
        category_map = CategoryMap(hello_world_its_me)

        matching_description = 'world on fire'
        unrelated_description = 'rabbit punch'

        self.assertTrue(category_map.keyword_exists(matching_description))
        self.assertFalse(category_map.keyword_exists(unrelated_description))
    def test_learns_new_keywords(self):
        """A keyword is unknown before add() and reported as existing afterwards."""
        category_map = CategoryMap(hello_world_its_me)

        # Each pair is (category to add under, keyword to learn), checked in order.
        lessons = (
            ('b', 'bonesaw'),
            ('c', 'is ready'),
        )
        for category, keyword in lessons:
            self.assertFalse(category_map.keyword_exists(keyword))

            category_map.add(category, keyword)
            self.assertTrue(category_map.keyword_exists(keyword))
    def test_add_and_get(self):
        """Keywords added via add() are readable through both get() and indexing."""
        reference = hello_world_its_me

        category_map = CategoryMap({'a': ['hello'], 'b': ['its']})
        category_map.add('a', 'world')
        category_map.add('b', 'me')

        # Check the get() accessor first, then subscript access.
        for name in ('a', 'b'):
            self.assertListEqual(category_map.get(name), reference[name])
        for name in ('a', 'b'):
            self.assertListEqual(category_map[name], reference[name])
    def test_ensures_intializing_dict_enforces_case(self):
        """Upper-case keywords passed to the constructor must come back lower-case."""
        shouting = {
            'A': ['HELLO', 'HONEY'],
            'B': ['ITS', 'ME'],
            'C': ['YOUR', 'HUSBAND'],
            'D': ['RALPH']
        }
        category_map = CategoryMap(shouting)

        # Only keyword case is asserted here; category-name case is not checked.
        for name in category_map:
            for word in category_map[name]:
                self.assertTrue(word.islower())
示例#8
0
    def test_should_ignore_known_descriptions(self):
        """Categorizing three descriptions yields the seed keywords plus 'world' under 'c'."""
        descriptions = [
            'bing bang boom hello',
            'its me your husband ralph',
            'the world is so very big',
        ]

        seed = {
            'a': ['hello'],
            'b': ['husband']
        }

        # Also handed to InputFake, which stands in for the real input source.
        expected = CategoryMap({
            'a': ['hello'],
            'b': ['husband'],
            'c': ['world']
        })

        seed_map = CategoryMap(seed)
        fake_input = InputFake(expected)
        categorizer = Categorizer(seed_map, fake_input)
        actual = categorizer.categorize(descriptions)

        self.assertEqual(expected, actual)
    def test_maintains_keywords_as_lowercase(self):
        """Keywords added in upper case must be lower-case when read back."""
        category_map = CategoryMap()

        # (category, keyword) pairs, added in this exact order.
        additions = [
            ('A', 'HELLO'),
            ('A', 'HONEY'),
            ('B', 'ITS'),
            ('B', 'ME'),
            ('C', 'YOUR'),
            ('C', 'HUSBAND'),
            ('D', 'RALPH'),
        ]
        for name, word in additions:
            category_map.add(name, word)

        for name in category_map:
            for word in category_map[name]:
                self.assertTrue(word.islower())
示例#10
0
    def test_maintains_categories_as_lowercase(self):
        """Category names added in upper case must be lower-case when iterated."""
        category_map = CategoryMap()

        # (category, keyword) pairs, added in this exact order.
        additions = [
            ('A', 'HELLO'),
            ('A', 'HONEY'),
            ('B', 'ITS'),
            ('B', 'ME'),
            ('C', 'YOUR'),
            ('C', 'HUSBAND'),
            ('D', 'RALPH'),
        ]
        for name, word in additions:
            category_map.add(name, word)

        for name in category_map:
            self.assertTrue(name.islower())
示例#11
0
    def test_finds_keyword_among_noise(self):
        """keyword_exists is truthy for a description with 'hello' wedged between punctuation."""
        category_map = CategoryMap(hello_world_its_me)
        noisy_description = 'zim zam BORKBIRK.hello-XXXX9990'

        found = category_map.keyword_exists(noisy_description)

        self.assertTrue(found)
示例#12
0
    def test_contains(self):
        """A category name used to build the map is reported by the ``in`` operator."""
        c = CategoryMap({'a': ['hello', 'world'], 'b': ['its', 'me']})

        # assertIn performs the same membership test as ``'a' in c`` but, on
        # failure, reports the member and the container instead of the bare
        # "False is not true" produced by assertTrue.
        self.assertIn('a', c)
示例#13
0
 def test_len(self):
     """len() of a map built from two categories is 2."""
     two_empty_categories = CategoryMap({'a': [], 'b': []})
     size = len(two_empty_categories)
     self.assertEqual(2, size)
示例#14
0
    def test_iterates(self):
        """Iterating a map yields names whose keyword lists match the source dict."""
        source = hello_world_its_me
        category_map = CategoryMap(source)

        for name in category_map:
            stored_keywords = category_map[name]
            self.assertListEqual(stored_keywords, source[name])
示例#15
0
 def get_categories(self):
     """Return a CategoryMap built from what try_get_json yields for the 'categories' path."""
     categories_path = self.paths['categories']
     raw_categories = try_get_json(categories_path)
     return CategoryMap(raw_categories)