def test_mine_db_rules(self):
    analyzer = GCA(self.db_rules, 1 / 7)  # percentage indicated in publication
    analyzer.clean_database()
    analyzer.mine()
    self.assertEqual(len(analyzer.lcg_into_list()), 10)
    expected_LGC = []
    expected_LGC.append(
        GCA.Node(2 / 7, set(['a', 'c', 'e', 'g', 'i']),
                 [['a', 'e'], ['a', 'g']], None))
    expected_LGC.append(
        GCA.Node(2 / 7, set(['b', 'c', 'e', 'g', 'i']), [['b']], None))
    expected_LGC.append(
        GCA.Node(2 / 7, set(['a', 'c', 'f', 'h', 'i']),
                 [['c', 'f'], ['c', 'h']], None))
    expected_LGC.append(
        GCA.Node(1 / 7, set(['a', 'd', 'f', 'h', 'i']), [['d']], None))
    expected_LGC.append(
        GCA.Node(4 / 7, set(['c', 'e', 'g', 'i']), [['e'], ['g']], None))
    expected_LGC.append(
        GCA.Node(4 / 7, set(['a', 'c', 'i']), [['a', 'c']], None))
    expected_LGC.append(
        GCA.Node(3 / 7, set(['a', 'f', 'h', 'i']), [['f'], ['h']], None))
    expected_LGC.append(GCA.Node(6 / 7, set(['c', 'i']), [['c']], None))
    expected_LGC.append(GCA.Node(5 / 7, set(['a', 'i']), [['a']], None))
    expected_LGC.append(GCA.Node(7 / 7, set(['i']), [['i']], None))
    for index, expected in enumerate(expected_LGC):
        # check closure
        match = analyzer.search_node_with_closure(expected.closure)
        self.assertSequenceEqual(expected.closure, match.closure)
        # check support
        self.assertEqual(expected.support, match.support)
        # check generators
        for generator in expected.generators:
            match = analyzer.search_node_with_generator(None, generator)
            self.assertIsNotNone(match)
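# The supports asserted in test_mine_db_rules can be cross-checked by brute force
# against the raw transactions: support(X) = |{t in D : X is a subset of t}| / |D|.
# The two methods below are a minimal illustrative sketch added alongside the tests
# (they are not part of the GCA API); they only rely on self.db_rules from setUp.
def brute_force_support(self, itemset, database):
    """Fraction of transactions in `database` containing every item of `itemset`."""
    matches = sum(1 for transaction in database
                  if set(itemset) <= set(transaction))
    return matches / len(database)

def test_expected_supports_match_raw_database(self):
    # Spot-check a few of the supports listed in test_mine_db_rules.
    self.assertEqual(
        self.brute_force_support(['a', 'c', 'e', 'g', 'i'], self.db_rules),
        2 / 7)
    self.assertEqual(
        self.brute_force_support(['c', 'i'], self.db_rules), 6 / 7)
    self.assertEqual(
        self.brute_force_support(['i'], self.db_rules), 7 / 7)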
def test_MFCS_FromLattice(self):
    analyzer = GCA(self.db_rules, 1 / 7)  # percentage indicated in publication
    analyzer.clean_database()
    analyzer.mine()
    rule_miner = RAMCM(analyzer.lcg_into_list())
    lcg_S = rule_miner.MFCS_FromLattice(
        rule_miner.lcg, set(['a', 'c', 'f', 'h', 'i']),
        rule_miner._get_support(set(['a', 'c', 'f', 'h', 'i'])), 1 / 7, 1)
    self.assertEqual(len(lcg_S), 6)
    expected_LGC = []
    expected_LGC.append(
        GCA.Node(2 / 7, set(['a', 'c', 'f', 'h', 'i']),
                 [['c', 'f'], ['c', 'h']], None))
    expected_LGC.append(
        GCA.Node(4 / 7, set(['a', 'c', 'i']), [['a', 'c']], None))
    expected_LGC.append(
        GCA.Node(3 / 7, set(['a', 'f', 'h', 'i']), [['f'], ['h']], None))
    expected_LGC.append(GCA.Node(6 / 7, set(['c', 'i']), [['c']], None))
    expected_LGC.append(GCA.Node(5 / 7, set(['a', 'i']), [['a']], None))
    expected_LGC.append(GCA.Node(7 / 7, set(['i']), [['i']], None))
    for index, expected in enumerate(expected_LGC):
        # check closure
        match = analyzer.search_node_with_closure(expected.closure, lcg_S)
        self.assertSequenceEqual(expected.closure, match.closure)
        # check support
        self.assertEqual(expected.support, match.support)
        # check generators
        for generator in expected.generators:
            self.assertTrue(
                is_in_generators(generator, match.generators, True))
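# The six nodes expected from MFCS_FromLattice above are exactly the closed itemsets
# of the full lattice (the ten listed in test_mine_db_rules) whose closures are
# subsets of S = {'a', 'c', 'f', 'h', 'i'}. The sketch below (illustrative only, no
# call into the miner) re-derives that count of 6 from the ten closures.
def test_MFCS_expected_count_from_subset_filter(self):
    full_lcg_closures = [
        set(['a', 'c', 'e', 'g', 'i']),
        set(['b', 'c', 'e', 'g', 'i']),
        set(['a', 'c', 'f', 'h', 'i']),
        set(['a', 'd', 'f', 'h', 'i']),
        set(['c', 'e', 'g', 'i']),
        set(['a', 'c', 'i']),
        set(['a', 'f', 'h', 'i']),
        set(['c', 'i']),
        set(['a', 'i']),
        set(['i']),
    ]
    S = set(['a', 'c', 'f', 'h', 'i'])
    self.assertEqual(
        sum(1 for closure in full_lcg_closures if closure <= S), 6)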
def setUp(self):
    """
    Validate the implementation against the example published here:
    https://pdfs.semanticscholar.org/56a4/ec156b26225b5922182bacc4c5b26fd5a555.pdf
    """
    self.db = []
    self.db.append(['a', 'b', 'c', 'e', 'g', 'h'])
    self.db.append(['a', 'c', 'd', 'f', 'h'])
    self.db.append(['a', 'd', 'e', 'f', 'g', 'h'])
    self.db.append(['b', 'c', 'e', 'f', 'g', 'h'])
    self.db.append(['b', 'c', 'e'])
    self.db.append(['b', 'c'])
    self.db_rules = []  # database used for association rule mining
    self.db_rules.append(['a', 'c', 'e', 'g', 'i'])
    self.db_rules.append(['a', 'c', 'f', 'h', 'i'])
    self.db_rules.append(['a', 'd', 'f', 'h', 'i'])
    self.db_rules.append(['b', 'c', 'e', 'g', 'i'])
    self.db_rules.append(['a', 'c', 'e', 'g', 'i'])
    self.db_rules.append(['b', 'c', 'e', 'g', 'i'])
    self.db_rules.append(['a', 'c', 'f', 'h', 'i'])
    self.db_rules_integer = []  # database used for association rule mining with integer items
    self.db_rules_integer.append([1, 3, 5, 7, 9])
    self.db_rules_integer.append([1, 3, 6, 8, 9])
    self.db_rules_integer.append([1, 4, 6, 8, 9])
    self.db_rules_integer.append([2, 3, 5, 7, 9])
    self.db_rules_integer.append([1, 3, 5, 7, 9])
    self.db_rules_integer.append([2, 3, 5, 7, 9])
    self.db_rules_integer.append([1, 3, 6, 8, 9])
    self.analyzer = GCA([], 1)
    root = None
    a = GCA.Node(3, ('a'), ['a'], (1, 2, 3), root)
    b = GCA.Node(4, ('b'), ['b'], (1, 4, 5, 6), root)
    c = GCA.Node(5, ('c'), ['c'], (1, 2, 4, 5, 6), root)
    d = GCA.Node(2, ('d'), ['d'], (2, 3), root)
    e = GCA.Node(4, ('e'), ['e'], (1, 3, 4, 5), root)
    f = GCA.Node(3, ('f'), ['f'], (2, 3, 4), root)
    g = GCA.Node(3, ('g'), ['g'], (1, 3, 4), root)
    h = GCA.Node(4, ('h'), ['h'], (1, 2, 3, 4), root)
    self.L1 = [d, a, f, g, b, e, h, c]
    dc = GCA.Node(1, ('a', 'd', 'f', 'h', 'c'), ['d', 'c'], set([2]), d)
    de = GCA.Node(1, ('a', 'd', 'f', 'h', 'e'), ['d', 'e'], set([3]), d)
    dg = GCA.Node(1, ('a', 'd', 'f', 'h', 'e', 'g'), ['d', 'g'], set([3]), d)
    af = GCA.Node(2, ('a', 'f', 'h'), ['a', 'f'], (2, 3), a)
    ag = GCA.Node(2, ('a', 'h', 'e', 'g'), ['a', 'g'], (1, 3), a)
    ab = GCA.Node(1, ('a', 'h', 'b', 'c'), ['a', 'b'], set([1]), a)
    ac = GCA.Node(2, ('a', 'h', 'c'), ['a', 'c'], (1, 2), a)
    ae = GCA.Node(2, ('a', 'e'), ['a', 'e'], (1, 3), a)
    gb = GCA.Node(2, ('e', 'g', 'h', 'b', 'c'), ['g', 'b'], (1, 4), g)
    gc = GCA.Node(2, ('g', 'c'), ['g', 'c'], (1, 4), g)
    be = GCA.Node(3, ('b', 'c', 'e'), ['b', 'e'], (1, 4, 5), b)
    bh = GCA.Node(2, ('b', 'c', 'h'), ['b', 'h'], (1, 4), b)
    eh = GCA.Node(3, ('e', 'h'), ['e', 'h'], (1, 3, 4), e)
    ec = GCA.Node(3, ('e', 'c'), ['e', 'c'], (1, 4, 5), e)
    fg = GCA.Node(2, ('f', 'h', 'e', 'g'), ['f', 'g'], (3, 4), f)
    fb = GCA.Node(1, ('f', 'h', 'b', 'c'), ['f', 'b'], set([4]), f)
    fc = GCA.Node(2, ('f', 'h', 'c'), ['f', 'c'], (2, 4), f)
    fe = GCA.Node(2, ('f', 'h', 'e'), ['f', 'e'], (3, 4), f)
    hc = GCA.Node(3, ('h', 'c'), ['h', 'c'], (1, 2, 4), h)
    # The order here matters: it depends on the order of L1
    self.L2 = [
        dg, de, dc, af, ag, ab, ae, ac, fg, fb, fe, fc, gb, gc, be, bh,
        eh, ec, hc
    ]
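# The hand-built L1/L2 nodes in setUp encode (support count, closure, generators,
# tid-set, parent). In Galois-connection terms, the closure of an itemset X is the
# intersection of all transactions containing X. The helper below is an illustrative
# sketch (not used by the existing tests): e.g. for X = {'d'} over self.db it returns
# {'a', 'd', 'f', 'h'}, the closure asserted for generator ['d'] in test_mine.
def brute_force_closure(self, itemset, database):
    """Intersection of all transactions containing `itemset` (empty set if none)."""
    containing = [set(t) for t in database if set(itemset) <= set(t)]
    if not containing:
        return set()
    closure = containing[0]
    for transaction in containing[1:]:
        closure &= transaction
    return closure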
def test_mine(self):
    analyzer = GCA(self.db, 0.16)  # fraction giving a min_supp of 1, matching the publication
    analyzer.clean_database()
    analyzer.mine()
    # closed_items = analyzer.lcg_into_list()  # for double hash db
    db_size = len(self.db)
    expected_LGC = []
    expected_LGC.append(
        GCA.Node(2 / analyzer.db_length, set(['a', 'd', 'f', 'h']),
                 [['d'], ['a', 'f']], None))
    expected_LGC.append(
        GCA.Node(3 / analyzer.db_length, set(['a', 'h']), [['a']], None))
    expected_LGC.append(
        GCA.Node(3 / analyzer.db_length, set(['f', 'h']), [['f']], None))
    expected_LGC.append(
        GCA.Node(3 / analyzer.db_length, set(['e', 'g', 'h']),
                 [['g'], ['e', 'h']], None))
    expected_LGC.append(
        GCA.Node(4 / analyzer.db_length, set(['b', 'c']), [['b']], None))
    expected_LGC.append(
        GCA.Node(4 / analyzer.db_length, set(['e']), [['e']], None))
    expected_LGC.append(
        GCA.Node(4 / analyzer.db_length, set(['h']), [['h']], None))
    expected_LGC.append(
        GCA.Node(5 / analyzer.db_length, set(['c']), [['c']], None))
    expected_LGC.append(
        GCA.Node(1 / analyzer.db_length,
                 set(['a', 'd', 'f', 'h', 'e', 'g']),
                 [['d', 'g'], ['d', 'e'], ['a', 'f', 'g'], ['a', 'f', 'e']],
                 None))
    expected_LGC.append(
        GCA.Node(1 / analyzer.db_length, set(['a', 'd', 'f', 'h', 'c']),
                 [['d', 'c'], ['a', 'f', 'c']], None))
    # TODO: check with the publication's authors: 'aheg' appears in two transactions of the database.
    # TODO: the example illustration shows a support of 1, but the itemset occurs in transactions 1 and 3.
    # expected_LGC.append(GCA.Node(1 / analyzer.db_length, set(['a', 'h', 'e', 'g']), [['a', 'g'], ['a', 'e']], None))
    expected_LGC.append(
        GCA.Node(2 / analyzer.db_length, set(['a', 'h', 'e', 'g']),
                 [['a', 'g'], ['a', 'e']], None))
    expected_LGC.append(
        GCA.Node(1 / analyzer.db_length,
                 set(['a', 'h', 'b', 'c', 'e', 'g']),
                 [['a', 'b'], ['a', 'g', 'c'], ['a', 'e', 'c']], None))
    expected_LGC.append(
        GCA.Node(2 / analyzer.db_length, set(['a', 'h', 'c']),
                 [['a', 'c']], None))
    expected_LGC.append(
        GCA.Node(2 / analyzer.db_length, set(['f', 'h', 'e', 'g']),
                 [['f', 'g'], ['f', 'e']], None))
    expected_LGC.append(
        GCA.Node(1 / analyzer.db_length,
                 set(['f', 'h', 'b', 'c', 'e', 'g']),
                 [['f', 'b'], ['f', 'g', 'c'], ['f', 'e', 'c']], None))
    expected_LGC.append(
        GCA.Node(2 / analyzer.db_length, set(['f', 'h', 'c']),
                 [['f', 'c']], None))
    expected_LGC.append(
        GCA.Node(2 / analyzer.db_length, set(['e', 'g', 'h', 'b', 'c']),
                 [['g', 'b'], ['g', 'c'], ['b', 'h'], ['c', 'e', 'h']],
                 None))
    expected_LGC.append(
        GCA.Node(3 / analyzer.db_length, set(['b', 'c', 'e']),
                 [['b', 'e'], ['c', 'e']], None))
    expected_LGC.append(
        GCA.Node(3 / analyzer.db_length, set(['h', 'c']), [['h', 'c']], None))
    for index, expected in enumerate(expected_LGC):
        # check closure
        match = analyzer.search_node_with_closure(expected.closure)
        self.assertSequenceEqual(expected.closure, match.closure)
        # check support
        self.assertEqual(expected.support, match.support)
        # check generators
        for generator in expected.generators:
            # match = analyzer.search_node_with_generator(None, generator)
            self.assertIsNotNone(match)
    self.assertEqual(len(expected_LGC), len(analyzer.lcg_into_list()))
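# The two TODO notes in test_mine concern the closed itemset {'a', 'e', 'g', 'h'}:
# the publication's figure shows a support of 1, yet the itemset is contained in
# transactions 1 and 3 of self.db. This added check (illustrative, independent of
# the GCA API) makes that count explicit.
def test_aheg_support_discrepancy(self):
    count = sum(1 for transaction in self.db
                if set(['a', 'e', 'g', 'h']) <= set(transaction))
    self.assertEqual(count, 2)
    self.assertEqual(count / len(self.db), 2 / 6)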