def test_genitemsupportcount(self):
    """Check Utils.genItemSupportCount against a handful of small databases.

    The expected values show that an item is counted once per sequence it
    occurs in, however many times it appears inside that sequence.
    """
    # Each case is (items, sequence database, expected count per item).
    cases = [
        # Empty database, no items.
        ([], [], []),
        # One sequence holding one item.
        ([1], [[[1]]], [1]),
        # Item 2 occurs twice in the second sequence but is counted once for it.
        ([1, 2], [[[1]], [[2], [2]], [[1], [1, 2]]], [2, 2]),
        # Seven distinct items, each present in exactly one sequence.
        (
            [1, 2, 3, 4, 5, 6, 7],
            [[[1], [2, 3]], [[4, 5, 6], [7]]],
            [1, 1, 1, 1, 1, 1, 1],
        ),
        # A single sequence made of a single three-item element.
        ([1, 2, 3], [[[1, 2, 3]]], [1, 1, 1]),
    ]
    for items, data, expected in cases:
        self.assertEqual(Utils.genItemSupportCount(items, data), expected)
def test_level2candidategenspm(self):
    """Check level2CandidateGenSPM on a three-sequence, two-item database.

    The items are ordered by their MS value, paired with their support
    counts, and handed to level2CandidateGenSPM. The generated candidates
    must match the expected list irrespective of order, so both lists are
    sorted with self.comparator before being compared.
    """
    # Local import keeps this method self-contained.
    import functools

    inputData = {
        'T': [[[1]], [[2], [2]], [[1], [1, 2]]],
        'MS': {1: 0.09600845652974467, 2: 0.2357830588199925},
        'SDC': 0.056047812216985904,
    }
    pymsgsp = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"])

    # Items sorted by ascending MS value, each paired with its support count.
    M = Utils.getUniqueItems(inputData["T"])
    M.sort(key=lambda item: inputData["MS"][item])
    SUP = Utils.genItemSupportCount(M, inputData["T"])
    L = list(zip(M, SUP))

    actual = pymsgsp.level2CandidateGenSPM(L)
    expected = [
        [[1, 1]], [[1], [1]],
        [[1, 2]], [[1], [2]], [[2], [1]],
        [[2, 2]], [[2], [2]],
    ]

    # sorted(cmp=...) only exists on Python 2; cmp_to_key yields the same
    # ordering and works on both Python 2.7 and Python 3.
    order = functools.cmp_to_key(self.comparator)
    self.assertEqual(sorted(actual, key=order), sorted(expected, key=order))
def run(self):
    """Mine the frequent sequences of self.T level by level.

    Items are ordered by their MS value and paired with their support
    counts. Level 1 keeps every item whose support fraction reaches its own
    MS value. From level 2 on, candidates come from level2CandidateGenSPM
    (level 2) or MSCandidateGenSPM (higher levels), and a candidate is kept
    when its support fraction reaches self.getMinMIS(candidate). The loop
    stops at the first level that yields no frequent sequence.

    Returns:
        A list with the frequent sequences of every level, level 1 first.
    """
    M = Utils.getUniqueItems(self.T)
    M.sort(key=lambda item: self.MS[item])
    logging.info('M: %s', M)

    SUP = Utils.genItemSupportCount(M, self.T)
    logging.info('SUP: %s', SUP)

    # (item, support count) pairs, in ascending MS order.
    L = [(item, SUP[idx]) for idx, item in enumerate(M)]
    logging.info('L: %s', L)

    total = len(self.T)
    F1 = [pair for pair in L if float(pair[1]) / total >= self.MS[pair[0]]]
    # Frequent items are stored as one-element sequences.
    F = [[[pair[0]]] for pair in F1]
    logging.info('F1: %s length: %s', F1, len(F1))

    k = 2
    Ck = []
    Fk = F1
    while Fk:
        logging.warning('candidate level: %d', k)
        if k != 2:
            Ck = self.MSCandidateGenSPM(Fk)
            logging.warning('C%d length: %s', k, len(Ck))
            logging.info('C%d: %s length: %s', k, Ck, len(Ck))
        else:
            # Level 2 is seeded from the item/support pairs, not from F1.
            Ck = self.level2CandidateGenSPM(L)
            logging.warning('C2 length: %s', len(Ck))
            logging.info('C2: %s length: %s', Ck, len(Ck))

        cSUP = Utils.genSupportCount(Ck, self.T)
        logging.debug('cSUP: %s', cSUP)

        Fk = [
            candidate
            for idx, candidate in enumerate(Ck)
            if float(cSUP[idx]) / total >= self.getMinMIS(candidate)
        ]
        F.extend(Fk)
        logging.info('F%d: %s', k, Fk)
        logging.warning('F%d length: %s', k, len(Fk))
        k += 1

    logging.info('F: %s', F)
    return F
def run(self):
    """Enumerate every candidate sequence and keep the frequent ones.

    A sequence is kept when both conditions hold:
      * the fraction of sequences in self.T that contain it is at least the
        smallest self.MS value among its items, and
      * the gap between the largest and smallest item support counts,
        divided by len(self.T), does not exceed self.SDC.

    Returns:
        The list of sequences satisfying both conditions. An empty list is
        returned when self.T is empty or when it holds more than 3 distinct
        items (in which case a notice is printed).
    """
    # Nothing can be frequent in an empty database; returning here also
    # avoids dividing by len(self.T) == 0 further down.
    if not self.T:
        return []

    L = Utils.getUniqueItems(self.T)
    if len(L) > 3:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("SORRY! Can't run Brute Force with these large data")
        return []

    SUP = Utils.genItemSupportCount(L, self.T)
    lSUP = dict(zip(L, SUP))
    total = len(self.T)

    C = Utils.generateAllSubsets(L)
    S = Utils.generateAllSequences(C)

    outputData = []
    for seq in S:
        seq_items = [i for element in seq for i in element]
        if not seq_items:
            # A sequence without items has no MS threshold to meet.
            continue

        # Take the real extremes rather than starting from a 999 sentinel,
        # which gave a wrong gap once a support count exceeded 999.
        supports = [lSUP[i] for i in seq_items]
        minMIS = min(self.MS[i] for i in seq_items)

        # Cheap support-difference check first; skip counting if it fails.
        if float(max(supports) - min(supports)) / total > self.SDC:
            continue

        count = sum(1 for d in self.T if Utils.isSubsequence(seq, d))
        if float(count) / total >= minMIS:
            outputData.append(seq)
    return outputData
def test_mscandidategenspm(self):
    """Check MSCandidateGenSPM on the frequent 2-sequences of a small database.

    Level-2 candidates are generated from the (item, support count) pairs
    and filtered by comparing each candidate's support fraction with
    getMinMIS(candidate). The survivors are fed to MSCandidateGenSPM. The
    result must match the expected list irrespective of order, so both lists
    are sorted with self.comparator before being compared.
    """
    # Local import keeps this method self-contained.
    import functools

    inputData = {
        'T': [[[1]], [[2], [2]], [[1], [1, 2]]],
        'MS': {1: 0.09600845652974467, 2: 0.2357830588199925},
        'SDC': 0.056047812216985904,
    }
    T = inputData["T"]
    pymsgsp = pyMSGSP(T, inputData["MS"], inputData["SDC"])

    # Items sorted by ascending MS value, each paired with its support count.
    M = Utils.getUniqueItems(T)
    M.sort(key=lambda item: inputData["MS"][item])
    SUP = Utils.genItemSupportCount(M, T)
    L = list(zip(M, SUP))

    # Frequent 2-sequences: candidates whose support fraction reaches
    # the threshold returned by getMinMIS.
    C2 = pymsgsp.level2CandidateGenSPM(L)
    cSUP = Utils.genSupportCount(C2, T)
    F2 = [
        candidate
        for candidate, support in zip(C2, cSUP)
        if float(support) / len(T) >= pymsgsp.getMinMIS(candidate)
    ]

    actual = pymsgsp.MSCandidateGenSPM(F2)
    expected = [
        [[1], [1], [1]],
        [[1], [1, 2]],
        [[1], [1], [2]],
        [[1, 2], [2]],
        [[1], [2], [2]],
        [[2], [2], [2]],
    ]

    # sorted(cmp=...) only exists on Python 2; cmp_to_key yields the same
    # ordering and works on both Python 2.7 and Python 3.
    order = functools.cmp_to_key(self.comparator)
    self.assertEqual(sorted(actual, key=order), sorted(expected, key=order))