def test_gensupportcount(self):
    """Check Utils.genSupportCount against three fixed inputs.

    Each case pairs a list of candidate sequences with a data set and the
    list of counts the call is expected to return (one entry per
    candidate, in candidate order).
    """
    cases = [
        # (candidates, data, expected counts)
        ([], [], []),
        ([[[1]]], [[[1]]], [1]),
        (
            [[[1]], [[2]], [[2], [2]], [[1], [2]], [[2], [1]]],
            [[[1]], [[2], [2]], [[1], [1, 2]]],
            [2, 2, 1, 1, 0],
        ),
    ]
    for candidates, data, expected in cases:
        self.assertEqual(Utils.genSupportCount(candidates, data), expected)
def run(self):
    """Build and return the list of sequences that pass the support test.

    Steps, as performed below:
      1. Collect the unique items of ``self.T`` and order them by their
         ``self.MS`` value.
      2. Pair every item with its count from ``Utils.genItemSupportCount``.
      3. Keep the items whose count divided by ``len(self.T)`` reaches
         their own ``self.MS`` value; each kept item is stored in the
         result as ``[[item]]``.
      4. Repeatedly generate candidates (``level2CandidateGenSPM`` on the
         first pass, ``MSCandidateGenSPM`` afterwards), count them with
         ``Utils.genSupportCount`` and keep those whose count divided by
         ``len(self.T)`` reaches ``self.getMinMIS(candidate)``. The loop
         stops as soon as a pass keeps nothing.

    Returns:
        The accumulated list of kept sequences from every pass.
    """
    def support_ratio(count):
        # Share of the entries of self.T that this count represents.
        return float(count) / len(self.T)

    items = Utils.getUniqueItems(self.T)
    items.sort(key=lambda symbol: self.MS[symbol])
    logging.info('M: %s', items)

    item_counts = Utils.genItemSupportCount(items, self.T)
    logging.info('SUP: %s', item_counts)

    seeds = [(symbol, item_counts[pos]) for pos, symbol in enumerate(items)]
    logging.info('L: %s', seeds)

    level_one = [
        (symbol, count)
        for symbol, count in seeds
        if support_ratio(count) >= self.MS[symbol]
    ]
    frequent = [[[symbol]] for symbol, _ in level_one]
    logging.info('F1: %s length: %s', level_one, len(level_one))

    level = 2
    current = level_one
    while current:
        logging.warning('candidate level: %d', level)
        if level != 2:
            candidates = self.MSCandidateGenSPM(current)
            logging.warning('C%d length: %s', level, len(candidates))
            logging.info('C%d: %s length: %s', level, candidates, len(candidates))
        else:
            # The first pass is seeded from the (item, count) pairs rather
            # than from the previously kept sequences.
            candidates = self.level2CandidateGenSPM(seeds)
            logging.warning('C2 length: %s', len(candidates))
            logging.info('C2: %s length: %s', candidates, len(candidates))

        candidate_counts = Utils.genSupportCount(candidates, self.T)
        logging.debug('cSUP: %s', candidate_counts)

        current = [
            candidate
            for pos, candidate in enumerate(candidates)
            if support_ratio(candidate_counts[pos]) >= self.getMinMIS(candidate)
        ]
        frequent.extend(current)
        logging.info('F%d: %s', level, current)
        logging.warning('F%d length: %s', level, len(current))
        level += 1

    logging.info('F: %s', frequent)
    return frequent
def test_mscandidategenspm(self):
    """Check pyMSGSP.MSCandidateGenSPM on a small fixed data set.

    The test rebuilds the level-2 result by hand (unique items ordered by
    their MS value, level-2 candidates, support filter against
    ``getMinMIS``), feeds it to ``MSCandidateGenSPM`` and compares the
    output with the expected candidate list. Both lists are sorted with
    ``self.comparator`` first so the comparison ignores ordering.
    """
    # Function-local import so the fix does not depend on the file's
    # import block.
    from functools import cmp_to_key

    inputData = {
        'T': [[[1]], [[2], [2]], [[1], [1, 2]]],
        'MS': {1: 0.09600845652974467, 2: 0.2357830588199925},
        'SDC': 0.056047812216985904,
    }
    pymsgsp = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"])

    T = inputData["T"]
    M = Utils.getUniqueItems(T)
    M.sort(key=lambda item: inputData["MS"][item])
    SUP = Utils.genItemSupportCount(M, T)
    L = [(M[m], SUP[m]) for m in range(len(M))]

    C2 = pymsgsp.level2CandidateGenSPM(L)
    cSUP = Utils.genSupportCount(C2, T)
    F2 = [
        C2[c]
        for c in range(len(C2))
        if float(cSUP[c]) / len(T) >= pymsgsp.getMinMIS(C2[c])
    ]

    out1 = pymsgsp.MSCandidateGenSPM(F2)
    out2 = [
        [[1], [1], [1]],
        [[1], [1, 2]],
        [[1], [1], [2]],
        [[1, 2], [2]],
        [[1], [2], [2]],
        [[2], [2], [2]],
    ]

    # sorted() lost its cmp= keyword in Python 3; cmp_to_key adapts the
    # same comparison function to key= and works on Python 2.7 as well.
    ordering = cmp_to_key(self.comparator)
    out1 = sorted(out1, key=ordering)
    out2 = sorted(out2, key=ordering)
    self.assertEqual(out1, out2)