Пример #1
0
    def test_gensupportcount(self):
        """Check Utils.genSupportCount on empty, single and mixed inputs."""
        # Each case is (candidates, data, expected result); the expected list
        # has one entry per candidate, in candidate order.
        cases = [
            ([], [], []),
            ([[[1]]], [[[1]]], [1]),
            (
                [[[1]], [[2]], [[2], [2]], [[1], [2]], [[2], [1]]],
                [[[1]], [[2], [2]], [[1], [1, 2]]],
                [2, 2, 1, 1, 0],
            ),
        ]

        for candidates, data, expected in cases:
            self.assertEqual(Utils.genSupportCount(candidates, data), expected)
Пример #2
0
    def run(self):
        """Mine frequent sequences level by level and return all of them.

        Reads ``self.T`` (the sequence database) and ``self.MS`` (a mapping
        from item to its threshold, compared against support fractions).
        Level 1 is computed directly from per-item support counts; each
        further level generates candidates, counts their support in
        ``self.T`` and keeps those whose support fraction reaches
        ``self.getMinMIS(candidate)``.  The loop stops at the first level
        that yields no frequent sequence.

        Returns:
            A list with every frequent sequence found; single items are
            wrapped as ``[[item]]``.
        """
        # Unique items, ordered by ascending MS value.
        items = Utils.getUniqueItems(self.T)
        items.sort(key=lambda item: self.MS[item])
        logging.info('M: %s', items)

        item_counts = Utils.genItemSupportCount(items, self.T)
        logging.info('SUP: %s', item_counts)

        # Pair every item with its support count: (item, count).
        L = [(item, item_counts[pos]) for pos, item in enumerate(items)]
        logging.info('L: %s', L)

        # Level 1: keep the items whose support fraction meets their own MS.
        F1 = []
        for entry in L:
            if float(entry[1]) / len(self.T) >= self.MS[entry[0]]:
                F1.append(entry)
        F = [[[entry[0]]] for entry in F1]

        logging.info('F1: %s length: %s', F1, len(F1))

        k = 2
        frequent = F1

        while frequent:
            logging.warning('candidate level: %d', k)
            if k == 2:
                # Level-2 candidates are built from the (item, count) pairs.
                candidates = self.level2CandidateGenSPM(L)
                logging.warning('C2 length: %s', len(candidates))
                logging.info('C2: %s length: %s', candidates, len(candidates))

            else:
                # Higher levels are built from the previous frequent set.
                candidates = self.MSCandidateGenSPM(frequent)
                logging.warning('C%d length: %s', k, len(candidates))
                logging.info('C%d: %s length: %s', k, candidates, len(candidates))

            candidate_counts = Utils.genSupportCount(candidates, self.T)
            logging.debug('cSUP: %s', candidate_counts)

            frequent = []
            for pos, candidate in enumerate(candidates):
                fraction = float(candidate_counts[pos]) / len(self.T)
                if fraction >= self.getMinMIS(candidate):
                    frequent.append(candidate)
            F.extend(frequent)

            logging.info('F%d: %s', k, frequent)
            logging.warning('F%d length: %s', k, len(frequent))

            k += 1

        logging.info('F: %s', F)
        return F
Пример #3
0
    def test_mscandidategenspm(self):
        """Check MSCandidateGenSPM against a hand-written candidate list.

        Builds the level-2 frequent set for a three-sequence database the
        same way the miner does (sorted items -> support counts ->
        level-2 candidates -> filter by ``getMinMIS``), feeds it to
        ``MSCandidateGenSPM`` and compares the result with the expected
        candidates.  Both lists are sorted with ``self.comparator`` first so
        the comparison does not depend on generation order.
        """
        # Local import: ``sorted(cmp=...)`` only exists on Python 2, while
        # ``cmp_to_key`` is available on both Python 2.7 and Python 3.
        from functools import cmp_to_key

        inputData = {'T': [[[1]], [[2], [2]], [[1], [1, 2]]], 'MS': {1: 0.09600845652974467, 2: 0.2357830588199925}, 'SDC': 0.056047812216985904}
        pymsgsp = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"])

        T = inputData["T"]
        M = Utils.getUniqueItems(T)
        M.sort(key=lambda item: inputData["MS"][item])
        SUP = Utils.genItemSupportCount(M, T)
        # (item, support count) pairs, in ascending-MS item order.
        L = [(item, SUP[pos]) for pos, item in enumerate(M)]

        C2 = pymsgsp.level2CandidateGenSPM(L)
        cSUP = Utils.genSupportCount(C2, T)
        # Keep the level-2 candidates whose support fraction reaches their
        # minimum threshold as reported by getMinMIS.
        F2 = [
            candidate
            for pos, candidate in enumerate(C2)
            if float(cSUP[pos]) / len(T) >= pymsgsp.getMinMIS(candidate)
        ]

        out1 = pymsgsp.MSCandidateGenSPM(F2)
        out2 = [[[1], [1], [1]], [[1], [1, 2]], [[1], [1], [2]], [[1, 2], [2]], [[1], [2], [2]], [[2], [2], [2]]]

        # Normalise ordering on both sides with the same comparator.
        sort_key = cmp_to_key(self.comparator)
        out1 = sorted(out1, key=sort_key)
        out2 = sorted(out2, key=sort_key)

        self.assertEqual(out1, out2)