Пример #1
0
    def test_genitemsupportcount(self):
        """Check Utils.genItemSupportCount against a table of small databases.

        Each case is ``(data, items, expected)``; the call must return one
        count per entry of ``items``, in the same order. The third case has
        item 1 occurring twice inside one sequence yet expects a count of 2
        for two containing sequences, so counts are per sequence rather than
        per occurrence.
        """
        cases = (
            # empty database, no items
            ([], [], []),
            # one sequence holding one single-item element
            ([[[1]]], [1], [1]),
            # repeated items inside a sequence
            ([[[1]], [[2], [2]], [[1], [1, 2]]], [1, 2], [2, 2]),
            # every item occurs in exactly one sequence
            ([[[1], [2, 3]], [[4, 5, 6], [7]]],
             [1, 2, 3, 4, 5, 6, 7],
             [1, 1, 1, 1, 1, 1, 1]),
            # all items share a single element
            ([[[1, 2, 3]]], [1, 2, 3], [1, 1, 1]),
        )

        for data, items, expected in cases:
            self.assertEqual(Utils.genItemSupportCount(items, data), expected)
Пример #2
0
    def test_level2candidategenspm(self):
        """Check the level-2 candidates pyMSGSP generates for a tiny database.

        The seed list ``L`` pairs every unique item of the database (ordered
        by its MS value) with its support count. The candidates returned by
        ``level2CandidateGenSPM`` are compared with the expected list after
        both have been sorted with ``self.comparator``, so the order in which
        candidates are generated does not matter.
        """
        # Function-scope import keeps this method self-contained.
        from functools import cmp_to_key

        inputData = {'T': [[[1]], [[2], [2]], [[1], [1, 2]]], 'MS': {1: 0.09600845652974467, 2: 0.2357830588199925}, 'SDC': 0.056047812216985904}
        pymsgsp = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"])

        M = Utils.getUniqueItems(inputData["T"])
        M.sort(key=lambda item: inputData["MS"][item])
        SUP = Utils.genItemSupportCount(M, inputData["T"])
        L = list(zip(M, SUP))

        out1 = pymsgsp.level2CandidateGenSPM(L)
        out2 = [[[1, 1]], [[1], [1]], [[1, 2]], [[1], [2]], [[2], [1]], [[2, 2]], [[2], [2]]]

        # sorted(cmp=...) exists only on Python 2; cmp_to_key adapts the same
        # two-argument comparator so the test runs on Python 2.7 and 3 alike.
        ordering = cmp_to_key(self.comparator)
        out1 = sorted(out1, key=ordering)
        out2 = sorted(out2, key=ordering)

        self.assertEqual(out1, out2)
Пример #3
0
    def run(self):
        """Mine frequent sequences level by level and return all of them.

        Steps, as performed below:

        1. Collect the unique items of ``self.T``, order them by their
           ``self.MS`` value and pair each with its support count.
        2. Keep the pairs whose support fraction (count / ``len(self.T)``)
           reaches the item's own MS value; each becomes a one-item sequence
           in the result.
        3. Starting at level 2, generate candidates (from the full seed list
           at level 2, from the previous frequent set afterwards), count
           their support and keep those whose support fraction reaches
           ``self.getMinMIS(candidate)``. Stop once a level yields nothing.

        Returns:
            list: the one-item sequences followed by the frequent sequences
            of every later level, in the order they were found.
        """
        unique_items = Utils.getUniqueItems(self.T)
        unique_items.sort(key=lambda item: self.MS[item])
        logging.info('M: %s', unique_items)

        item_counts = Utils.genItemSupportCount(unique_items, self.T)
        logging.info('SUP: %s', item_counts)

        # (item, support count) pairs, still ordered by MS value.
        seeds = [(entry, item_counts[pos]) for pos, entry in enumerate(unique_items)]
        logging.info('L: %s', seeds)

        frequent_items = []
        for pair in seeds:
            if float(pair[1]) / len(self.T) >= self.MS[pair[0]]:
                frequent_items.append(pair)

        # Wrap every frequent item as a sequence of one single-item element.
        result = [[[pair[0]]] for pair in frequent_items]

        logging.info('F1: %s length: %s', frequent_items, len(frequent_items))

        level = 2
        frequent = frequent_items

        while frequent:
            logging.warning('candidate level: %d', level)
            if level != 2:
                candidates = self.MSCandidateGenSPM(frequent)
                logging.warning('C%d length: %s', level, len(candidates))
                logging.info('C%d: %s length: %s', level, candidates, len(candidates))
            else:
                # Level 2 is seeded from all items, not only the frequent ones.
                candidates = self.level2CandidateGenSPM(seeds)
                logging.warning('C2 length: %s', len(candidates))
                logging.info('C2: %s length: %s', candidates, len(candidates))

            candidate_counts = Utils.genSupportCount(candidates, self.T)
            logging.debug('cSUP: %s', candidate_counts)

            frequent = []
            for pos, candidate in enumerate(candidates):
                if float(candidate_counts[pos]) / len(self.T) >= self.getMinMIS(candidate):
                    frequent.append(candidate)
            result.extend(frequent)

            logging.info('F%d: %s', level, frequent)
            logging.warning('F%d length: %s', level, len(frequent))

            level += 1

        logging.info('F: %s', result)
        return result
Пример #4
0
    def run(self):
        """Enumerate every candidate sequence and keep the ones that qualify.

        A sequence produced by ``Utils.generateAllSequences`` is kept when
        both conditions hold:

        * its support fraction (number of sequences in ``self.T`` it is a
          subsequence of, divided by ``len(self.T)``) is at least the
          smallest ``self.MS`` value among its items;
        * the gap between the largest and smallest item support count,
          divided by ``len(self.T)``, does not exceed ``self.SDC``.

        Returns:
            list: the qualifying sequences in enumeration order. An empty
            list is returned when there are more than three unique items
            (a message is printed) or when ``self.T`` is empty.
        """
        L = Utils.getUniqueItems(self.T)
        SUP = Utils.genItemSupportCount(L, self.T)
        lSUP = dict(zip(L, SUP))

        if len(L) > 3:
            # Parenthesised form prints the same text on Python 2 and 3.
            print("SORRY! Can't run Brute Force with these large data")
            return []

        total = len(self.T)
        if total == 0:
            # Nothing can be frequent in an empty database; returning here
            # also avoids dividing by zero below.
            return []

        C = Utils.generateAllSubsets(L)
        S = Utils.generateAllSequences(C)

        outputData = []

        for seq in S:
            items = [i for element in seq for i in element]
            if not items:
                # Without items there is no MIS threshold to meet; the old
                # 999 placeholder could never be reached by a fraction <= 1.
                continue

            # Real extremes instead of 999/0 placeholders, so support counts
            # of any size give the right spread.
            supports = [lSUP[i] for i in items]
            minMIS = min(self.MS[i] for i in items)

            count = sum(1 for d in self.T if Utils.isSubsequence(seq, d))

            if ((float(count) / total) >= minMIS) and (float(max(supports) - min(supports)) / total <= self.SDC):
                outputData.append(seq)

        return outputData
Пример #5
0
    def test_mscandidategenspm(self):
        """Check the level-3 candidates pyMSGSP derives from frequent 2-sequences.

        The test rebuilds the inputs step by step: the seed list ``L`` of
        (item, support count) pairs ordered by MS value, the level-2
        candidates, and ``F2``, the candidates whose support fraction reaches
        ``getMinMIS``. ``MSCandidateGenSPM(F2)`` is then compared with the
        expected list after both have been sorted with ``self.comparator``.
        """
        # Function-scope import keeps this method self-contained.
        from functools import cmp_to_key

        inputData = {'T': [[[1]], [[2], [2]], [[1], [1, 2]]], 'MS': {1: 0.09600845652974467, 2: 0.2357830588199925}, 'SDC': 0.056047812216985904}
        pymsgsp = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"])

        T = inputData["T"]
        M = Utils.getUniqueItems(T)
        M.sort(key=lambda item: inputData["MS"][item])
        SUP = Utils.genItemSupportCount(M, T)
        L = list(zip(M, SUP))

        C2 = pymsgsp.level2CandidateGenSPM(L)
        cSUP = Utils.genSupportCount(C2, T)
        F2 = [cand for cand, sup in zip(C2, cSUP) if float(sup) / len(T) >= pymsgsp.getMinMIS(cand)]

        out1 = pymsgsp.MSCandidateGenSPM(F2)
        out2 = [[[1], [1], [1]], [[1], [1, 2]], [[1], [1], [2]], [[1, 2], [2]], [[1], [2], [2]], [[2], [2], [2]]]

        # sorted(cmp=...) exists only on Python 2; cmp_to_key adapts the same
        # two-argument comparator so the test runs on Python 2.7 and 3 alike.
        ordering = cmp_to_key(self.comparator)
        out1 = sorted(out1, key=ordering)
        out2 = sorted(out2, key=ordering)

        self.assertEqual(out1, out2)