def test_issubsequence(self):
    """Check Utils.isSubsequence against accepted and rejected (sub, sup) pairs."""
    # Pairs for which isSubsequence is expected to report True.
    accepted_pairs = [
        ([], []),
        ([], [[1], [2]]),
        ([[2], [3]], [[1, 2, 3], [1, 2, 3, 4]]),
    ]
    for sub, sup in accepted_pairs:
        self.assertTrue(Utils.isSubsequence(sub, sup))

    # A pair for which isSubsequence is expected to report False.
    rejected_sub = [[1, 3], [3, 3, 3]]
    rejected_sup = [[1, 2, 3], [1, 2, 3, 4]]
    self.assertFalse(Utils.isSubsequence(rejected_sub, rejected_sup))
def canPrune(self, seq):
    """Decide whether candidate ``seq`` can be discarded.

    Each position of ``seq`` is removed in turn to form a shorter
    sequence. Positions whose item's MS value equals
    ``self.getStrictlyMinimumMIS(seq)`` are not examined. For every other
    position, the fraction of entries in ``self.T`` for which
    ``Utils.isSubsequence`` accepts the shortened sequence is compared with
    ``self.getMinMIS`` of that shortened sequence.

    Returns:
        True as soon as one shortened sequence falls below its threshold,
        otherwise False.
    """
    strict_min_mis = self.getStrictlyMinimumMIS(seq)
    length = Utils.seqLength(seq)

    for position in range(length):
        removed_item = Utils.getItem(seq, position)
        if self.MS[removed_item] == strict_min_mis:
            # Sub-sequences obtained by dropping this item are exempt
            # from the frequency check.
            continue

        shortened = Utils.removeItem(seq, position)
        hits = sum(
            1 for transaction in self.T
            if Utils.isSubsequence(shortened, transaction)
        )
        support = float(hits) / len(self.T)
        if support < self.getMinMIS(shortened):
            return True

    return False
def run(self):
    """Exhaustively search for sequences meeting the MIS and SDC constraints.

    Every sequence returned by ``Utils.generateAllSequences`` (built from
    the subsets of the unique items of ``self.T``) is kept when:

    * the fraction of entries of ``self.T`` accepted by
      ``Utils.isSubsequence`` is at least the smallest ``self.MS`` value
      among the sequence's items, and
    * the spread between the largest and smallest item support counts,
      divided by ``len(self.T)``, does not exceed ``self.SDC``.

    Returns:
        A list of the qualifying sequences. An empty list is returned,
        after printing a notice, when there are more than 3 unique items.
    """
    unique_items = Utils.getUniqueItems(self.T)
    support_counts = Utils.genItemSupportCount(unique_items, self.T)
    # support_counts is read by position, parallel to unique_items.
    item_support = {
        item: support_counts[idx] for idx, item in enumerate(unique_items)
    }

    if len(unique_items) > 3:
        # Parenthesised single-string form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("SORRY! Can't run Brute Force with these large data")
        return []

    subsets = Utils.generateAllSubsets(unique_items)
    sequences = Utils.generateAllSequences(subsets)

    outputData = []
    for seq in sequences:
        items = [item for element in seq for item in element]
        if not items:
            # A sequence without items has no MIS to satisfy and can
            # never qualify; skipping it also avoids dividing by an
            # empty database.
            continue

        # Use the real extremes rather than fixed sentinels so support
        # counts of any magnitude are handled correctly.
        supports = [item_support[item] for item in items]
        support_spread = max(supports) - min(supports)
        min_mis = min(self.MS[item] for item in items)

        count = sum(1 for d in self.T if Utils.isSubsequence(seq, d))
        total = float(len(self.T))
        if count / total >= min_mis and support_spread / total <= self.SDC:
            outputData.append(seq)

    return outputData
data_file = sys.argv[2] para_file = sys.argv[3] result_file = sys.argv[4] DP = DataProcessor(data_file, para_file, result_file, False) inputData = DP.loadInput() startTime = datetime.now() print 'Execution started at:', startTime algo = pyMSGSP(inputData["T"], inputData["MS"], inputData["SDC"], logging.INFO) # algo = BruteForceSPM(inputData["T"], inputData["MS"], inputData["SDC"]) outputData = algo.run() print 'Execution time:', datetime.now() - startTime outputDict = defaultdict(list) for seq in outputData: count = 0 for d in inputData["T"]: if Utils.isSubsequence(seq, d): count += 1 outputDict[Utils.seqLength(seq)].append((seq, count)) DP.printOutput(outputDict)