def _get_frequent_features(self):
    """Find frequent features using the apriori algorithm.

    Flattens the entries of ``self.df['noun_and_np']`` into one list of
    transactions, builds the initial candidates with ``apriori.createC1``
    and filters them with ``apriori.scanD`` using a minimum support of
    0.01.  The result is stored in ``self.frequent_features`` as a list of
    strings, each being the items of one returned itemset joined without a
    separator.
    """
    feature_terms = [
        sub_items
        for items in self.df['noun_and_np'].values
        for sub_items in items
    ]
    C1 = apriori.createC1(feature_terms)
    # Build a real list: on Python 3 ``map`` yields a one-shot iterator,
    # which cannot be sized or scanned more than once.
    D = [set(terms) for terms in feature_terms]
    L1, support_data = apriori.scanD(D, C1, 0.01)  # minimum support 0.01
    # Stored as a list so the attribute can be read repeatedly.
    self.frequent_features = ["".join(itemset) for itemset in L1]
class Test:
    # The driver below sits directly in the class body: it executes while
    # the class is being defined, and only when the file is run as a script.
    if __name__ == "__main__":
        dataSet = apriori.loadDataSet()
        print(dataSet)
        C1 = apriori.createC1(dataSet)
        # list(...) keeps the transactions reusable: on Python 3 a bare
        # map() is a one-shot iterator.
        D = list(map(set, dataSet))
        L1, supportData = apriori.scanD(D, C1, 0.5)
        print(L1)
        print(supportData)
# coding:utf-8
import apriori

# Discover frequent itemsets and association rules.
dataSet = apriori.loadDataSet()
print(dataSet)

C1 = apriori.createC1(dataSet)
print(C1)

# list(...) so D prints as a list and can be scanned on Python 3, where a
# bare map() is a one-shot iterator.
D = list(map(set, dataSet))
print(D)

L1, suppData0 = apriori.scanD(D, C1, 0.5)
print(L1)

L, suppData = apriori.apriori(dataSet)
print(L)

L, suppData = apriori.apriori(dataSet, minSupport=0.5)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
rules = apriori.generateRules(L, suppData, minConf=0.7)
print(rules)
rules = apriori.generateRules(L, suppData, minConf=0.5)
print(rules)
#!/usr/bin/env python
import sys

from apriori import createC1
from apriori import scanD
from apriori import aprioriGen

# Read one transaction per stdin line, items separated by whitespace.
a = []
for line in sys.stdin:
    a.append(line.strip().split())

c1 = createC1(a)
# Materialised as a list because the transactions are scanned once per pass
# below; a bare map() would be exhausted after the first pass on Python 3.
d = list(map(set, a))

K = 2     # number of scan / candidate-generation passes
ps = 0.3  # threshold handed to scanD

for i in range(K):
    L1, sp = scanD(d, c1, ps)
    c1 = aprioriGen(L1, i + 1)

# After the last pass each itemset in L1 is unpacked as a pair of integers.
for itemset in L1:
    x, y = itemset
    print("%d %d" % (int(x), int(y)))
def aprioriGen(Lk, k):
    """Create the candidate itemsets Ck from the itemsets in Lk.

    Every pair of itemsets in ``Lk`` whose first ``k - 2`` items (taken in
    sorted order) are equal is merged with a set union.

    Args:
        Lk: indexable sequence of set-like itemsets supporting ``|``.
        k: size of the itemsets being generated.

    Returns:
        list: the union of each matching pair, in pair order.
    """
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        # Sort BEFORE slicing: set iteration order is arbitrary, so the
        # "first k-2 elements" are only well defined on the sorted items.
        prefix_i = sorted(Lk[i])[:k - 2]
        for j in range(i + 1, lenLk):
            if prefix_i == sorted(Lk[j])[:k - 2]:  # first k-2 elements equal
                retList.append(Lk[i] | Lk[j])  # set union
    return retList


# Module-level run of the apriori loop; this is the inlined body of
#     def apriori(dataSet, minSupport=0.5)
dataSet = loadDataSet()
minSupport = 0.5

C1 = createC1(dataSet)
D = list(map(set, dataSet))
L1, supportData = scanD(D, C1, minSupport)
L = [L1]
k = 2
# Keep growing the itemset size until a pass yields no itemsets.
while len(L[k - 2]) > 0:
    Ck = aprioriGen(L[k - 2], k)
    Lk, supK = scanD(D, Ck, minSupport)  # scan DB to get Lk
    supportData.update(supK)
    L.append(Lk)
    k += 1
# Results are left in the module-level names L and supportData.
#!/usr/bin/python
# -*- coding: latin-1 -*-
'''
Experiments with apriori
'''
import apriori
import random
import loadText

support = 0.4

# importFromFile is called for its side effect; the data is then read back
# from loadText.rawPriori.
loadText.importFromFile('spanish_db.txt')
dataset = loadText.rawPriori

C1 = apriori.createC1(dataset)
# list(...) keeps the transactions reusable on Python 3, where a bare
# map() is a one-shot iterator; on Python 2 it is already a list.
D = list(map(set, dataset))
L1, support_data = apriori.scanD(D, C1, support)

k_length = 2
transactions = apriori.aprioriGen(L1, k_length)

# Full run; note that this rebinds support_data.
L, support_data = apriori.apriori(dataset, support)
rules = apriori.generateRules(L, support_data, min_confidence=0.7)
#!/usr/bin/python
# -*- coding: latin-1 -*-
'''
Experiments with apriori
'''
import apriori
import random
import loadText

support = 0.1

# importFromFile is called for its side effect; the data is then read back
# from loadText.rawPriori.
loadText.importFromFile('snowflakes_db.txt')
dataset = loadText.rawPriori

C1 = apriori.createC1(dataset)
# list(...) keeps the transactions reusable on Python 3, where a bare
# map() is a one-shot iterator; on Python 2 it is already a list.
D = list(map(set, dataset))
L1, support_data = apriori.scanD(D, C1, support)

# Dump every entry of the first-pass support table.  The print(...) calls
# and .items() work unchanged on both Python 2 and Python 3.
print('support_data')
for itemset, value in support_data.items():
    print('%s %s' % (itemset, value))

k_length = 2
transactions = apriori.aprioriGen(L1, k_length)

# Full run; note that this rebinds support_data.
L, support_data = apriori.apriori(dataset, support)
import apriori

# Step-by-step run of the apriori helpers, printing each intermediate value
# under a label.  Single-argument print(...) behaves the same on Python 2
# and Python 3.
dataSet = apriori.loadDataSet()
print("dataSet")
print(dataSet)

C1 = apriori.createC1(dataSet)
print("C1")
print(C1)

# list(...) so D prints as a list and can be scanned on Python 3, where a
# bare map() is a one-shot iterator.
D = list(map(set, dataSet))
print("D")
print(D)

L1, suppData0 = apriori.scanD(D, C1, 0.5)
print("L1")
print(L1)
print("suppData0")
print(suppData0)

L, suppData = apriori.apriori(dataSet, minSupport=0.5)
print("L")
print(L)
print("suppData")
print(suppData)

rules = apriori.generateRules(L, suppData, minConf=0.7)
print("rules")
print(rules)

# Second rule generation with a lower confidence; its result is not printed.
rules = apriori.generateRules(L, suppData, minConf=0.5)
'germany-switzerland-summit', 'athens-sicily', 'venice-the-alps-paris', 'croatia', 'madrid-andalusia', 'london-paris-barcelona', 'holocaust-history', 'dominican-republic-environmental-citizenship' ]
# NOTE(review): this excerpt starts inside a list literal (presumably the
# tail of `tour_names` -- confirm) and its original indentation was lost;
# the loop extents below are reconstructed and should be verified.
df = pd.DataFrame(et_tours, columns=tour_names)
#print(df.shape)
#print(df.head)
# Replace the 0/1 flags of each tour column: 0 becomes NaN, 1 becomes the
# tour's own name.
for index in range(len(tour_names)):
    # tour_to_name = {'0' : np.nan, '1' : tour_names[index],'2' : tour_names[index],'1' : tour_names[index],'3' : tour_names[index],'4' : tour_names[index],'5' : tour_names[index],'6' : tour_names[index],'7' : tour_names[index]}
    tour_to_name = {0: np.nan, 1: tour_names[index]}
    df[tour_names[index]] = df[tour_names[index]].map(tour_to_name)
# NOTE(review): `df.head` is referenced without being called, so this prints
# the bound method rather than the first rows -- confirm whether `df.head()`
# was intended.
print(df.head)
# NOTE(review): on Python 3 `map` returns a one-shot iterator -- confirm that
# scanD accepts it.
C1 = createC1(df) #new stuff
D = map(set, df) #new stuff
L1, support_data = scanD(D, C1, 0.0000005) #new stuff
# Build one basket per row: the row's values that are still non-null.
my_data = list()
for index in range(df.shape[0]):
    # NOTE(review): `.ix` is deprecated/removed in recent pandas -- confirm
    # the pandas version this targets.
    basket = list(df.ix[index].dropna())
    my_data.append(basket)
L, suppData = apriori(my_data)
# `alpha`, `beta` and `max_other_items` are defined outside this excerpt.
print('Identified rules with support = ', alpha, 'and confidence= ', beta)
rules = generateRules(L, suppData, minConf=beta)
n_other_items = 1
# The body of this loop continues beyond the visible excerpt.
while n_other_items <= max_other_items:
def test1():
    """Run the apriori pipeline once on the module's own data set.

    Calls ``apriori.loadDataSet``, ``apriori.createC1``, ``apriori.apriori``
    (``minSupport=0.5``) and ``apriori.generateRules`` (``minConf=0.5``) in
    that order.  Nothing is returned.
    """
    transactions = apriori.loadDataSet()
    # The candidate set is built but not used further in this function.
    apriori.createC1(transactions)
    itemsets, support = apriori.apriori(transactions, minSupport=0.5)
    apriori.generateRules(itemsets, support, minConf=0.5)
import apriori as ap

dataSet = ap.loadDataSet()
C1 = ap.createC1(dataSet)

# D is consumed twice below (scanD, then apriori), so it must be a real
# list: on Python 3 a bare map() would be exhausted after the first use.
D = list(map(set, dataSet))
L1, suppData0 = ap.scanD(D, C1, 0.5)

# The set-converted transactions (D), not the raw data set, are passed on.
L, S = ap.apriori(D, 0.5)
print(L)

List = ap.generateRules(L, S, minConf=0.4)
print(List)
from numpy import *
import recentAprioriTest


def loadDataSet():
    """Return a hard-coded list of four transactions (lists of ints)."""
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]


# NOTE(review): the original line structure was lost; the statement/comment
# boundaries below are reconstructed (a lone `#` is read as a separator
# line) and should be confirmed against the original file.
# NOTE(review): `apriori` is not imported anywhere in the visible code (only
# `recentAprioriTest` is) -- confirm where the name comes from.
if __name__ == '__main__':
    datSet = loadDataSet() #[[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
    # `mat` comes from the star import of numpy above.
    datMat = mat(datSet)
    # print(datMat)
    #
    Cl = apriori.createC1(datSet)
    print("C1")
    print(Cl)
    #
    # NOTE(review): on Python 3 `map` returns a one-shot iterator -- confirm
    # that scanD accepts it.
    D = map(set, datSet) #D[set([1, 3, 4]), set([2, 3, 5]), set([1, 2, 3, 5]), set([2, 5])]
    L1, suppData0 = apriori.scanD(D, Cl, 0.5) #retList, supportData
    print("retList-L1")
    print(L1)
    print("supportData-suppData0")
    print(suppData0)
    # apriori.aprioriGen()
    # Full run; note that this rebinds suppData0.
    L2, suppData0 = apriori.apriori(datSet)
    print("L2")
    print(L2)
import apriori

# Load the data set provided by the apriori module and show it.
transactions = apriori.loadDataSet()
print(transactions)

# Build and show the initial candidates.
candidates = apriori.createC1(transactions)
print(candidates)

# Run apriori with its default arguments and show both results.
itemsets, support = apriori.apriori(transactions)
print(itemsets)
print(support)

# Generate rules with 0.5 as the third argument; the return value is unused.
apriori.generateRules(itemsets, support, 0.5)