def _get_frequent_features(self):
    """Find frequent features with one Apriori counting pass.

    Flattens the ``noun_and_np`` column of ``self.df`` (each cell is
    iterated, and every element of a cell becomes one transaction), builds
    the candidate set with ``apriori.createC1``, keeps the candidates that
    reach a minimum support of 0.01 via ``apriori.scanD``, and stores the
    survivors on ``self.frequent_features`` as a list of strings (the items
    of each frequent itemset joined together).

    Returns:
        None. The result is stored on ``self.frequent_features``.
    """
    feature_terms = [
        sub_items
        for items in self.df['noun_and_np'].values
        for sub_items in items
    ]
    C1 = apriori.createC1(feature_terms)
    # Materialise the transactions: on Python 3 a bare map() is a one-shot
    # iterator with no len(), so it can be neither measured nor re-scanned.
    D = list(map(set, feature_terms))
    L1, _support_data = apriori.scanD(D, C1, 0.01)  # minimum support 0.01
    # Store a real list so the attribute can be read more than once.
    self.frequent_features = ["".join(itemset) for itemset in L1]
class Test:
    """Ad-hoc smoke test of the first Apriori pass.

    The demo only runs when the module is executed as a script; importing
    the module defines an empty class.
    """

    if __name__ == "__main__":
        dataSet = apriori.loadDataSet()
        print(dataSet)
        C1 = apriori.createC1(dataSet)
        # Materialise the transactions: on Python 3 a bare map() is a
        # one-shot iterator with no len().
        D = list(map(set, dataSet))
        # Third argument is the minimum support threshold.
        L1, supportData = apriori.scanD(D, C1, 0.5)
        print(L1)
        print(supportData)
# coding:utf-8
import apriori

# Discover frequent itemsets, then derive association rules from them.

dataSet = apriori.loadDataSet()
print(dataSet)

C1 = apriori.createC1(dataSet)
print(C1)

# Materialise the transactions as a list of sets: on Python 3 a bare map()
# is a one-shot iterator, so print(D) would show only "<map object ...>".
D = list(map(set, dataSet))
print(D)

# Third argument is the minimum support threshold.
L1, suppData0 = apriori.scanD(D, C1, 0.5)
print(L1)

L, suppData = apriori.apriori(dataSet)
print(L)

L, suppData = apriori.apriori(dataSet, minSupport=0.5)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
rules = apriori.generateRules(L, suppData, minConf=0.7)
print(rules)
rules = apriori.generateRules(L, suppData, minConf=0.5)
print(rules)
#!/usr/bin/env python
import sys
from apriori import createC1
from apriori import scanD
from apriori import aprioriGen

# Read whitespace-separated transactions from stdin, one per line.
# str.split() with no argument already ignores leading/trailing whitespace.
a = [line.split() for line in sys.stdin]

c1 = createC1(a)
# Must be a real list: d is scanned once per pass below, and on Python 3 a
# bare map() iterator would be exhausted after the first scanD call.
d = list(map(set, a))

K = 2     # number of scan passes
ps = 0.3  # minimum support passed to scanD

for i in range(K):
    L1, sp = scanD(d, c1, ps)
    c1 = aprioriGen(L1, i + 1)

# After the last pass each surviving itemset is unpacked as a pair of items
# and printed as two integers.
for itemset in L1:
    x, y = itemset
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%d %d" % (int(x), int(y)))
Created on Tue May 16 10:26:18 2017 @author: 凯风 """ from imp import reload import apriori reload(apriori) dataSet = apriori.loadDataSet() # 获取数据 dataSet C1 = apriori.creadteC1(dataSet) # 获取数据集的C1-候选项集合 C1 D = list(map(set, dataSet)) # 把数据转换成集合的形式存放在列表中 D L1, supportData0 = apriori.scanD( D, C1, 0.5) # 以0.5支持度为要求,计算候选集的每一个项的支持度,并返回大于支持度的集合L1 L1 supportData0 # 根据支持度生成频繁集 reload(apriori) L, supportData = apriori.apriori(dataSet) L # 获得支持度大于0.5的频繁集合 L[0] # 包含一个元素的 L[1] # 包含两个元素的 L[2] # 包含三个元素的 L[3] apriori.aprioriGen(L[0], 2) # 看一下如何生成的未和支持度比较的‘L[1]’ L, supportData = apriori.apriori(dataSet, minSupport=0.7) # 更大的支持度,获得少的结果了 # 根据可信度生成关联规则
def aprioriGen(Lk, k):
    """Create the candidate itemsets Ck by joining pairs from Lk.

    Two itemsets are joined (set union) when their first ``k - 2`` items,
    taken in sorted order, are equal.

    Args:
        Lk: indexable sequence of set-like itemsets supporting ``|``; the
            items of a set must be mutually orderable.
        k: size parameter controlling the compared prefix length (``k - 2``).

    Returns:
        List of unions, one per matching pair ``(i, j)`` with ``i < j``.
    """
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            # Sort BEFORE slicing: set iteration order is arbitrary, so
            # slicing first would compare arbitrary items rather than the
            # canonical k-2 smallest ones (missed or duplicated candidates).
            head_i = sorted(Lk[i])[:k - 2]
            head_j = sorted(Lk[j])[:k - 2]
            if head_i == head_j:  # first k-2 elements are equal
                retList.append(Lk[i] | Lk[j])  # set union
    return retList


# --- Inlined body of ``apriori(dataSet, minSupport=0.5)``, run as a script ---
dataSet = loadDataSet()
minSupport = 0.5

C1 = createC1(dataSet)
D = list(map(set, dataSet))
L1, supportData = scanD(D, C1, minSupport)
L = [L1]
k = 2
# Keep growing until a pass yields no itemsets that meet minSupport.
while len(L[k - 2]) > 0:
    Ck = aprioriGen(L[k - 2], k)
    Lk, supK = scanD(D, Ck, minSupport)  # scan DB to get Lk
    supportData.update(supK)
    L.append(Lk)
    k += 1
# As a function this would end with: return L, supportData
Experiments with apriori ''' import apriori import random import loadText support = 0.4 loadText.importFromFile('spanish_db.txt') dataset = loadText.rawPriori #print dataset C1 = apriori.createC1(dataset) #print 'C1', C1 D = map(set,dataset) #print 'D', D L1, support_data = apriori.scanD(D,C1,support) #print 'L1', L1 #print 'support_data', support_data k_length = 2 transactions = apriori.aprioriGen(L1, k_length) #print 'transactions', transactions #print '\n*** *** ***' L,support_data = apriori.apriori(dataset, support) #print 'L', L #print 'support_data', support_data rules = apriori.generateRules(L, support_data, min_confidence=0.7) #print 'rules', rules ruleDict = apriori.generateRuleDict(rules) '''
Experiments with apriori ''' import apriori import random import loadText support = 0.1 loadText.importFromFile('snowflakes_db.txt') dataset = loadText.rawPriori #print dataset C1 = apriori.createC1(dataset) #print 'C1', C1 D = map(set, dataset) #print 'D', D L1, support_data = apriori.scanD(D, C1, support) #print 'L1', L1 #print 'support_data', support_data print 'support_data' for k, v in support_data.iteritems(): print k, v k_length = 2 transactions = apriori.aprioriGen(L1, k_length) #print 'transactions', transactions #print '\n*** *** ***' L, support_data = apriori.apriori(dataset, support) #print 'L', L #print 'support_data', support_data rules = apriori.generateRules(L, support_data, min_confidence=0.7) #print 'rules', rules
import apriori

# Each intermediate result is printed under a label. Single-argument
# print(...) behaves the same on Python 2 and Python 3.

dataSet = apriori.loadDataSet()
print("dataSet")
print(dataSet)

C1 = apriori.createC1(dataSet)
print("C1")
print(C1)

# Materialise the transactions as a list of sets: on Python 3 a bare map()
# is a one-shot iterator, so print(D) would show only "<map object ...>".
D = list(map(set, dataSet))
print("D")
print(D)

# Third argument is the minimum support threshold.
L1, suppData0 = apriori.scanD(D, C1, 0.5)
print("L1")
print(L1)
print("suppData0")
print(suppData0)

L, suppData = apriori.apriori(dataSet, minSupport=0.5)
print("L")
print(L)
print("suppData")
print(suppData)

rules = apriori.generateRules(L, suppData, minConf=0.7)
print("rules")
print(rules)

rules = apriori.generateRules(L, suppData, minConf=0.5)
'holocaust-history', 'dominican-republic-environmental-citizenship' ] df = pd.DataFrame(et_tours, columns=tour_names) #print(df.shape) #print(df.head) for index in range(len(tour_names)): # tour_to_name = {'0' : np.nan, '1' : tour_names[index],'2' : tour_names[index],'1' : tour_names[index],'3' : tour_names[index],'4' : tour_names[index],'5' : tour_names[index],'6' : tour_names[index],'7' : tour_names[index]} tour_to_name = {0: np.nan, 1: tour_names[index]} df[tour_names[index]] = df[tour_names[index]].map(tour_to_name) print(df.head) C1 = createC1(df) #new stuff D = map(set, df) #new stuff L1, support_data = scanD(D, C1, 0.0000005) #new stuff my_data = list() for index in range(df.shape[0]): basket = list(df.ix[index].dropna()) my_data.append(basket) L, suppData = apriori(my_data) print('Identified rules with support = ', alpha, 'and confidence= ', beta) rules = generateRules(L, suppData, minConf=beta) n_other_items = 1 while n_other_items <= max_other_items: print('\nRules with ', n_other_items, 'other item(s)') for item in L[n_other_items]:
import apriori as ap

dataSet = ap.loadDataSet()
C1 = ap.createC1(dataSet)

# Must be a real list: D is handed to both scanD and apriori below, and on
# Python 3 a bare map() iterator would already be exhausted by the second use.
D = list(map(set, dataSet))

# Third argument is the minimum support threshold.
L1, suppData0 = ap.scanD(D, C1, 0.5)

L, S = ap.apriori(D, 0.5)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(L)

List = ap.generateRules(L, S, minConf=0.4)
print(List)