def _get_frequent_features(self):
    """Find frequent features with a single Apriori pass.

    Flattens every entry of ``self.df['noun_and_np']`` into one list of
    terms, builds the candidate set with ``apriori.createC1``, filters it
    with ``apriori.scanD`` at a minimum support of 0.01 and stores the
    surviving itemsets, each joined into one string, as a list in
    ``self.frequent_features``.
    """
    feature_terms = [
        term
        for terms in self.df['noun_and_np'].values
        for term in terms
    ]
    candidates = apriori.createC1(feature_terms)
    # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
    # which has no len() and is empty after the first pass over it.
    transactions = [set(term) for term in feature_terms]
    # minimum support 0.01; the support table returned by scanD is not used
    frequent_itemsets, _support_data = apriori.scanD(transactions, candidates, 0.01)
    # Store a list (not a lazy iterator) so the attribute can be read
    # more than once.
    self.frequent_features = ["".join(itemset) for itemset in frequent_itemsets]
# Example #2
0
class Test:
    """Demo holder: runs one Apriori scan when the file is executed as a script.

    The statements sit directly in the class body, so the names bound
    below become class attributes, but only when ``__name__`` is
    ``"__main__"``. On import the class body is empty apart from this
    docstring.
    """
    if __name__ == "__main__":
        dataSet = apriori.loadDataSet()
        print(dataSet)
        C1 = apriori.createC1(dataSet)
        # Build a real list: on Python 3 ``map`` returns a one-shot
        # iterator, which has no len() and cannot be scanned twice.
        D = list(map(set, dataSet))
        # Minimum support 0.5.
        L1, supportData = apriori.scanD(D, C1, 0.5)
        print(L1)
        print(supportData)
# Example #3
0
# coding:utf-8

import apriori

# Discover frequent itemsets and mine association rules.
# Every print below takes a single argument in call form, so the script
# produces the same output on Python 2 and Python 3.

dataSet = apriori.loadDataSet()
print(dataSet)

C1 = apriori.createC1(dataSet)
print(C1)

# Build a real list: on Python 3 ``map`` returns a one-shot iterator, so
# print(D) would show only the iterator object and scanD would receive
# something without len() that cannot be traversed twice.
D = list(map(set, dataSet))
print(D)

# Single scan at minimum support 0.5.
L1, suppData0 = apriori.scanD(D, C1, 0.5)
print(L1)

# Full Apriori run with the module's default arguments.
L, suppData = apriori.apriori(dataSet)
print(L)

# Rules from the itemsets found at minimum support 0.5, confidence >= 0.7.
L, suppData = apriori.apriori(dataSet, minSupport=0.5)
rules = apriori.generateRules(L, suppData, minConf=0.7)
print(rules)

# Same itemsets, lower confidence threshold.
rules = apriori.generateRules(L, suppData, minConf=0.5)
print(rules)
# Example #4
0
#!/usr/bin/env python
import sys
from apriori import createC1
from apriori import scanD
from apriori import aprioriGen

# Read one transaction per stdin line; items are whitespace-separated.
a = []
for line in sys.stdin:
    # str.split() with no argument already ignores leading/trailing blanks.
    a.append(line.split())
c1 = createC1(a)
# Build a real list: the transactions are scanned once per iteration of
# the loop below, and on Python 3 a bare ``map`` object would already be
# exhausted after the first scan.
d = list(map(set, a))

K = 2      # number of scan / candidate-generation rounds
ps = 0.3   # minimum support passed to scanD

for i in range(K):
    L1, sp = scanD(d, c1, ps)
    # Candidates for the next round are generated from this round's result.
    c1 = aprioriGen(L1, i + 1)

# After the K = 2 rounds every itemset in L1 is unpacked as a pair of
# integers.  Sorting the two values makes the printed order independent
# of set iteration order.
for pair in L1:
    x, y = sorted(int(item) for item in pair)
    print("%d %d" % (x, y))

def aprioriGen(Lk, k):
    """Create the candidate itemsets Ck by joining pairs of itemsets in Lk.

    Two itemsets are merged (set union) when their first ``k - 2`` items,
    taken in sorted order, are identical.

    Args:
        Lk: indexable sequence of sets/frozensets. The items inside one
            itemset must be mutually orderable.
        k: size of the itemsets to generate.

    Returns:
        A list with the union of every joinable pair, ordered by the
        positions (i, j), i < j, of the pair in ``Lk``.
    """
    # Sort BEFORE slicing.  Set iteration order is arbitrary, so slicing
    # the raw list first can pick different "prefixes" for two itemsets
    # that really share their smallest items and silently drop a valid
    # candidate (e.g. {1, 8} and {1, 9} for k = 3).
    # Computing each prefix once also avoids redoing it for every pair.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]

    retList = []
    for i, left in enumerate(Lk):
        for j in range(i + 1, len(Lk)):
            if prefixes[i] == prefixes[j]:  # first k-2 elements are equal
                retList.append(left | Lk[j])  # set union
    return retList


# Module-level version of apriori(dataSet, minSupport=0.5): the statements
# a function body would hold are executed directly, and the results stay
# in L and supportData instead of being returned.
dataSet = loadDataSet()
minSupport = 0.5

C1 = createC1(dataSet)
D = [set(transaction) for transaction in dataSet]
L1, supportData = scanD(D, C1, minSupport)

# L collects one level per itemset size; L[-1] is always the newest level,
# i.e. the element at index k - 2.
L = [L1]
k = 2
while len(L[-1]) > 0:
    Ck = aprioriGen(L[-1], k)
    # scan DB to get Lk
    Lk, supK = scanD(D, Ck, minSupport)
    supportData.update(supK)
    L.append(Lk)
    k += 1
# Example #6
0
#!/usr/bin/python
# -*- coding: latin-1 -*-

'''
Experiments with apriori
'''

import apriori
import random
import loadText

# Minimum support used for both the single scan and the full run.
support = 0.4
loadText.importFromFile('spanish_db.txt')
# presumably filled in by importFromFile above - verify against loadText
dataset = loadText.rawPriori

C1 = apriori.createC1(dataset)
# Build a real list: on Python 3 ``map`` returns a one-shot iterator,
# which has no len() and cannot be scanned more than once.
D = list(map(set, dataset))
L1, support_data = apriori.scanD(D, C1, support)

# Candidates of size 2 generated from the frequent 1-itemsets.
k_length = 2
transactions = apriori.aprioriGen(L1, k_length)

# Full Apriori run; rebinds support_data with the result of this run.
L, support_data = apriori.apriori(dataset, support)
rules = apriori.generateRules(L, support_data, min_confidence=0.7)
# Example #7
0
#!/usr/bin/python
# -*- coding: latin-1 -*-
'''
Experiments with apriori
'''

import apriori
import random
import loadText

# Minimum support used for both the single scan and the full run.
support = 0.1
loadText.importFromFile('snowflakes_db.txt')
# presumably filled in by importFromFile above - verify against loadText
dataset = loadText.rawPriori

C1 = apriori.createC1(dataset)
# Build a real list: on Python 3 ``map`` returns a one-shot iterator,
# which has no len() and cannot be scanned more than once.
D = list(map(set, dataset))
L1, support_data = apriori.scanD(D, C1, support)

# Dump the support table, one "itemset support" pair per line.
# items() and single-argument print(...) work on Python 2 and 3 alike;
# the explicit format keeps the text identical to ``print k, v``.
print('support_data')
for k, v in support_data.items():
    print('%s %s' % (k, v))

# Candidates of size 2 generated from the frequent 1-itemsets.
k_length = 2
transactions = apriori.aprioriGen(L1, k_length)

# Full Apriori run; rebinds support_data with the result of this run.
L, support_data = apriori.apriori(dataset, support)
# Example #8
0
import apriori

# Walk through the Apriori pipeline step by step, printing a label line
# followed by each intermediate result.  All prints use the call form
# with one argument, which behaves the same on Python 2 and Python 3.
dataSet = apriori.loadDataSet()
print("dataSet")
print(dataSet)
C1 = apriori.createC1(dataSet)
print("C1")
print(C1)

# Build a real list: on Python 3 ``map`` returns a one-shot iterator, so
# print(D) would show only the iterator object and scanD would receive
# something without len() that cannot be traversed twice.
D = list(map(set, dataSet))
print("D")
print(D)

# Single scan at minimum support 0.5.
L1, suppData0 = apriori.scanD(D, C1, 0.5)
print("L1")
print(L1)
print("suppData0")
print(suppData0)


L, suppData = apriori.apriori(dataSet, minSupport=0.5)
print("L")
print(L)
print("suppData")
print(suppData)

rules = apriori.generateRules(L, suppData, minConf=0.7)
print("rules")
print(rules)

# Same itemsets, lower confidence threshold (result is not printed).
rules = apriori.generateRules(L, suppData, minConf=0.5)
    'germany-switzerland-summit', 'athens-sicily', 'venice-the-alps-paris',
    'croatia', 'madrid-andalusia', 'london-paris-barcelona',
    'holocaust-history', 'dominican-republic-environmental-citizenship'
]
df = pd.DataFrame(et_tours, columns=tour_names)

# Replace the flags in every tour column: 1 becomes the tour's own name,
# 0 becomes NaN.  Series.map with a dict also turns any value that is not
# a key of the dict into NaN.
for tour_name in tour_names:
    df[tour_name] = df[tour_name].map({0: np.nan, 1: tour_name})

# head must be called; printing the bare attribute shows the bound
# method rather than the first rows.
print(df.head())

# Experimental single scan ("new stuff" in the original).
# NOTE(review): iterating a DataFrame yields its column labels, so C1 and
# D are built from the tour names rather than from the rows - confirm
# this is intended.  The results are not used below.
C1 = createC1(df)  #new stuff
# list(...) because a Python 3 ``map`` object is a one-shot iterator
# without len().
D = list(map(set, df))  #new stuff
L1, support_data = scanD(D, C1, 0.0000005)  #new stuff

# One basket per row: the tour names left after dropping the NaN cells.
# iterrows() replaces the positional ``df.ix[index]`` lookup; ``.ix`` no
# longer exists in current pandas.
my_data = [list(row.dropna()) for _, row in df.iterrows()]

# NOTE(review): apriori() runs with its default support although the
# message below reports ``alpha`` - confirm whether alpha should be
# passed here.
L, suppData = apriori(my_data)

print('Identified rules with support = ', alpha, 'and confidence= ', beta)
rules = generateRules(L, suppData, minConf=beta)
n_other_items = 1
while n_other_items <= max_other_items:
# Example #10
0
def test1():
    """Run the apriori module end to end on its bundled sample data.

    Loads the sample transactions, builds the candidate 1-itemsets, mines
    itemsets at minimum support 0.5 and generates rules at minimum
    confidence 0.5.  Nothing is printed or returned.
    """
    transactions = apriori.loadDataSet()
    # The candidate set is built but not used by the steps below.
    candidates = apriori.createC1(transactions)
    itemsets, supports = apriori.apriori(transactions, minSupport=0.5)
    found_rules = apriori.generateRules(itemsets, supports, minConf=0.5)
# Example #11
0

import apriori as ap

dataSet = ap.loadDataSet()
C1 = ap.createC1(dataSet)
# Build a real list: D is passed to scanD and then again to apriori().
# On Python 3 a bare ``map`` object has no len() and would already be
# exhausted by the time of the second call.
D = list(map(set, dataSet))
# Single scan at minimum support 0.5.
L1, suppData0 = ap.scanD(D, C1, 0.5)
# Full Apriori run over the same transactions (given as sets).
L, S = ap.apriori(D, 0.5)

# Call-form print with one argument works on Python 2 and Python 3.
print(L)

List = ap.generateRules(L, S, minConf=0.4)
print(List)


# Example #12
0
from numpy import *
import recentAprioriTest


def loadDataSet():
    """Return the fixed sample data: four transactions of integer items.

    A new list of lists is built on every call.
    """
    transactions = [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
    return transactions


if __name__ == '__main__':
    # Script entry point: build C1, run one scan at minimum support 0.5,
    # then the full Apriori search, printing each result.
    #
    # NOTE(review): ``apriori`` is not imported in the visible part of
    # this file (only numpy and recentAprioriTest are) - confirm where
    # the name is supposed to come from.

    datSet = loadDataSet()
    # No numpy matrix is built from datSet: the value was never used, and
    # the sample rows have different lengths, which (to our knowledge)
    # recent NumPy versions refuse to convert.

    Cl = apriori.createC1(datSet)
    print("C1")
    print(Cl)

    # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
    # which has no len() and cannot be scanned more than once.
    D = list(map(set, datSet))
    # scanD returns (retList, supportData)
    L1, suppData0 = apriori.scanD(D, Cl, 0.5)
    print("retList-L1")
    print(L1)
    print("supportData-suppData0")
    print(suppData0)

    # Full run with default arguments; rebinds suppData0.
    L2, suppData0 = apriori.apriori(datSet)
    print("L2")
    print(L2)
# Example #13
0
import apriori

# Sample transactions provided by the apriori module.
dataMat = apriori.loadDataSet()
print(dataMat)

# Despite its name, dataSet holds the result of createC1, not the
# transactions themselves.
dataSet = apriori.createC1(dataMat)
print(dataSet)

# Full Apriori run with the module's default arguments; both results are
# printed, one per line.
(L, supportData) = apriori.apriori(dataMat)
print("%s\n%s" % (L, supportData))

# Rule generation with 0.5 as third positional argument; the return value
# is discarded.
apriori.generateRules(L, supportData, 0.5)