def main(datafile, min_sup, min_conf, name_file):
    """Mine frequent itemsets and association rules from CSV data and print them.

    Expects the sparse vector format: every row of ``datafile`` is one
    transaction, column 0 is skipped and each remaining cell is the integer
    id of an item contained in that transaction.

    Args:
        datafile: Iterable of CSV lines (handed to ``csv.reader``).
        min_sup: Minimum support, forwarded to
            ``apriori.find_frequent_itemsets``.
        min_conf: Minimum confidence, forwarded to
            ``apriori.find_association_rules``.
        name_file: Optional iterable of CSV lines. When truthy, each row maps
            the integer id in column 0 to a display name built by
            concatenating columns 1 and 2, and a named report (itemsets, then
            rules) is printed. When falsy, only the rules are printed, using
            the raw ids.

    Raises:
        ValueError: If an item cell or a name id is not an integer literal.
        KeyError: If the named report meets an id missing from ``name_file``.
    """
    # Column indexes that never hold an item id.
    skipped_columns = {0}

    transactions = []
    largest_item = 0
    for record in csv.reader(datafile):
        items = [
            int(cell)
            for column, cell in enumerate(record)
            if column not in skipped_columns
        ]
        # Track the biggest id seen so far; the miner is given largest + 1.
        largest_item = max([largest_item] + items)
        transactions.append(frozenset(items))

    freq_sets = apriori.find_frequent_itemsets(
        transactions, largest_item + 1, min_sup
    )
    rules = apriori.find_association_rules(transactions, freq_sets, min_conf)

    # Each rule is read positionally: [0] LHS items, [1] RHS item,
    # [2] support, [3] confidence (the last two are printed times 100).
    if not name_file:
        plain_template = "Rule {}: {} ---> {} [sup={} conf={}]"
        for number, rule in enumerate(rules):
            lhs_ids = ", ".join(str(item) for item in rule[0])
            print(plain_template.format(
                number, lhs_ids, rule[1], rule[2] * 100, rule[3] * 100
            ))
        return

    # The whole name table is loaded before anything is printed.
    lookup = {
        int(entry[0]): entry[1] + entry[2] for entry in csv.reader(name_file)
    }

    print("\n##Skyline frequent itemsets\n~~~")
    itemset_template = "Itemset {}:\n\tContains: {}\n\tSupport: {}"
    for number, itemset in enumerate(freq_sets):
        members = ", ".join(lookup[item] for item in itemset)
        itemset_support = apriori.support(itemset, transactions)
        print(itemset_template.format(number, members, itemset_support))

    print("~~~\n\n##Skyline association rules\n~~~")
    rule_template = (
        "Rule {}:\n\tLHS: {}\n\tRHS: {}\n\tSupport: {}\n\tConfidence: {}"
    )
    for number, rule in enumerate(rules):
        lhs_names = ", ".join(lookup[item] for item in rule[0])
        rhs_name = lookup[rule[1]]
        print(rule_template.format(
            number, lhs_names, rhs_name, rule[2] * 100, rule[3] * 100
        ))
    print("~~~")
# NOTE(review): the four statements below are the tail of a function whose
# `def` line lies above this excerpt -- presumably `loadData`, which the
# example further down calls; confirm. `dict` is bound in the unseen part of
# that function and shadows the builtin of the same name (as does `id`).
    # Gather every value of the mapping; the keys (`id`) are not used.
    transactions = []
    for (id, transaction) in dict.items():
        transactions.append(transaction)
    # Hand back a new list holding the collected values.
    return list(transactions)


## Example of usage
# Data file syntax: userID, websiteName
# you may also want to remove whitespace characters like `\n` at the end of each line

# `items` is the itemset being measured. It grows by one name per step, so
# the three support() calls below look at 1, 2 and 3 items respectively.
items = []
items.append('Knowledge Base')
transactions = loadData('weblog.csv')
print("Support for Knowledge Base")
print(ap.support(items, transactions))
items.append('Support Desktop')
print("Support for Knowledge Base and Support Desktop")
print(ap.support(items, transactions))
items.append('MS Office')
print("Support for Knowledge Base,Support Desktop and MS Office")
print(ap.support(items, transactions))
print("Confidence For Knowledge Base => Support Desktop")
# NOTE(review): support() above receives a list of item names, while
# confidence() receives two bare strings -- confirm `ap.confidence` accepts
# single item names rather than lists.
print(ap.confidence('Knowledge Base', 'Support Desktop', transactions))
print("Frequent Items sets")
# Private helpers for the report script below. They keep the loop variables
# out of the module namespace and remove the duplicated graduating /
# not-graduating expressions. All of them read the module-level names
# `getNames`, `courseNames`, `datumSeq` and `apriori` defined elsewhere in
# this file.

def _named_supports(seqs, total):
    """Return ``[course names, count / total]`` for each entry of *seqs*.

    Each entry is indexed as ``entry[0]`` (the sequence) and ``entry[1]``
    (its support count). *total* is converted to ``float`` so the division
    is a true division on Python 2 as well; int / int would truncate every
    support to 0 there.
    """
    total = float(total)
    return [[getNames(entry[0], courseNames), entry[1] / total]
            for entry in seqs]


def _support_rows(seqs, total):
    """Return ``[sequence, count / total, reference support]`` per entry.

    The third element is
    ``apriori.support(apriori.supportList(sequence, datumSeq), total)``.
    NOTE(review): presumably the support of the rule's antecedent, which
    would make ``row[1] / row[2]`` a confidence -- confirm against `apriori`.
    """
    return [
        [entry[0],
         entry[1] / total,
         apriori.support(apriori.supportList(entry[0], datumSeq), total)]
        for entry in seqs
    ]


def _named_rules(rows):
    """Return ``[course names, row[1], row[1] / row[2]]`` for each row."""
    return [[getNames(row[0], courseNames), row[1], row[1] / row[2]]
            for row in rows]


# Use support instead of support count, and include the course names for
# each sequence.
bestSeqs = _named_supports(bestSeqs, len(datum))
with open("popularArches.py", "a") as myfile:
    myfile.write("seqs = [" + str(bestSeqs) + "] \n")

# NOTE(review): the script stops here unconditionally with exit status 1, so
# nothing below ever runs. This looks like a leftover debugging cut; remove
# it if the graduating / not-graduating rule files are meant to be written.
# SystemExit is raised directly because `exit` is an interactive helper
# injected by the `site` module and is not guaranteed to exist.
raise SystemExit(1)

regs = float(len(datumSeq))
# The single-letter prints are progress markers. The parenthesised form
# prints the same text on Python 2 and is valid on Python 3.
print("a")
tmp = _support_rows(graduatingSeqs, regs)
print("b")
graduatingRules = _named_rules(tmp)
print("c")
tmp = _support_rows(notGraduatingSeqs, regs)
print("d")
notGraduatingRules = _named_rules(tmp)
print("e")

with open("graduatingRules.py", "a") as myfile:
    myfile.write(str(graduatingRules) + "\n")
with open("notGraduatingRules.py", "a") as myfile:
    myfile.write(str(notGraduatingRules) + "\n")