def main():
    """Command-line entry point: parse options, run Apriori, print results.

    Reads transactions from --inputFile (or stdin when the option is
    omitted) and mines frequent itemsets at the --minSupport threshold.
    """
    import sys
    from optparse import OptionParser
    from apriori import runApriori, dataFromFile, printResults

    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile', dest='input',
                         help='filename containing csv', default=None)
    optparser.add_option('-s', '--minSupport', dest='minS',
                         help='minimum support value', default=0.4, type='float')
    (options, args) = optparser.parse_args()

    # BUG FIX: the original if/elif/else had an unreachable else branch
    # (is None / is not None are exhaustive), and the printResults call
    # below crashed with AttributeError on the stdin path because
    # options.input is None and has no .split().
    if options.input is None:
        inFile = sys.stdin
        dataset_name = 'stdin'
    else:
        inFile = dataFromFile(options.input)
        # The third path component is used as the dataset label; assumes a
        # path shaped like './<dir>/<file>' — fall back to the last
        # component for shorter paths instead of raising IndexError.
        parts = options.input.split("/")
        dataset_name = parts[2] if len(parts) > 2 else parts[-1]

    minSupport = options.minS
    items, PBoarder, NBoarder = runApriori(inFile, minSupport)
    printResults(items, minSupport, dataset_name, PBoarder, NBoarder)
def runApriori(file):
    """Interactive Apriori wrapper.

    Prompts on stdin for the support and confidence thresholds, loads the
    transactions from `file`, and prints the mined itemsets and rules.
    """
    support_threshold = float(input("Enter the minimum support value :: "))
    confidence_threshold = float(input("Enter the minimum confidence value :: "))
    transactions = apriori.dataFromFile(file)
    frequent_items, assoc_rules = apriori.runApriori(
        transactions, support_threshold, confidence_threshold)
    apriori.printResults(frequent_items, assoc_rules)
def test_run_apriori_should_get_items_and_rules(self):
    """End-to-end check: a known transaction set yields the expected
    frequent itemsets and association rules."""
    rows = [
        'apple,beer,rice,chicken',
        'apple,beer,rice',
        'apple,beer',
        'apple,mango',
        'milk,beer,rice,chicken',
        'milk,beer,rice',
        'milk,beer',
        'milk,mango',
    ]
    # FIX: write the fixture directly instead of shelling out through
    # os.system('echo …') (non-portable, fragile quoting). The trailing
    # newline mirrors the one `echo` appended.
    with open('test_apriori.csv', 'w') as fh:
        fh.write('\n'.join(rows) + '\n')
    try:
        inFile = dataFromFile('test_apriori.csv')
        minSupport = 0.5
        minConfidence = 0.05
        items, rules = runApriori(inFile, minSupport, minConfidence)
        expected = [
            (('milk',), 0.5),
            (('apple',), 0.5),
            (('beer',), 0.75),
            (('rice',), 0.5),
            (('beer', 'rice'), 0.5)
        ]
        self.assertEqual(items, expected)
        expected = [
            ((('beer',), ('rice',)), 0.6666666666666666),
            ((('rice',), ('beer',)), 1.0)
        ]
        self.assertEqual(rules, expected)
    finally:
        # FIX: os.remove instead of os.system('rm …'), and inside finally
        # so the fixture is cleaned up even when an assertion fails.
        os.remove('test_apriori.csv')
def test_run_apriori_should_get_items_and_rules(self):
    """End-to-end check: a known transaction set yields the expected
    frequent itemsets and association rules."""
    rows = [
        'apple,beer,rice,chicken',
        'apple,beer,rice',
        'apple,beer',
        'apple,mango',
        'milk,beer,rice,chicken',
        'milk,beer,rice',
        'milk,beer',
        'milk,mango',
    ]
    # FIX: write the fixture directly instead of shelling out through
    # os.system('echo …') (non-portable, fragile quoting). The trailing
    # newline mirrors the one `echo` appended.
    with open('test_apriori.csv', 'w') as fh:
        fh.write('\n'.join(rows) + '\n')
    try:
        inFile = dataFromFile('test_apriori.csv')
        minSupport = 0.5
        minConfidence = 0.05
        items, rules = runApriori(inFile, minSupport, minConfidence)
        expected = [(('milk', ), 0.5), (('apple', ), 0.5), (('beer', ), 0.75),
                    (('rice', ), 0.5), (('beer', 'rice'), 0.5)]
        self.assertEqual(items, expected)
        expected = [((('beer', ), ('rice', )), 0.6666666666666666),
                    ((('rice', ), ('beer', )), 1.0)]
        self.assertEqual(rules, expected)
    finally:
        # FIX: os.remove instead of os.system('rm …'), and inside finally
        # so the fixture is cleaned up even when an assertion fails.
        os.remove('test_apriori.csv')
def sd_apri_main(inFile, buckets_cls, minSupport, minConfidence, result_name):
    """Classify the indicators in `inFile`, mine them with Apriori, and
    return the result dict produced by printResults."""
    apri_logger.info("start sd_apri")
    # Invoked for its side effect only; the returned name is not used here.
    get_cfg_filename(BASE_DIR)
    classified_indicators = indicator_classify(inFile, buckets_cls)
    transaction_rows = apriori.dataFromList(classified_indicators)
    items, rules = apriori.runApriori(transaction_rows, minSupport, minConfidence)
    return apriori.printResults(items, rules, result_name)
def get_all_recommendation(sup, con, subreddit):
    """Mine the temp transaction file and return recommended items.

    Returns the antecedent items of every mined rule, deduplicated in
    first-seen order, excluding the queried subreddit itself
    (case-insensitive). The temp file is removed even if mining fails.
    """
    try:
        temp_file = apriori.dataFromFile(tempFile_path)
        items, rules = apriori.runApriori(temp_file, sup, con)
        recommendation_set = []
        seen = set()                       # O(1) membership vs. list scans
        subreddit_lower = subreddit.lower()  # hoisted out of the loops
        for rule, confidence in rules:
            pre, post = rule
            for item in pre:
                if item not in seen and item.lower() != subreddit_lower:
                    seen.add(item)
                    recommendation_set.append(item)
        return recommendation_set
    finally:
        # FIX: previously the temp file leaked whenever runApriori raised;
        # clean it up unconditionally (guarded in case it was never created).
        if os.path.exists(tempFile_path):
            os.remove(tempFile_path)
def test_run_apriori_should_get_items_and_rules(self):
    """End-to-end check with order-insensitive comparison of the mined
    itemsets and rules."""
    data = 'apple,beer,rice,chicken\n'
    data += 'apple,beer,rice\n'
    data += 'apple,beer\n'
    data += 'apple,mango\n'
    data += 'milk,beer,rice,chicken\n'
    data += 'milk,beer,rice\n'
    data += 'milk,beer\n'
    data += 'milk,mango\n'
    with open('test_apriori.csv', 'w') as fh:
        fh.write(data)
    try:
        inFile = dataFromFile('test_apriori.csv')
        minSupport = 0.5
        minConfidence = 0.05
        items, rules = runApriori(inFile, minSupport, minConfidence)
        # Sort and set-normalize so the comparison ignores arrangement.
        items = sorted(items, key=lambda x: (len(x[0]), x[1], x[0]))
        items = [(set(a), b) for a, b in items]
        expected = [(("apple",), 0.5), (("milk",), 0.5), (("rice",), 0.5),
                    (("beer",), 0.75), (("beer", "rice"), 0.5)]
        expected = [(set(a), b) for a, b in expected]
        self.assertEqual(items, expected)
        expected = [
            ((('beer',), ('rice',)), 0.6666666666666666),
            ((('rice',), ('beer',)), 1.0)
        ]
        self.assertEqual(set(rules), set(expected))
    finally:
        # FIX: the original test left test_apriori.csv behind; remove it
        # even when an assertion fails (matches the sibling tests).
        os.remove('test_apriori.csv')
# NOTE(review): the leading ')' closes an st.markdown(...) call that begins
# before this chunk — do not remove it.
)
# Formula reference shown to the user above the controls.
st.markdown(
    ' > Support(A) = (Number of transactions in which A appears)/(Total Number of Transactions'
)
st.markdown(' > Confidence(A->B) = Support(AUB)/Support(A)')
st.markdown('---')
# Interactive thresholds for the Apriori run (defaults: 0.15 / 0.6).
support = st.slider("Enter the Minimum Support Value", min_value=0.1,
                    max_value=0.9, value=0.15)
confidence = st.slider("Enter the Minimum Confidence Value", min_value=0.1,
                       max_value=0.9, value=0.6)
# Mine the bundled default CSV with the chosen thresholds and render the
# stringified itemsets and rules.
inFile = dataFromFile(default_csv)
items, rules = runApriori(inFile, support, confidence)
i, r = to_str_results(items, rules)
st.markdown("## Results")
st.markdown("### Frequent Itemsets")
st.write(i)
st.markdown("### Frequent Rules")
st.write(r)
# NOTE(review): this chunk begins inside a `try:` opened before the first
# visible line — the indentation of the first statement reconstructs that.
    # Load the cached additive stats if the dataset file already exists.
    additives_stats = json.load(open(DATASET_FILENAME, "r"))
except:
    # NOTE(review): bare `except:` swallows every exception, including
    # KeyboardInterrupt — narrowing to (IOError, ValueError) would be safer.
    # Cache miss: rebuild via map-reduce and persist for next run.
    print("Building dataset")
    additives_stats = openfood.inline_map_reduce(mapper, reducer)
    json.dump(additives_stats, open(DATASET_FILENAME, "w"))
#add_clean = [(x['_id'], x['value'].split("_")) for x in additives_stats]
#add_clean.sort()
#for add in add_clean:
#    print("{}: {}".format(add[0], add[1]))
ordered_ids = [x['_id'] for x in additives_stats]  # NOTE(review): unused below
ordered_additives = [x['value'] for x in additives_stats]
# One-hot encode the underscore-separated additive lists for inspection.
vectorizer = CountVectorizer(tokenizer=lambda x: x.split("_"), binary=True)
X = vectorizer.fit_transform(ordered_additives)
Xarray = X.toarray()
print("Feature names: ", vectorizer.get_feature_names())
#print(dir(Xarray))
print("Feature vector size: ", Xarray.size)
print("Feature vector shape: ", Xarray.shape)
print("Number of samples: ", len(ordered_additives))
#print(type(Xarray))
# Apriori consumes the raw token lists, not the vectorized matrix.
ordered_additives_split = [x['value'].split("_") for x in additives_stats]
minSupport = 0.1
minConfidence = 0.5
items, rules = runApriori(ordered_additives_split, minSupport, minConfidence)
printResults(items, rules)
if __name__ == "__main__":
    # Discrimination-aware Apriori experiment (Python 2 syntax).
    alpha = 1.2  # elift threshold separating discriminatory from protective rules
    p = 0.8  # NOTE(review): unused in this chunk — confirm it is read later in the file
    mode = 'original' # original dataset
    #mode = 'bin' # binarized dataset
    #mode = 's-bin' # semi-binarized dataset
    inFile = dataFromFile('adult.txt', mode=mode)
    itemSet, transactionList = getItemSetTransactionList(inFile)
    minSupport, minConfidence = 0.09, 0.6
    print 'Apriori is running'
    items, rules, freqSet = runApriori(itemSet, transactionList, minSupport, minConfidence)
    printResults(items, rules)
    print 'number of frequent itemsets:', len(items)
    print 'number of frequent association rules:', len(rules)
    # Potentially-discriminatory itemset used to partition the mined rules.
    DI_s = frozenset(["sex: Female", "marital-status: Never-married"])
    #DI_s = frozenset(["sex: Female", "age: <=30"])
    #DI_s = frozenset(["marital-status: Not-Married", "education: No-Degree"])
    print 'discriminatory itemset:',DI_s
    # Split rules into alpha-discriminatory (MRs) vs alpha-protective (PRs).
    MRs, PRs = get_MRs(rules, alpha, DI_s)
    print 'num of alpha-discriminatory rules', len(MRs)
    print 'num of alpha-protective rules', len(PRs)
    # Split into redlining (Rs) vs non-redlining (NRs) rules.
    Rs, NRs = get_PRs(rules, freqSet, alpha, DI_s)
    print 'num of redlining rules and non-redlining', len(Rs)
# Replace raw float cells with NaN so they serialize as empty CSV fields.
# NOTE(review): only plain `float` cells are rewritten — presumably these are
# the unmatched entries from earlier processing; confirm with the upstream code.
for row_ind in range(len(csv_data)):
    for col_ind in range(len(csv_data.iloc[0, :])):
        if type(csv_data.iloc[row_ind, col_ind]) == float:
            csv_data.iloc[row_ind, col_ind] = float('NaN')
csv_data.to_csv('statuscodes_toleranceRange=' + str(toleranceRange) + '.csv',
                header=False, index=False)
#%% _end of making it look nicely
#%% start o the apriori algorythm
inFile = dataFromFile('statuscodes_toleranceRange=' + str(toleranceRange) + '.csv')
#inFile = dataFromFile('statuscodes.csv')
items, rules = runApriori(inFile, minSupport, minConfidence)
#%% end o the apriori algorythm
#items=tempitems[:64]
#rules=temprules[:70]
#%% saving all the rules to a .txt file
list_of_tuples = rules
# Build the output filename once; parameters are encoded into the name so
# runs with different settings do not overwrite each other.
rules_filename = ('rules_toleranceRange=' + str(toleranceRange) +
                  '_MinSupport=' + str(minSupport) +
                  '_MinConfidence=' + str(minConfidence) +
                  '_maxFailure=' + str(maxFailure) +
                  '_minFailure=' + str(minFailure) + '.txt')
# FIX: context manager instead of a bare open()/close() pair, so the handle
# is closed even if a write raises midway.
with open(rules_filename, 'w') as f:
    for t in list_of_tuples:
        line = ' '.join(str(x) for x in t)
        f.write(line + '\n')
# ''' # try: # apri_indi_set = indicator_classify(inFile,buckets_cls) # inFile = apriori.dataFromList(apri_indi_set) # items, rules = apriori.runApriori(inFile, minSupport, minConfidence) # apriori.printResults(items, rules) # except Exception,e: # logging.error("apriori api error",e) # else: # logging.info("apriori api has execute successfully ") full_name = os.path.realpath(inFile) cfg_file_name = get_cfg_filename(full_name) pos = full_name.find(".txt") result_name = full_name[:pos] + "_result.txt" logging.info("start apriori!") try: #logging.info("in try!") #logging.info("inFile",str(inFile)) apri_indi_set = indicator_classify(inFile, buckets_cls) print "excute apriori algorithm" logging.info("excuting apriori!") rows_file = apriori.dataFromList(apri_indi_set) items, rules = apriori.runApriori(rows_file, minSupport, minConfidence) apriori.printResults(items, rules, result_name) except Exception, e: logging.error("apriori api error", str(e)) else: logging.info("apriori api has execute successfully ") print "End!!"
import json from apriori import runApriori, printResults def generateItemsets(items): for item in items: if ('ingredient_ids' in item): yield item['ingredient_ids'] data = json.load(open('../bbc_ingredients/bbc_crawl.json', 'r')); ingredients = generateItemsets(data) print 'Computing apriori' items, rules = runApriori(ingredients, 0.00, 0.80) printResults(items, rules)
# Pull saved shopping carts from MongoDB and mine association rules on them.
db = client1.cmpe281
coll = db.recommend
cursor = db.recommend.find()
client2 = MongoClient("192.168.99.100:27017")
db2 = client2.cmpe281
minSupport = 3  # NOTE(review): unused — runApriori below is called with 0.10
transactions = []
for document in cursor:
    # Each cart is stored as a comma-separated string of item names.
    transactions.append(document['cart'].split(","))
# print(transactions)
items, rules = runApriori(transactions, 0.10, 0.68)
print(rules[0])
rule = {}
# NOTE(review): the loop variable shadows the `rule` dict above and is then
# reassigned inside the inner loop — confirm this is intentional.
for rule in rules:
    for item in rule:
        if type(item) is tuple:
            # item is ((antecedent items), (consequent items)); join each
            # side back into the stored comma-separated format.
            cart = ",".join(item[0])
            recommend = ",".join(item[1])
            rule = {"cart": cart, "recommend": recommend}
            # print(rule)
            # Skip rules already persisted in the rules collection.
            if db.rules.find(rule).count() > 0:
                print("document already exists")
            else:
# NOTE(review): the else-branch body continues beyond this chunk.
import json from apriori import runApriori, printResults def generateItemsets(items): for item in items: if ('ingredient_ids' in item): yield item['ingredient_ids'] data = json.load(open('../bbc_ingredients/bbc_crawl.json', 'r')) ingredients = generateItemsets(data) print 'Computing apriori' items, rules = runApriori(ingredients, 0.00, 0.80) printResults(items, rules)
# items, rules = apriori.runApriori(inFile, minSupport, minConfidence) # apriori.printResults(items, rules) # except Exception,e: # logging.error("apriori api error",e) # else: # logging.info("apriori api has execute successfully ") full_name = os.path.realpath(inFile) cfg_file_name = get_cfg_filename(full_name) pos = full_name.find(".txt") result_name = full_name[:pos] + "_result.txt" logging.info("start apriori!") try: #logging.info("in try!") #logging.info("inFile",str(inFile)) apri_indi_set = indicator_classify(inFile,buckets_cls) print "excute apriori algorithm" logging.info("excuting apriori!") rows_file = apriori.dataFromList(apri_indi_set) items, rules = apriori.runApriori(rows_file, minSupport, minConfidence) apriori.printResults(items, rules,result_name) except Exception,e: logging.error("apriori api error",str(e)) else: logging.info("apriori api has execute successfully ") print "End!!"
from __future__ import division
from pymongo import MongoClient
from bson.code import Code
from apriori import runApriori, printResults

# Collect the ingredient tag lists of every product that has any.
client = MongoClient()
db = client["off"]
products = db["products"]

product_ingredients = []
total_ingredients = 0
for p in products.find():
    key = "ingredients_tags"
    # `key in p` replaces the Python-2-only dict.has_key() — same behavior,
    # works on both Python 2 and 3.
    if key in p and len(p[key]) > 0:
        product_ingredients.append(p[key])
        total_ingredients += len(p[key])

print("Total products with ingredients: {}".format(len(product_ingredients)))
print("Total number of recorded ingredients: {}".format(total_ingredients))
# BUG FIX: the average was inverted (products / ingredients). Ingredients
# per product is total_ingredients divided by the product count; the guard
# avoids ZeroDivisionError on an empty collection.
if product_ingredients:
    print("Average number of ingredients per product: {}".format(
        total_ingredients / len(product_ingredients)))

# Mine association rules over the ingredient lists.
minSupport = 0.2
minConfidence = 0.7
items, rules = runApriori(product_ingredients, minSupport, minConfidence)
printResults(items, rules)