def buildTree(self, transactionDatabase):
    """Build an FP-tree from every transaction in ``transactionDatabase``.

    ``transactionDatabase`` is iterated once; each element is handed to
    ``FPTree.add`` unchanged, in iteration order.

    Returns the populated ``FPTree``.
    """
    fp_tree = FPTree()
    for entry in transactionDatabase:
        fp_tree.add(entry)
    return fp_tree
def find_frequent_itemsets(transactions, minimum_support, include_support=False):
    """
    Find frequent itemsets in the given transactions using FP-growth. This
    function returns a generator instead of an eagerly-populated list of items.

    The `transactions` parameter can be any iterable of iterables of items.
    `minimum_support` should be an integer specifying the minimum number of
    occurrences of an itemset for it to be accepted.

    Each item must be hashable (i.e., it must be valid as a member of a
    dictionary or a set).

    If `include_support` is true, yield (itemset, support) pairs instead of
    just the itemsets. Each itemset is yielded as a list.
    """
    items = defaultdict(int)  # mapping from items to their supports
    processed_transactions = []

    # Load the passed-in transactions and count the support that individual
    # items have. Each transaction is copied into a list because it is walked
    # a second time when the tree is built, and the input may be one-shot.
    for transaction in transactions:
        processed = list(transaction)
        for item in processed:
            items[item] += 1
        processed_transactions.append(processed)

    # Remove infrequent items from the item support dictionary.
    # `.items()` (rather than `.iteritems()`) works on Python 2 and 3 alike.
    items = {
        item: support
        for item, support in items.items()
        if support >= minimum_support
    }

    # Build our FP-tree. Before any transactions can be added to the tree, they
    # must be stripped of infrequent items and their surviving items must be
    # sorted in decreasing order of frequency.
    def clean_transaction(transaction):
        # `sorted` always returns a list and is stable, so this matches the
        # former filter-then-list.sort behaviour without relying on `filter`
        # returning a list (it returns an iterator on Python 3).
        return sorted(
            (v for v in transaction if v in items),
            key=lambda v: items[v],
            reverse=True,
        )

    master = FPTree()
    # A generator expression replaces `itertools.imap`, which is Python 2 only.
    for transaction in (clean_transaction(t) for t in processed_transactions):
        master.add(transaction)

    def find_with_suffix(tree, suffix):
        for item, nodes in tree.items():
            support = sum(n.count for n in nodes)
            if support >= minimum_support and item not in suffix:
                # New winner!
                found_set = [item] + suffix
                yield (found_set, support) if include_support else found_set

                # Build a conditional tree and recursively search for frequent
                # itemsets within it.
                cond_tree = conditional_tree_from_paths(
                    tree.prefix_paths(item), minimum_support)
                # Explicit loop instead of `yield from` keeps Python 2 support.
                for s in find_with_suffix(cond_tree, found_set):
                    yield s  # pass along the good news to our caller

    # Search for frequent itemsets, and yield the results we find.
    for itemset in find_with_suffix(master, []):
        yield itemset
# miner.mine() if __name__ == "__main__": database = TransactionDatabase.loadFromFile("./data/train_adt.csv", ['97'], 100) data = TransactionDatabase.loadFromFile("./data/train_adt.csv", ['97'], 1) data1 = TransactionDatabase.loadFromFile("./data/test_adt.csv", ['97'], 1) # database.cleanAndPrune(2) # print ("Cleaned database:") # for transaction in database.transactions: # print(str(transaction.label)) # print ("\nItems in FP tree and corresponding nodes:") tree = FPTree() for t in database: tree.add(t) # print(str(tree)) miner = DDPMine(debug=True) start = time.clock() Pt = miner.mine(database, 100) elapsed = time.clock() - start print("Time Total:%f" % elapsed) print(Pt) for row in Pt: print("Pattern:%s label:%s" % (row[0], row[1])) for row in Pt: lb1 = 0 lb2 = 0 for transaction in data.transactions: