示例#1
0
    def buildTree(self, transactionDatabase):
        """Build an FP-tree holding every transaction of the database.

        Each element yielded by iterating ``transactionDatabase`` is passed,
        in iteration order, to ``FPTree.add`` on a freshly created tree.

        Returns the populated ``FPTree``.
        """
        tree = FPTree()
        insert = tree.add
        for entry in transactionDatabase:
            insert(entry)
        return tree
def find_frequent_itemsets(transactions,
                           minimum_support,
                           include_support=False):
    """
    Find frequent itemsets in the given transactions using FP-growth. This
    function returns a generator instead of an eagerly-populated list of items.

    The `transactions` parameter can be any iterable of iterables of items.
    `minimum_support` should be an integer specifying the minimum number of
    occurrences of an itemset for it to be accepted.

    Each item must be hashable (i.e., it must be valid as a member of a
    dictionary or a set).

    If `include_support` is true, yield (itemset, support) pairs instead of
    just the itemsets.

    Each yielded itemset is a list of items. Because this is a generator
    function, no work (including reading `transactions`) happens until the
    result is first iterated. An item that appears several times within one
    transaction is counted once per occurrence.

    The body deliberately avoids `dict.iteritems`, `itertools.imap` and
    list-returning `filter`, so it runs unchanged on Python 2 and Python 3.
    """
    items = defaultdict(int)  # mapping from items to their supports
    processed_transactions = []

    # Load the passed-in transactions and count the support that individual
    # items have. Each transaction is materialized as a list so one-shot
    # iterables can be revisited when the tree is built below.
    for transaction in transactions:
        processed = list(transaction)
        for item in processed:
            items[item] += 1
        processed_transactions.append(processed)

    # Remove infrequent items from the item support dictionary.
    items = dict((item, support) for item, support in items.items()
                 if support >= minimum_support)

    # Build our FP-tree. Before any transactions can be added to the tree, they
    # must be stripped of infrequent items and their surviving items must be
    # sorted in decreasing order of frequency.
    def clean_transaction(transaction):
        # sorted() is stable, so items with equal support keep their original
        # relative order, exactly as the in-place list.sort() did.
        return sorted((v for v in transaction if v in items),
                      key=lambda v: items[v], reverse=True)

    master = FPTree()
    for transaction in processed_transactions:
        master.add(clean_transaction(transaction))

    def find_with_suffix(tree, suffix):
        # Yield every frequent itemset that ends with `suffix`, extending it
        # one item at a time through conditional trees.
        for item, nodes in tree.items():
            support = sum(n.count for n in nodes)
            if support >= minimum_support and item not in suffix:
                # New winner!
                found_set = [item] + suffix
                yield (found_set, support) if include_support else found_set

                # Build a conditional tree and recursively search for frequent
                # itemsets within it.
                cond_tree = conditional_tree_from_paths(
                    tree.prefix_paths(item), minimum_support)
                # Explicit loop instead of `yield from` keeps Python 2 support.
                for s in find_with_suffix(cond_tree, found_set):
                    yield s  # pass along the good news to our caller

    # Search for frequent itemsets, and yield the results we find.
    for itemset in find_with_suffix(master, []):
        yield itemset
示例#3
0
#     miner.mine()

if __name__ == "__main__":

    database = TransactionDatabase.loadFromFile("./data/train_adt.csv", ['97'],
                                                100)
    data = TransactionDatabase.loadFromFile("./data/train_adt.csv", ['97'], 1)
    data1 = TransactionDatabase.loadFromFile("./data/test_adt.csv", ['97'], 1)
    # database.cleanAndPrune(2)
    # print ("Cleaned database:")
    # for transaction in database.transactions:
    #     print(str(transaction.label))
    # print ("\nItems in FP tree and corresponding nodes:")
    tree = FPTree()
    for t in database:
        tree.add(t)

    # print(str(tree))
    miner = DDPMine(debug=True)
    start = time.clock()
    Pt = miner.mine(database, 100)
    elapsed = time.clock() - start
    print("Time Total:%f" % elapsed)
    print(Pt)
    for row in Pt:
        print("Pattern:%s  label:%s" % (row[0], row[1]))

    for row in Pt:
        lb1 = 0
        lb2 = 0
        for transaction in data.transactions: