示例#1
0
def test_tree_iter():
    """Check that iterating an FPTree reproduces what was inserted.

    Every transaction in ``test_transactions`` is sorted by item frequency
    and inserted into a fresh tree. Iterating the tree yields
    ``(transaction, count)`` pairs; summed per item set, these must equal
    the number of times each item set was inserted.
    """
    item_count, _ = count_item_frequency_in(test_transactions)
    tree = FPTree()

    inserted = Counter()
    for txn in test_transactions:
        # The return value is ignored, so the tree is fed ``txn`` itself.
        sort_transaction(txn, item_count)
        tree.insert(txn)
        # Compare as frozensets so item order within a path is irrelevant.
        inserted[frozenset(txn)] += 1

    recovered = Counter()
    for path, multiplicity in tree:
        recovered[frozenset(path)] += multiplicity

    assert inserted == recovered
示例#2
0
def test_tree_iter():
    """Check that iterating an FPTree reproduces what was inserted.

    Each raw transaction in ``test_transactions`` is converted to a list of
    ``Item`` objects, sorted by item frequency and inserted into a fresh
    tree. Iterating the tree yields ``(transaction, count)`` pairs; summed
    per item set, these must equal the number of times each item set was
    inserted.
    """
    tree = FPTree()
    item_count = count_item_frequency_in(test_transactions)
    expected = Counter()
    for raw_transaction in test_transactions:
        # Work on a fresh list of Items so the shared fixture is not mutated.
        transaction = list(map(Item, raw_transaction))
        sort_transaction(transaction, item_count)
        tree.insert(transaction)
        # Compare as frozensets so item order within a path is irrelevant.
        expected[frozenset(transaction)] += 1
    observed = Counter()
    for (transaction, count) in tree:
        observed[frozenset(transaction)] += count
    assert expected == observed
示例#3
0
def tree_global_change(tree, other_item_count):
    """Measure how much ``tree``'s paths would change under another item order.

    For every path yielded by iterating ``tree``, the path is re-sorted with
    ``other_item_count`` and compared with the original via
    ``levenstein_distance``. The squared distance is normalised by the
    squared path length, and the sum is divided by ``tree.num_transactions``.

    The tree must already be sorted (checked with an assertion). The count
    that accompanies each path is not used: every yielded path contributes
    exactly once to the sum.
    """
    assert tree.is_sorted()
    total = 0.0
    for path, _count in tree:
        reordered = sort_transaction(path, other_item_count)
        edit_distance = levenstein_distance(path, reordered)
        path_length = len(path)
        total += (edit_distance**2) / (path_length**2)
    return total / tree.num_transactions
示例#4
0
def mine_cp_tree_stream(transactions, min_support, sort_interval, window_size):
    """Mine frequent patterns from a stream using a sliding-window tree.

    Each incoming transaction is converted to ``Item`` objects, sorted with
    the most recently captured item frequencies and inserted into an FPTree.
    Once more than ``window_size`` transactions are held, the oldest is
    removed from the tree. The tree is re-sorted every ``sort_interval``
    transactions, and mined with ``fp_growth`` every ``window_size``
    transactions. If the stream ends between mining points, the tree is
    mined once more so the trailing transactions are not missed.

    Args:
        transactions: iterable of transactions; each is an iterable of raw
            items accepted by ``Item``.
        min_support: multiplied by the number of transactions in the tree
            to obtain the minimum count passed to ``fp_growth``.
        sort_interval: number of transactions between tree re-sorts.
        window_size: maximum number of transactions kept in the tree, and
            the number of transactions between mining runs.

    Yields:
        ``(window_start_index, window_length, patterns)`` tuples, where
        ``window_start_index`` is the number of transactions seen so far
        minus the current window length.
    """
    tree = FPTree()
    sliding_window = deque()
    frequency = None
    num_transactions = 0
    for transaction in transactions:
        num_transactions += 1
        transaction = sort_transaction(map(Item, transaction), frequency)
        tree.insert(transaction)
        sliding_window.append(transaction)
        if len(sliding_window) > window_size:
            expired = sliding_window.popleft()
            # The frequency table may have changed since this transaction
            # was inserted, so re-sort it with the current table before
            # removing it from the tree.
            expired = sort_transaction(expired, frequency)
            tree.remove(expired, 1)
            assert len(sliding_window) == window_size
            assert tree.num_transactions == window_size

        just_sorted = (num_transactions % sort_interval) == 0
        if just_sorted:
            tree.sort()
            frequency = tree.item_count.copy()

        if (num_transactions % window_size) == 0:
            if not just_sorted:
                # The periodic sort did not run for this transaction, so
                # sort now: the tree must be sorted before mining.
                tree.sort()
                frequency = tree.item_count.copy()
            assert tree.num_transactions == len(sliding_window)
            assert len(sliding_window) == window_size
            min_count = min_support * tree.num_transactions
            patterns = fp_growth(tree, min_count, [])
            yield (num_transactions - len(sliding_window), len(sliding_window),
                   patterns)

    # The loop never breaks, so this tail always runs after the stream is
    # exhausted. If the last transaction did not trigger a mining run, mine
    # now so the trailing data is not missed.
    if (num_transactions % window_size) != 0:
        if (num_transactions % sort_interval) != 0:
            # No need to refresh ``frequency`` here: nothing is sorted or
            # inserted after this point.
            tree.sort()
        min_count = min_support * tree.num_transactions
        patterns = fp_growth(tree, min_count, [])
        yield (num_transactions - len(sliding_window), len(sliding_window),
               patterns)
示例#5
0
def build_tree(window, item_count):
    """Merge the trees of every bucket in ``window`` into one new FPTree.

    Each ``(transaction, count)`` pair yielded by iterating a bucket's
    ``tree`` is re-sorted with ``item_count`` and inserted into the new tree
    with the same count.

    Args:
        window: iterable of buckets, each exposing an iterable ``tree``.
        item_count: item frequencies passed to ``sort_transaction``.

    Returns:
        ``(tree, avg_path_len)``: the merged tree and the count-weighted
        average length of the inserted transactions. The average is ``0.0``
        when nothing was inserted (empty window or empty bucket trees).
    """
    path_len_sum = 0
    path_count = 0
    tree = FPTree()
    for bucket in window:
        for (transaction, count) in bucket.tree:
            sorted_transaction = sort_transaction(transaction, item_count)
            path_len_sum += count * len(sorted_transaction)
            path_count += count
            tree.insert(sorted_transaction, count)
    # Guard against an empty window / empty trees, which would otherwise
    # raise ZeroDivisionError.
    avg_path_len = path_len_sum / path_count if path_count else 0.0
    return (tree, avg_path_len)
示例#6
0
 def append(self, other_bucket):
     """Merge every transaction of ``other_bucket``'s tree into this tree.

     Each ``(transaction, count)`` pair from the other tree is re-sorted
     with this bucket's ``sorting_counter`` and inserted with its count.
     Afterwards the tree is sorted and ``sorting_counter`` is replaced by
     a copy of the tree's item counts.
     """
     for path, multiplicity in other_bucket.tree:
         reordered = sort_transaction(path, self.sorting_counter)
         self.tree.insert(reordered, multiplicity)
     # TODO: Is this necessary?
     self.tree.sort()
     self.sorting_counter = self.tree.item_count.copy()
示例#7
0
 def add(self, transaction):
     """Sort ``transaction`` with ``sorting_counter`` and insert it into the tree."""
     ordered = sort_transaction(transaction, self.sorting_counter)
     self.tree.insert(ordered)