Пример #1
0
def speeds():
    """
    Print wall-clock timings for the three rule generators on one data set.

    A transaction set is generated after seeding `random` with a fixed value,
    itemsets are extracted from it with a threshold of 0.1 (presumably the
    minimum support), and then `generate_rules_apriori`,
    `generate_rules_simple` and `generate_rules_naively` are each run with
    a minimum confidence of 0.5 on those same itemsets.

    The itemsets and one timing line per generator are printed to stdout.
    Nothing is returned.
    """
    import random
    import time

    # Fixed seed so the sizes drawn by random.randint below are reproducible.
    random.seed(123456)
    transactions = generate_transactions(
        num_transactions=random.randint(250, 500),
        unique_items=random.randint(8, 9),
        items_row=(10, 50))

    itemsets, num_transactions = itemsets_from_transactions(transactions, 0.1)
    min_conf = 0.5

    print(itemsets)

    # (message template, rule generator) pairs, timed in this order.
    contenders = (
        ('Fast apriori ran in {} s', generate_rules_apriori),
        ('Simple apriori ran in {} s', generate_rules_simple),
        ('Naive apriori ran in {} s', generate_rules_naively),
    )

    for message, rule_generator in contenders:
        st = time.perf_counter()
        # list() forces any lazily produced rules to be fully computed
        # inside the timed region; the rules themselves are not needed.
        list(rule_generator(itemsets, min_conf, num_transactions))
        elapsed = time.perf_counter() - st
        print(message.format(elapsed))
Пример #2
0
def test_generate_rules_apriori_large():
    """
    Compare the apriori rule generator with the naive one on a larger input.

    This is a regression test: it fails unless the second argument to
    `_ap_genrules` is checked for being non-empty before the recursive call,
    i.e. the implementation must contain

        if H_m_copy:
            yield from _ap_genrules

    for the assertions below to hold.
    """
    confidence = 0.3

    data = generate_transactions(
        num_transactions=100,
        unique_items=30,
        items_row=(1, 20),
        seed=123,
    )
    itemsets, num_transactions = itemsets_from_transactions(data, 0.1)

    # Create both rule iterables first, then materialise them in that order.
    apriori_iter = generate_rules_apriori(itemsets, confidence,
                                          num_transactions)
    naive_iter = generate_rules_naively(itemsets, confidence,
                                        num_transactions)
    apriori_rules = list(apriori_iter)
    naive_rules = list(naive_iter)

    # Equal lengths: the apriori generator must not emit duplicate rules.
    assert len(apriori_rules) == len(naive_rules)

    # Equal contents: both approaches must find exactly the same rules.
    assert set(apriori_rules) == set(naive_rules)
Пример #3
0
    rules_apri = generate_rules_apriori(itemsets, min_conf, num_transactions)
    rules_naive = generate_rules_naively(itemsets, min_conf, num_transactions)
    rules_apri = list(rules_apri)
    rules_naive = list(rules_naive)

    # Test equal length, since no duplicates should be returned by apriori
    assert len(rules_apri) == len(rules_naive)

    # Test equal results
    assert set(rules_apri) == set(rules_naive)


# Ten transaction sets with randomly drawn sizes, each materialised as a
# list; used as the parametrize values for the test that follows.
input_data = [
    list(generate_transactions(
        num_transactions=random.randint(15, 25),
        unique_items=random.randint(1, 8),
        items_row=(1, random.randint(2, 6)),
    ))
    for _ in range(10)
]


@pytest.mark.parametrize("transactions", input_data)
def test_generate_rules_simple_vs_naive(transactions):
    """
    Test the naive rule finder vs. the simple one from the paper.
    """

    itemsets, num_transactions = itemsets_from_transactions(transactions, 0.25)

    min_conf = 0.1
    rules_naive = generate_rules_naively(itemsets, min_conf, num_transactions)