Пример #1
0
 #=======================================================================
 #===========================================================================
 # Mine the frequent itemsets of sub_catlist with the fp_growth algorithm.
 # The second argument scales min_support by the number of rows in
 # sub_catlist and truncates to an int -- presumably turning a fractional
 # support into an absolute row count; confirm against fp_growth.fptree.
 #===========================================================================
 catfreq_list, support_data_cat = fp_growth.fptree(sub_catlist, int(min_support*len(sub_catlist)))
 #=======================================================================
 # Debug aid: dump every entry of catfreq_list.
 # print('catfreq_list is: ')
 # for i in range(len(catfreq_list)):
 #     print(catfreq_list[i])
 # print('-----------------------------------------')
 #=======================================================================
 #===========================================================================
 # Debug aid: dump the support table.
 # for i in support_data.items():
 #     print(i)
 #===========================================================================
 # Only the first two entries of catfreq_list are handed to the rule
 # generator -- NOTE(review): presumably the 1-item and 2-item levels; verify.
 big_rule_list = apriori.generate_rules(catfreq_list[0:2], support_data_cat, min_confidence)
 #print('rule list follows: ')
 # Keep only the rules whose first element (rule[0]) holds exactly one item
 # and that item is test_one_category.
 cat_rule_list = []
 for rule in big_rule_list:
     if test_one_category in rule[0] and len(rule[0]) == 1:
         cat_rule_list.append(rule)
         #print(rule) 
 print('-----------------------------------------')
 # Order the kept rules by their third field, largest first
 # (rule[2] is presumably the confidence -- confirm with apriori.generate_rules).
 cat_rule_list.sort(key= lambda p: p[2], reverse=True)
 #===================================================================
 # Debug aid: dump the sorted rules.
 # print('cat_rule_list is :')
 # for i in cat_rule_list:
 #     print(i)
 # print('-----------------------------------------')
 #===================================================================
 # Take only the first two classes of the category association set
Пример #2
0
import apriori


def _describe(mined_rule):
    """Return the one-line summary printed for a single mined rule."""
    return (f'{mined_rule.format_rule():20s}\t\t'
            f'(support={mined_rule.support:0.4f}, confidence={mined_rule.confidence:0.4f}, lift={mined_rule.lift:0.4f})')


# Demo baskets for the first run (four shopping transactions).
sample_transactions = [
    ['fish', 'white wine', 'cheese', 'bread'],
    ['beer', 'nachos', 'cheese', 'peanuts'],
    ['white wine', 'cheese'],
    ['white wine', 'cheese', 'bread'],
]

# Demo baskets for the second run; every basket contains 'cat'.
pet_ownership = [
    ['cat', 'dog'],
    ['cat', 'fish'],
    ['cat', 'dog', 'fish'],
    ['cat', 'fish', 'horse'],
    ['cat', 'horse', 'dog'],
    ['cat', 'horse', 'dog'],
    ['cat', 'horse', 'dog'],
]

# First run: rules over the shopping baskets with min_support=0.5.
for mined_rule in apriori.generate_rules(sample_transactions, min_support=0.5):
    print(_describe(mined_rule))


print("Example with string info. Note that everyone has a cat (lots of support, but lift is 1 as cat has no info)")

# Second run: additionally filter by min_lift=1.05.
for mined_rule in apriori.generate_rules(pet_ownership, min_support=0.2, min_lift=1.05):
    print(_describe(mined_rule))
Пример #3
0
def recommend(recommendationfunction='trail', inputverbs=None, **kwargs):
    '''Generate new recommendation rules for every assignment that is of some
    importance, based on completed and launched media, questions, assessments.

    Note that only questions and media may be recommended, as the filter should
    already make sure that the found questions/media are only relevant for the
    current assessment.

    Activities are walked newest-first (ordered by "-time").  Each assessment
    activity opens a milestone for its actor; every later-seen (i.e. older)
    non-assessment activity of that actor is filed under that milestone.
    Activities seen before the actor's first milestone are ignored.

    Parameters:
      recommendationfunction -- 'apriori' or 'trail'.  Any other value is
          accepted silently and yields no rules.
      inputverbs -- iterable of keys into TinCan.VERBS selecting which verbs
          to consider; when empty/None only 'completed' is used.
      **kwargs:
          max_consequent_size -- forwarded to the apriori rule generator
              (default 1).
          verbose -- forwarded to the rule generator (default False).
          minsup, minconf -- required by both rule generators.
          gamma -- required by the 'trail' rule generator.
          The required keys are only read once there is at least one
          assessment to generate rules for; a missing key raises KeyError then.

    Returns:
      (rulebase, name_description) where rulebase is a list of dicts with the
      keys 'milestone', 'antecedent', 'consequent', 'confidence', 'support',
      and name_description maps each considered activity id to its
      (name, description) pair.

    TODO:
      - Check if milestone passed
      - Decay for past milestones + assignments
      - Alter filter for smaller query, perhaps multiple queries?

    Don't try to run if
      1. Top X will not be altered, based on people submitted/threshold conf/sup

    '''

    # By default, get all info from users who completed
    if inputverbs:
        verbs = [TinCan.VERBS[verb]['id'] for verb in inputverbs]
    else:
        verbs = [TinCan.VERBS['completed']['id']]
    max_consequent_size = kwargs.get('max_consequent_size', 1)
    verbose = kwargs.get('verbose', False)

    milestones = defaultdict(lambda: 'NO_ASSESSMENT')       # progress per actor
    assessment_ids = []      # assessments in first-seen (newest-first) order
    transactions = dict()    # assessment id -> {actor: [(object, value), ...]}
    freq = dict()            # assessment id -> {0: {(object,): count}}
    name_description = dict()

    for activity in Activity.objects.order_by("-time"):
        if activity.verb not in verbs:
            continue
        actor = activity.user
        name_description[activity.activity] = (activity.name,
                                               activity.description)

        # Use assessments to separate timeslices per actor
        if activity.type == TinCan.ACTIVITY_TYPES['assessment']:
            assessment_id = activity.activity
            milestones[actor] = assessment_id       # For every milestone:

            # Register each assessment once.  `transactions` gains a key
            # exactly when `assessment_ids` gains an entry, so the dict gives
            # the same answer as scanning the list, in constant time.
            if assessment_id not in transactions:
                transactions[assessment_id] = defaultdict(list)  # verbs+objects /actor
                freq[assessment_id] = {0: defaultdict(int)}      # frequency of 1-pairs
                assessment_ids.append(assessment_id)
            continue

        assessment_id = milestones[actor]
        if assessment_id == 'NO_ASSESSMENT':
            # Actor has no milestone yet; nothing to attach this activity to.
            continue

        # Only 'progressed' statements carry their own value; all others
        # count as 1.0.
        if activity.verb == TinCan.VERBS['progressed']['id']:
            value = activity.value
        else:
            value = 1.0
        statement_obj = activity.activity
        transactions[assessment_id][actor].append((statement_obj, value))
        freq[assessment_id][0][(statement_obj,)] += 1

    # Use baskets as transactions and recommend
    rulebase = []
    for assessment_id in assessment_ids:
        D = transactions[assessment_id]
        L = freq[assessment_id]

        rules = []   # stays empty for an unrecognised recommendationfunction
        # A single parenthesised expression prints the same text under the
        # Python 2 print statement and the Python 3 print function (the double
        # space reproduces the separator the old two-argument print emitted).
        print('Generating rules for assessment  %s' % (assessment_id,))
        if recommendationfunction == 'apriori':
            minsup = kwargs['minsup']
            minconf = kwargs['minconf']

            rules = apriori.generate_rules(apriori.apriori, D, L, minsup,
                                           minconf, max_consequent_size,
                                           verbose=verbose, veryverbose=False)
        elif recommendationfunction == 'trail':
            gamma = kwargs['gamma']
            minsupp = kwargs['minsup']
            minconf = kwargs['minconf']

            rules = trail.generate_rules(D, gamma, minsupp, minconf,
                                         verbose=verbose)

        # Save found rules based on the relevant assessment.
        rulebase.extend(
            {'milestone': assessment_id,  # id indicating assessment
             'antecedent': ante,          # LHS
             'consequent': conse,         # RHS
             'confidence': confidence,    # Confidence for LHS->RHS
             'support': support}          # Support for the rule
            for ante, conse, confidence, support in rules)

    return rulebase, name_description
Пример #4
0
# -*- coding: utf-8 -*-

import apriori

# Step 1: build the candidate 1-itemsets and run a single scan over the data.
dataset = apriori.load_data_set()
c1 = apriori.create_c1(dataset)
# Materialise the per-transaction sets as a list: on Python 3 a bare map() is
# a one-shot iterator with no length, while on Python 2 this is just a copy of
# the list map() already returned.
d = list(map(set, dataset))
ret, support_data = apriori.scan_d(d, c1, 0.5)

# Step 2: frequent itemsets at two different thresholds (0.5, then 0.7).
dataset = apriori.load_data_set()
L, sd = apriori.find_freq_set(dataset, 0.5)
L, sd = apriori.find_freq_set(dataset, 0.7)

# Step 3: derive rules from the frequent itemsets found at threshold 0.5.
dataset = apriori.load_data_set()
L, sd = apriori.find_freq_set(dataset, 0.5)
rules = apriori.generate_rules(L, sd, 0.7)

# Step 4: same pipeline on the mushroom data file (whitespace-separated items,
# one transaction per line).  The `with` block closes the file handle, which
# the previous open(...).readlines() expression left open.
with open('chapter11/mushroom.dat') as mushroom_file:
    dataset = [line.split() for line in mushroom_file]
L, sd = apriori.find_freq_set(dataset, 0.3)
for itemset in L[1]:
    # intersection('2') iterates the one-character string, so this keeps the
    # itemsets in L[1] that contain the item '2'.
    if itemset.intersection('2'):
        # Parenthesised single argument: valid on both Python 2 and Python 3.
        print(itemset)
Пример #5
0
        if testcategory[0] in catlist[i]:
            sub_catlist.append(catlist[i])

    # ===========================================================================
    # Print the first 10 rows of sub_catlist (disabled) and its length
    # ===========================================================================
    # ===========================================================================
    # for i in range(10):
    #     print(sub_catlist[i])
    print("sub_catlist's length is : ", len(sub_catlist))
    # ===========================================================================

    # ===========================================================================
    # Mine the frequent itemsets of sub_catlist with the fp_growth algorithm.
    # NOTE(review): the second argument is hard-coded to 100000 -- presumably
    # an absolute minimum support count; confirm against fp_growth.fptree.
    # ===========================================================================
    catfreq_list, support_data = fp_growth.fptree(sub_catlist, 100000)
    print("catfreq_list is: ")
    for i in range(len(catfreq_list)):
        print(catfreq_list[i])

    # ===========================================================================
    # Debug aid: dump the support table.
    # for i in support_data.items():
    #     print(i)
    # ===========================================================================

    # Generate rules from every entry of catfreq_list; the third argument is
    # hard-coded to 0.5 (presumably the minimum confidence -- verify).
    big_rule_list = apriori.generate_rules(catfreq_list, support_data, 0.5)
    print("rule list follows: ")
    # Print only the rules whose first element (rule[0]) holds exactly one item
    # and that item is test_one.
    for rule in big_rule_list:
        if test_one in rule[0] and len(rule[0]) == 1:
            print(rule)