Exemplo n.º 1
0
def experiment(transactions, epsilon, delta, mu, supp, rep):
    """Repeat a sampling experiment ``rep`` times and average the measurements.

    Each repetition draws two samples from ``transactions`` -- one sized by
    ``toivonen_bound`` and one sized by ``dbound_bound`` -- mines both with
    ``fim`` (``zmin=2``), and compares the two result sets with
    ``compare_fis``.

    Args:
        transactions: collection of transactions; it is passed to
            ``sample_n`` and its ``len()`` is passed to ``dbound_bound``.
        epsilon: forwarded to ``toivonen_bound`` and ``dbound_bound``.
        delta: forwarded to ``toivonen_bound`` and ``dbound_bound``.
        mu: forwarded to ``toivonen_threshold``.
        supp: support value forwarded to ``fim`` for both samples.
        rep: number of repetitions to run.

    Returns:
        A list holding the column-wise mean, over all repetitions, of
        ``[epsilon, delta, mu, t_bound, t_fi_size, threshold, d_bound,
        d_fi_size, fi_common_size, percent, milliseconds]``.

    Note:
        ``d_index`` is neither a parameter nor a local; it is read from the
        enclosing (module) scope and must be defined before the call.
    """
    results = []
    # ``range`` rather than ``xrange``: ``xrange`` no longer exists on
    # Python 3, and ``range`` works on both major versions.
    for r in range(rep):
        ts = time.time()

        # Compute the bounds (kept inside the loop so that the reported
        # duration covers the same work in every repetition)
        t_bound = toivonen_bound(epsilon, delta)
        d_bound = dbound_bound(len(transactions), epsilon, delta,
                               float(d_index), 0.5)
        print(
            '[+] Performing experiment {}/{}. (epsilon: {}, delta: {}, mu: {}, support: {})'
            .format(r + 1, rep, epsilon, delta, mu, supp))

        # Draw a sample --- Toivonen's bound
        sample = sample_n(transactions, t_bound)
        threshold = toivonen_threshold(len(sample), mu)
        print('[|] Toivonen\'s bound sample size: {:,} ({})'.format(
            len(sample), threshold))
        t_fi = fim(sample, supp=supp, zmin=2)
        t_fi_size = len(t_fi)
        print('[|] Frequent itemsets found {}'.format(t_fi_size))

        # Draw a sample --- d-bound
        sample = sample_n(transactions, d_bound)
        print('[|] d-bound sample size: {:,}'.format(len(sample)))
        d_fi = fim(sample, supp=supp, zmin=2)
        d_fi_size = len(d_fi)
        print('[|] Frequent itemsets found {}'.format(d_fi_size))

        print('[+] Comparing the frequent itemsets')
        fi_common = compare_fis(t_fi, d_fi)
        fi_common_size = len(fi_common)
        # Average of the two ``percentage`` values, one per result set
        # (presumably the share of each set that is common -- confirm
        # against the ``percentage`` helper).
        percent = numpy.mean([
            percentage(t_fi_size, fi_common_size),
            percentage(d_fi_size, fi_common_size)
        ])
        print('[|] {} frequent itemsets in common ({:.2f}%)'.format(
            fi_common_size, percent))

        # Finish the experiment
        te = time.time()
        milliseconds = (te - ts) * 1000.0
        print('[+] Experiment duration: {:.4f} ms'.format(milliseconds))
        print('')
        results.append([
            epsilon, delta, mu, t_bound, t_fi_size, threshold, d_bound,
            d_fi_size, fi_common_size, percent, milliseconds
        ])

    # Column-wise mean across the repetitions
    return numpy.mean(results, axis=0).tolist()
Exemplo n.º 2
0
def _get_frequent_candidates(transactions, min_support, max_words):
    """
    Run a frequent itemset mining algorithm on the transactions.
        Input:
            transactions: list of tuples, each holding the items of one
                          transaction
            min_support: minimum support of an itemset (positive: percentage,
                         negative: absolute number)
            max_words: maximum number of items in each frequent itemset
        Output:
            list of the extracted frequent itemsets, each in the form
            (itemset, support)
    """
    # Delegate to fim.fim; max_words caps the itemset size through ``zmax``.
    frequent_itemsets = fim.fim(
        transactions,
        supp=min_support,
        zmax=max_words,
    )
    return frequent_itemsets
Exemplo n.º 3
0
        # print(test_pids_cat2[0])
        # print(test_known_tracks[test_pids_cat2[0]])
        # print([x[1] for x in test_known_tracks[test_pids_cat2[0]]])
        #
        # print("start")
        sequences = urm_to_sequences(urm_pos=dr.get_position_matrix(position_type='last'),
                                     target_list=[x[1] for x in test_known_tracks[test_pids_cat2[0]]],
                                     min_common=1)


        # for s in sequences: print(s)
        # for s in sequences[0:2]:
        #     print("seuences:", s)

        # print("maximal")
        seq = fim(sequences[0:2], target='maximal', supp=-2, zmin=2, report='a')
        # for s in seq:
        #     print("max>", s)

        # print("normale")
        # seq = fim(sequences[0:10],  supp=-2, zmin=2, report='a')
        # print("norm", seq)

        # print("prefixspan")
        sequences_for_prefix = urm_to_sequences(urm_pos=dr.get_position_matrix(position_type='last'),
                                             target_list=[x[1] for x in test_known_tracks[test_pids_cat2[0]]],
                                            min_common=1,
                                            list_of_list_of_listss=True)

        model = PrefixSpan.train(sequences_for_prefix, minSupport=0.1, maxPatternLength=250)
        result = model.freqSequences().collect()
    # --- save generated pattern spectrum ---
    if cnt > 0 and pspfn != "": # if file name for pattern spectrum
        t = time()              # start timer, print log message
        stderr.write('writing %s ... ' % pspfn)
        with open(pspfn, 'w') as out: # write pattern spectrum to a file
            for s in sorted([(z,c,psp[z,c]) for z,c in psp]):
                out.write(('%d'+pssep+'%d'+pssep+'%.16g\n') % s)
        stderr.write('[%d signature(s)]' % len(psp))
        stderr.write(' done [%.2fs].\n' % (time()-t))

    # --- analyze original data set ---
    if len(args) < 2: exit()    # check for an output file name
    t = time()                  # start timer, print log message
    stderr.write('analyzing original data ... ')
    pats = fim(tracts, target, supp, zmin, zmax, 'a', border=border)
    stderr.write('[%d pattern(s)]' % len(pats))
    stderr.write(' done [%.2fs].\n' % (time()-t))

    # --- pattern set reduction ---
    if pred != 'x':             # if to filter with pattern spectrum
        t = time()              # start timer, print log message
        stderr.write('reducing pattern set ... ')
        pats = patred(pats, pred, border, False)
        stderr.write('[%d pattern(s)]' % len(pats))
        stderr.write(' done [%.2fs].\n' % (time()-t))

    # --- write output file ---
    t = time()                  # start timer, print log message
    stderr.write('writing %s ... ' % args[1])
    with open(args[1], 'w') as out:
Exemplo n.º 5
0
# Test selector taken from the first command-line argument:
#   tid < 0  -> print the docstring of one of the mining functions
#   tid >= 0 -> run one of the mining functions on a small demo data set
tid = int(argv[1])
if tid < -2:
    print(fpgrowth.__doc__)
elif tid < -1:
    print(eclat.__doc__)
elif tid < 0:
    print(apriori.__doc__)
else:
    # Ten small demo transactions over the items 1..5
    tracts = [[1, 2, 3], [1, 4, 5], [2, 3, 4], [1, 2, 3, 4], [2, 3], [1, 2, 4],
              [4, 5], [1, 2, 3, 4], [3, 4, 5], [1, 2, 3]]
    print('transactions:')
    for t in tracts:
        print(t)
    # Every result line uses ``print(r)`` with a single argument, which
    # produces identical output on Python 2 and Python 3 (the former
    # ``print r`` statement is a SyntaxError on Python 3).
    if tid < 1:
        print('apriori(tracts, supp=-3, zmin=2):')
        for r in apriori(tracts, supp=-3, zmin=2):
            print(r)
    elif tid < 2:
        print('eclat(tracts, supp=-3, zmin=2):')
        for r in eclat(tracts, supp=-3, zmin=2):
            print(r)
    elif tid < 3:
        print('fpgrowth(tracts, supp=-3, zmin=2):')
        for r in fpgrowth(tracts, supp=-3, zmin=2):
            print(r)
    else:
        print('fim(tracts, supp=-3, zmin=2, report=\'#\'):')
        for r in fim(tracts, supp=-3, zmin=2, report='#'):
            print(r)
Exemplo n.º 6
0
    print(fpgrowth.__doc__)
elif tid < -1:
    print(eclat.__doc__)
elif tid <  0:
    print(apriori.__doc__)
else:
    # Ten small demo transactions over the items 1..5
    tracts = [ [ 1, 2, 3 ],
               [ 1, 4, 5 ],
               [ 2, 3, 4 ],
               [ 1, 2, 3, 4 ],
               [ 2, 3 ],
               [ 1, 2, 4 ],
               [ 4, 5 ],
               [ 1, 2, 3, 4 ],
               [ 3, 4, 5 ],
               [ 1, 2, 3 ] ]
    print('transactions:')
    for t in tracts: print(t)
    # Run the algorithm selected by ``tid``.  Results are printed with the
    # single-argument call form ``print(r)``, which behaves the same on
    # Python 2 and Python 3 (``print r`` is a SyntaxError on Python 3).
    if   tid < 1:
        print('apriori(tracts, supp=-3, zmin=2):')
        for r in apriori(tracts, supp=-3, zmin=2): print(r)
    elif tid < 2:
        print('eclat(tracts, supp=-3, zmin=2):')
        for r in eclat(tracts, supp=-3, zmin=2): print(r)
    elif tid < 3:
        print('fpgrowth(tracts, supp=-3, zmin=2):')
        for r in fpgrowth(tracts, supp=-3, zmin=2): print(r)
    else:
        print('fim(tracts, supp=-3, zmin=2, report=\'#\'):')
        for r in fim(tracts, supp=-3, zmin=2, report='#'): print(r)
Exemplo n.º 7
0
# Human initial state: infected and exposed values are derived from the
# first entry of data_case scaled by |opt_repH|; the susceptible value is
# the remainder so that the three human values sum to 1.
I0 = data_case[0] / abs(opt_repH)
E0 = 2 * data_case[0] / abs(opt_repH)
S0 = 1.0 - I0 - E0
# Mosquito initial state
A0 = 1.0
S0_m = 1.0 / abs(opt_mu_m)
E0_m = 0.0
I0_m = 0.0
ini = [S0, E0, I0, A0, S0_m, E0_m, I0_m]
# Sample times in steps of 7, starting at 0 (presumably days, i.e. one
# sample per week from sim_start up to week 53 -- confirm).
time_step = np.array(range(0, 7 * (53 - sim_start), 7))

# Simulated case data
dengue = dengue_model()
opt_res = dengue.ode_run(dengue.model, ini, time_step, param)
# Cumulative trapezoidal integrals of |opt_repH| * alpha * opt_res[:, 1],
# each prefixed with a 0: once over all rows (t1) and once without the last
# row (t0).
res_inc_t1 = np.append(np.array([0]),
                       spi.cumtrapz(abs(opt_repH) * alpha * opt_res[:, 1]))
res_inc_t0 = np.append(np.array([0]),
                       spi.cumtrapz(abs(opt_repH) * alpha * opt_res[0:-1, 1]))
# Difference between the t1 series and the t0 series shifted by one position
# (a 0 is prepended to the latter so both have the same length).
data_mw = 7 * (res_inc_t1 + 6.0 / 7.0) - 7 * np.append(np.array(
    [0]), (res_inc_t0 + 6.0 / 7.0))
#print('data_mw', data_mw)

# Fisher information matrix
fim_dengue = fim(dengue.model, time_step)
# NOTE(review): the next line rebinds the module-level name ``fim`` from the
# callable used just above to its result; the name is kept unchanged here in
# case later code reads ``fim`` as the matrix.
fim = fim_dengue.Fim_x(param, data_mw)  #default pert = 0.01
fim_r = fim_dengue.Fim_rank(fim)  #set the default tolerance to match matlab
# Call-style prints work on both Python 2 and Python 3; the two spaces after
# "rank:" reproduce the output of the former ``print 'rank: ', fim_r``.
print(fim)
print('rank:  {0}'.format(fim_r))
Exemplo n.º 8
0
#-----------------------------------------------------------------------

if __name__ == '__main__':
    # Number of timed repetitions: first command-line argument, default 1.
    runs = 1 if len(argv) <= 1 else int(argv[1])

    # 1000 random transactions over the items 1..100; an item is included
    # whenever random() < 0.1.
    tracts = [[i+1 for i in range(100) if random() < 0.1]
              for k in range(1000)]

    # Dump the transactions to 'data.txt': one transaction per line, every
    # item followed by a single space.
    with open('data.txt', 'w') as out:
        for t in tracts:
            out.write(''.join(['%d ' % i for i in t]))
            out.write('\n')

    # Time the generic fim() entry point; its result set serves as reference.
    stderr.write('frequent item sets:\n')
    stderr.write('fim    ... ')
    t = time()
    for r in range(runs):
        pypats = fim(tracts, supp=-2, zmin=2, report='a')
    stderr.write('done [%.3fs].\n' % (time()-t))
    # Normalize each pattern to (sorted item tuple, x) so that patterns can
    # be compared independent of item order.
    ref = set((tuple(sorted(s)), x) for s, x in pypats)
    stderr.write('\n')

    # Time each individual algorithm with the same settings.
    for p, f in (('apriori',  apriori),
                 ('eclat',    eclat),
                 ('fpgrowth', fpgrowth),
                 ('sam',      sam),
                 ('relim',    relim)):
        stderr.write('%s (all)\n' % p)
        stderr.write('python ... ')
        t = time()
        for r in range(runs):
            pypats = f(tracts, supp=-2, zmin=2, report='a')
        stderr.write('done [%.3fs].\n' % (time()-t))