Пример #1
0
 def test_DA_partition_case2(self):
     """Run 'DA' (k=2, m=4) and then 'AA' (k=2, m=2) on seven transactions.

     Each run must report {'b1': 'B', 'b2': 'B'} at index 2 of the second
     value returned by apriori_based_anon.
     """
     init_tree()
     transactions = [
         ['a1'],
         ['a1', 'a2'],
         ['b1', 'b2'],
         ['b1', 'b2'],
         ['a1', 'a2', 'b2'],
         ['a1', 'a2', 'b2'],
         ['a1', 'a2', 'b1', 'b2'],
     ]
     expected = {'b1': 'B', 'b2': 'B'}
     # The a-items must not appear in the mapping, i.e. the outcome is
     # not {'a1': 'A', 'a2': 'A', 'b1': 'B', 'b2': 'B'}.
     _, da_eval = apriori_based_anon(ATT_TREE, transactions, 'DA', 2, 4)
     self.assertEqual(da_eval[2], expected)
     _, aa_eval = apriori_based_anon(ATT_TREE, transactions, 'AA', 2, 2)
     self.assertEqual(aa_eval[2], expected)
 def test_AA_with_AA_case(self):
     """Run 'AA' and then 'DA' (both k=2, m=2) on four transactions.

     Each run must report {'a1': 'A', 'a2': 'A'} at index 2 of the second
     value returned by apriori_based_anon.
     """
     init_tree()
     transactions = [
         ['a1', 'b1', 'b2'],
         ['a2', 'b1'],
         ['a2', 'b1', 'b2'],
         ['a1', 'a2', 'b2'],
     ]
     expected = {'a1': 'A', 'a2': 'A'}
     _, aa_eval = apriori_based_anon(ATT_TREE, transactions, 'AA', 2, 2)
     self.assertEqual(aa_eval[2], expected)
     _, da_eval = apriori_based_anon(ATT_TREE, transactions, 'DA', 2, 2)
     self.assertEqual(da_eval[2], expected)
 def test_DA_partition_case2(self):
     """Run 'DA' twice (k=2, m=4 and then k=2, m=2) on seven transactions.

     Each run must report {'b1': 'B', 'b2': 'B'} at index 2 of the second
     value returned by apriori_based_anon.
     """
     init_tree()
     transactions = [['a1'], ['a1', 'a2']]
     transactions += [['b1', 'b2'], ['b1', 'b2']]
     transactions += [['a1', 'a2', 'b2'], ['a1', 'a2', 'b2']]
     transactions.append(['a1', 'a2', 'b1', 'b2'])
     expected = {'b1': 'B', 'b2': 'B'}
     # The a-items must not appear in the mapping, i.e. the outcome is
     # not {'a1': 'A', 'a2': 'A', 'b1': 'B', 'b2': 'B'}.
     _, first_eval = apriori_based_anon(ATT_TREE, transactions, 'DA', 2, 4)
     self.assertEqual(first_eval[2], expected)
     _, second_eval = apriori_based_anon(ATT_TREE, transactions, 'DA', 2, 2)
     self.assertEqual(second_eval[2], expected)
Пример #4
0
def get_result_dataset(att_tree,
                       data,
                       type_alg='AA',
                       k=DEFALUT_K,
                       num_test=10):
    """
    Fix k while changing the size of the dataset.

    For every multiple of 5000 records that fits into ``data``, draw
    ``num_test`` random samples of that size, run apriori_based_anon on
    each sample and print the average NCP and running time.

    att_tree and type_alg are passed straight to apriori_based_anon.
    num_test is the number of repetitions per dataset size.
    """
    print("K=%d" % k)
    print("m=%d" % DEFALUT_M)
    data_back = copy.deepcopy(data)
    length = len(data_back)
    joint = 5000
    # Sample sizes are joint, 2 * joint, ... up to and including length.
    # Stepping the range directly replaces the old counter arithmetic,
    # which produced an extra iteration that was always skipped.
    for pos in range(joint, length + 1, joint):
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _trial in range(num_test):
            temp = random.sample(data, pos)
            _, eval_result = apriori_based_anon(att_tree, temp, type_alg, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Restore a pristine dataset before the next sample; presumably
            # apriori_based_anon may modify the sampled records in place.
            data = copy.deepcopy(data_back)
        # float() keeps the average exact even if the sums are integers.
        ncp = ncp / float(num_test)
        rtime = rtime / float(num_test)
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print('#' * 30)
def get_result_dataset(att_tree, data, type_alg='AA', k=DEFALUT_K, num_test=10):
    """
    Fix k while changing the size of the dataset.

    For every multiple of 5000 records that fits into ``data``, draw
    ``num_test`` random samples of that size, run apriori_based_anon on
    each sample and print the average NCP and running time.

    att_tree and type_alg are passed straight to apriori_based_anon.
    num_test is the number of repetitions per dataset size.
    """
    print("K=%d" % k)
    print("m=%d" % DEFALUT_M)
    data_back = copy.deepcopy(data)
    length = len(data_back)
    joint = 5000
    # Sample sizes are joint, 2 * joint, ... up to and including length.
    # Stepping the range directly replaces the old counter arithmetic,
    # which produced an extra iteration that was always skipped.
    for pos in range(joint, length + 1, joint):
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _trial in range(num_test):
            temp = random.sample(data, pos)
            _, eval_result = apriori_based_anon(att_tree, temp, type_alg, k)
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Restore a pristine dataset before the next sample; presumably
            # apriori_based_anon may modify the sampled records in place.
            data = copy.deepcopy(data_back)
        # float() keeps the average exact even if the sums are integers.
        ncp = ncp / float(num_test)
        rtime = rtime / float(num_test)
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print('#' * 30)
Пример #6
0
def get_result_one(att_tree, data, type_alg, k=DEFALUT_K):
    """
    Run apriori_based_anon once with the given k (DEFALUT_K by default)
    and print the NCP and running time it reports.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("K=%d" % k)
    print("Size of Data %d" % len(data))
    print("m=%d" % DEFALUT_M)
    _, eval_result = apriori_based_anon(att_tree, data, type_alg, k)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_one(att_tree, data, type_alg, k=DEFALUT_K):
    """
    Run apriori_based_anon once with the given k (DEFALUT_K by default)
    and print the NCP and running time it reports.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("K=%d" % k)
    print("Size of Data %d" % len(data))
    print("m=%d" % DEFALUT_M)
    _, eval_result = apriori_based_anon(att_tree, data, type_alg, k)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + " seconds")
Пример #8
0
def get_result_k(att_tree, data, type_alg):
    """
    Change k while fixing the size of the dataset.

    Runs apriori_based_anon for k in [2, 5, 10, 25, 50] and prints the
    NCP and running time reported for each k.
    """
    data_back = copy.deepcopy(data)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("m=%d" % DEFALUT_M)
    print("Size of Data %d" % len(data))
    # NOTE: a finer sweep, range(5, 105, 5), was used here previously.
    for k in [2, 5, 10, 25, 50]:
        print('#' * 30)
        print("K=%d" % k)
        _, eval_result = apriori_based_anon(att_tree, data, type_alg, k)
        # Restore a pristine dataset for the next k; presumably
        # apriori_based_anon may modify its input in place.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_m(att_tree, data, type_alg, k=DEFALUT_K):
    """
    Change m while fixing k and the size of the dataset.

    Runs apriori_based_anon for m in [1, 2, 3, 4, 5, M_MAX] and prints
    the NCP and running time reported for each m.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("K=%d" % k)
    print("Size of Data %d" % len(data))
    data_back = copy.deepcopy(data)
    # NOTE: a finer sweep, range(1, 100, 5), was used here previously.
    for m in [1, 2, 3, 4, 5, M_MAX]:
        print('#' * 30)
        print("m=%d" % m)
        _, eval_result = apriori_based_anon(att_tree, data, type_alg, k, m)
        # Restore a pristine dataset for the next m; presumably
        # apriori_based_anon may modify its input in place.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def get_result_k(att_tree, data, type_alg):
    """
    Change k while fixing the size of the dataset.

    Runs apriori_based_anon for k in [2, 5, 10, 25, 50] and prints the
    NCP and running time reported for each k.
    """
    data_back = copy.deepcopy(data)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("m=%d" % DEFALUT_M)
    print("Size of Data %d" % len(data))
    # NOTE: a finer sweep, range(5, 105, 5), was used here previously.
    for k in [2, 5, 10, 25, 50]:
        print('#' * 30)
        print("K=%d" % k)
        _, eval_result = apriori_based_anon(att_tree, data, type_alg, k)
        # Restore a pristine dataset for the next k; presumably
        # apriori_based_anon may modify its input in place.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
Пример #11
0
def get_result_m(att_tree, data, type_alg, k=DEFALUT_K):
    """
    Change m while fixing k and the size of the dataset.

    Runs apriori_based_anon for m in [1, 2, 3, 4, 5, M_MAX] and prints
    the NCP and running time reported for each m.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("K=%d" % k)
    print("Size of Data %d" % len(data))
    data_back = copy.deepcopy(data)
    # NOTE: a finer sweep, range(1, 100, 5), was used here previously.
    for m in [1, 2, 3, 4, 5, M_MAX]:
        print('#' * 30)
        print("m=%d" % m)
        _, eval_result = apriori_based_anon(att_tree, data, type_alg, k, m)
        # Restore a pristine dataset for the next m; presumably
        # apriori_based_anon may modify its input in place.
        data = copy.deepcopy(data_back)
        print("NCP %0.2f" % eval_result[0] + "%")
        print("Running time %0.2f" % eval_result[1] + " seconds")
def T_Gen(trans, k=25, m=2):
    """Generalize the transaction part with the 'AA' algorithm.

    Runs apriori_based_anon on ``trans`` against the last entry of
    ATT_TREES and returns the pair of values it produces.
    """
    tree = ATT_TREES[-1]
    generalized, evaluation = apriori_based_anon(tree, trans, 'AA', k, m)
    return (generalized, evaluation)
Пример #13
0
def T_Gen(trans, k=25, m=2):
    """Generalize the transaction part with the 'AA' algorithm.

    Runs apriori_based_anon on ``trans`` against the last entry of
    ATT_TREES and returns the pair of values it produces.
    """
    tree = ATT_TREES[-1]
    generalized, evaluation = apriori_based_anon(tree, trans, 'AA', k, m)
    return (generalized, evaluation)
Пример #14
0
 def test_DA(self):
     """Run 'DA' (k=2, m=2) on four transactions.

     The run must report {'a1': 'A', 'a2': 'A'} at index 2 of the second
     value returned by apriori_based_anon.
     """
     init_tree()
     transactions = [
         ['a1', 'b1', 'b2'],
         ['a2', 'b1'],
         ['a2', 'b1', 'b2'],
         ['a1', 'a2', 'b2'],
     ]
     expected = {'a1': 'A', 'a2': 'A'}
     _, da_eval = apriori_based_anon(ATT_TREE, transactions, 'DA', 2, 2)
     self.assertEqual(da_eval[2], expected)