Пример #1
0
def get_result_dataset(att_trees, data, l=5, num_test=10):
    """
    Benchmark mondrian_l_diversity on random subsets of growing size.

    l and the attribute trees stay fixed while the subset size steps through
    5000, 10000, ... up to the largest multiple of 5000 that does not exceed
    len(data). For every size the algorithm is run num_test times, each time
    on a fresh random sample, and the averaged NCP and running time are
    printed.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records to sample from
    l         -- l value passed to mondrian_l_diversity
    num_test  -- number of runs averaged per subset size
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    joint = 5000
    print("L=%d" % l)
    # Step straight through the sizes that fit. This replaces the former
    # "length / joint" counter plus the extra iteration that was always
    # skipped by the "pos > length" guard, and works with integer or true
    # division alike.
    for pos in range(joint, length + 1, joint):
        ncp = rtime = 0
        print("#" * 30)
        print("size of dataset %d" % pos)
        for _run in range(num_test):
            temp = random.sample(data, pos)
            _, eval_result = mondrian_l_diversity(att_trees, temp, l)
            # eval_result[0] is reported as NCP, eval_result[1] as run time.
            ncp += eval_result[0]
            rtime += eval_result[1]
            # The sample shares its rows with `data`; start the next run
            # from a pristine copy in case the algorithm modified them.
            data = copy.deepcopy(data_back)
        ncp /= num_test
        rtime /= num_test
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print("#" * 30)
Пример #2
0
 def test_mondrian_l_diversity(self):
     # Seven two-column records, one attribute tree, l=2: the first value of
     # the returned evaluation tuple must be within 0.001 of zero.
     init_tree()
     trees = [ATT_TREE]
     records = [
         ['a1', 'a1'],
         ['b2', 'a1'],
         ['b1', 'b1'],
         ['b2', 'b1'],
         ['b1', 'b2'],
         ['b2', 'b2'],
         ['a1', 'a2'],
     ]
     result, eval_result = mondrian_l_diversity(trees, records, 2)
     deviation = abs(eval_result[0] - 0)
     self.assertTrue(deviation <= 0.001)
def Separation_Gen(att_trees, data, k=10, l=5):
    """Anonymize records whose last field is a transaction (SA) value.

    Step 1: the last field of every record is collected and handed to
    partition() together with att_trees[-1] and k.
    Step 2: for every (index_list, value) pair returned, the value replaces
    the last field of the listed records, and a shallow copy of each such
    record is collected.
    Step 3: the collected records go through mondrian_l_diversity with l.

    Side effects: rebinds the module globals ATT_TREES and DATA, and
    overwrites the last field of the rows of data in place.

    return (result, eval_result)
    result is the first value returned by mondrian_l_diversity
    eval_result is a tuple (qid_ncp, sa_ncp, rtime): the second value
    returned by mondrian_l_diversity, the second value returned by
    partition, and the elapsed wall-clock time in seconds
    """
    global ATT_TREES, DATA
    ATT_TREES = att_trees
    DATA = data
    started = time.time()
    if _DEBUG:
        print("size of dataset %d" % len(data))
    # transaction part of every record
    transactions = [record[-1] for record in data]
    # anonymize the transaction part with the partition algorithm
    trans_set, sa_ncp = partition(att_trees[-1], transactions, k)
    partition_data = []
    for index_list, tran_value in trans_set:
        for row_idx in index_list:
            # every record gets its own copy of the group's value
            DATA[row_idx][-1] = tran_value[:]
            partition_data.append(DATA[row_idx][:])
    if _DEBUG:
        print("Begin Mondrian")
    # anonymize qid and sa part with mondrian_l_diversity
    result, qid_ncp = mondrian_l_diversity(ATT_TREES, partition_data, l)
    rtime = float(time.time() - started)
    if _DEBUG:
        print("Total running time = %.2f seconds" % rtime)
    eval_result = (qid_ncp, sa_ncp, rtime)
    return (result, eval_result)
Пример #4
0
def m_generalization(att_trees, data, k=10, l=5):
    """Generalize the transaction (SA) field, then run mondrian_l_diversity.

    The last field of each record is passed to partition() with
    att_trees[-1] and k. Each returned (index_list, value) pair overwrites
    the last field of the listed records; shallow copies of those records
    are then passed to mondrian_l_diversity with l.

    Side effects: rebinds the module globals ATT_TREES and DATA, and
    modifies the rows of data in place.

    return (result, eval_result)
    result is the first value returned by mondrian_l_diversity
    eval_result is a tuple (rncp, tncp, rtime), where rncp comes from
    mondrian_l_diversity, tncp from partition, and rtime is the elapsed
    wall-clock time in seconds
    """
    global ATT_TREES, DATA
    ATT_TREES = att_trees
    DATA = data
    t_begin = time.time()
    if _DEBUG:
        print("size of dataset %d" % len(data))
    last_fields = [row[-1] for row in data]
    trans_set, tncp = partition(att_trees[-1], last_fields, k)
    partition_data = []
    for group in trans_set:
        members, generalized = group
        for pos in members:
            # a separate copy per record, so rows do not share one list
            DATA[pos][-1] = generalized[:]
            partition_data.append(DATA[pos][:])
    if _DEBUG:
        print("Begin Mondrian")
    result, rncp = mondrian_l_diversity(ATT_TREES, partition_data, l)
    rtime = float(time.time() - t_begin)
    if _DEBUG:
        print("Total running time = %.2f seconds" % rtime)
    return (result, (rncp, tncp, rtime))
Пример #5
0
def get_result_dataset(att_trees, data, l=5, num_test=10):
    """
    Fix l and the attribute trees while varying the size of the dataset.

    Subset sizes are the multiples of 5000 that do not exceed len(data).
    For each size, mondrian_l_diversity is run num_test times on a new
    random sample of that size; the mean NCP and mean running time are
    printed.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records to sample from
    l         -- l value passed to mondrian_l_diversity
    num_test  -- number of test runs averaged per size
    """
    data_back = copy.deepcopy(data)
    length = len(data_back)
    joint = 5000
    print("L=%d" % l)
    # range() enumerates exactly the sizes the old counter loop ended up
    # running: its "+1 when divisible" iteration always exceeded length and
    # was skipped, and "length / joint" only yielded an int on Python 2.
    for pos in range(joint, length + 1, joint):
        ncp = rtime = 0
        print('#' * 30)
        print("size of dataset %d" % pos)
        for _run in range(num_test):
            temp = random.sample(data, pos)
            _, eval_result = mondrian_l_diversity(att_trees, temp, l)
            # eval_result[0] is reported as NCP, eval_result[1] as run time.
            ncp += eval_result[0]
            rtime += eval_result[1]
            # Sampled rows are shared with `data`; reset to a clean copy
            # before the next run in case they were modified.
            data = copy.deepcopy(data_back)
        ncp /= num_test
        rtime /= num_test
        print("Average NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % rtime + " seconds")
        print('#' * 30)
Пример #6
0
    def test_group(self):
        # Four records whose last field is itself a list, one attribute
        # tree, l=2.
        init_tree()
        trees = [ATT_TREE]
        records = [
            ['a1', ['female', 'diabetes']],
            ['a1', ['female', 'cold']],
            ['b1', ['male', 'diabetes']],
            ['a2', ['male', 'cold']],
        ]
        result, eval_result = mondrian_l_diversity(trees, records, 2)
        print(result)
        # NOTE(review): this passes for any non-zero first value; there is
        # no comparison against an expected number - confirm the intent.
        offset = abs(eval_result[0] - 0)
        self.assertTrue(offset)
Пример #7
0
def get_result_one(att_trees, data, l=5):
    """
    Run mondrian_l_diversity once and print NCP and running time.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records; left untouched by this function
    l         -- l value passed to mondrian_l_diversity (default 5)
    """
    print("L=%d" % l)
    # Work on a private deep copy. The previous "restore" only rebound the
    # local name after the run, so the caller's list was never protected
    # and two full deep copies were made for nothing.
    working = copy.deepcopy(data)
    _, eval_result = mondrian_l_diversity(att_trees, working, l)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + " seconds")
Пример #8
0
    def test_single(self):
        # Six three-column records, the same attribute tree used for two
        # columns, l=2.
        init_tree()
        trees = [ATT_TREE, ATT_TREE]
        records = [
            ['a1', 'b1', 'cold'],
            ['a2', 'b1', 'cold'],
            ['a1', 'b2', 'cold'],
            ['a1', 'b1', 'cold'],
            ['a1', 'b2', 'cancer'],
            ['a1', 'b1', 'cancer'],
        ]
        result, eval_result = mondrian_l_diversity(trees, records, 2)
        print(result)
        # NOTE(review): this passes for any non-zero first value; there is
        # no comparison against an expected number - confirm the intent.
        offset = abs(eval_result[0] - 0)
        self.assertTrue(offset)
Пример #9
0
def get_result_one(att_trees, data, l=5):
    """
    Run mondrian_l_diversity once, write the result out, print statistics.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records; left untouched by this function
    l         -- l value passed to mondrian_l_diversity (default 5)

    The anonymized result is handed to write_to_file(); NCP and running
    time are printed.
    """
    print("L=%d" % l)
    # Run on a private deep copy: rebinding the local name after the run
    # (as was done before) cannot restore the caller's list, and cost two
    # deep copies instead of one.
    working = copy.deepcopy(data)
    result, eval_result = mondrian_l_diversity(att_trees, working, l)
    write_to_file(result)
    print("NCP= %0.2f %%" % eval_result[0])
    print("Running time %0.2f" % eval_result[1] + " seconds")
Пример #10
0
def get_result_one(att_trees, data, l=5):
    """
    Run mondrian_l_diversity a single time and report NCP and run time.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records; left untouched by this function
    l         -- l value passed to mondrian_l_diversity (default 5)
    """
    print("L=%d" % l)
    # The algorithm gets its own deep copy. Copying before the run and
    # rebinding the local afterwards, as before, had no effect on the
    # caller's list and doubled the copying work.
    working = copy.deepcopy(data)
    _, eval_result = mondrian_l_diversity(att_trees, working, l)
    print("NCP %0.2f" % eval_result[0] + "%")
    print("Running time %0.2f" % eval_result[1] + " seconds")
 def test_mondrian_l_diversity(self):
     # One attribute tree, seven two-column records, l=2. The first value
     # of the evaluation tuple is expected to be zero within 0.001.
     init_tree()
     trees = [ATT_TREE]
     records = [['a1', 'a1'], ['b2', 'a1'], ['b1', 'b1'], ['b2', 'b1'],
                ['b1', 'b2'], ['b2', 'b2'], ['a1', 'a2']]
     result, eval_result = mondrian_l_diversity(trees, records, 2)
     distance_from_zero = abs(eval_result[0] - 0)
     self.assertTrue(distance_from_zero <= 0.001)
Пример #12
0
def get_result_l(att_trees, data):
    """
    Vary l from 2 to 20 while the attribute trees and dataset stay fixed.

    For each l, mondrian_l_diversity is run and its NCP and running time
    are printed.
    """
    pristine = copy.deepcopy(data)
    for l_value in range(2, 21):
        print('#' * 30)
        print("L=%d" % l_value)
        _, eval_result = mondrian_l_diversity(att_trees, data, l_value)
        # the next run starts from an unmodified copy of the input
        data = copy.deepcopy(pristine)
        ncp_line = "NCP %0.2f" % eval_result[0] + "%"
        time_line = "Running time %0.2f" % eval_result[1] + " seconds"
        print(ncp_line)
        print(time_line)
Пример #13
0
def get_result_l(att_trees, data):
    """
    Change l (2 through 20) while fixing the attribute trees and dataset.

    Prints the NCP and running time reported by mondrian_l_diversity for
    every l.
    """
    snapshot = copy.deepcopy(data)
    l_current = 2
    while l_current < 21:
        print("#" * 30)
        print("L=%d" % l_current)
        _, eval_result = mondrian_l_diversity(att_trees, data, l_current)
        # reset the working data to the snapshot before the next l
        data = copy.deepcopy(snapshot)
        ncp, seconds = eval_result[0], eval_result[1]
        print("NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % seconds + " seconds")
        l_current += 1
Пример #14
0
def get_result_one(att_trees, data, l=5):
    """
    Run mondrian_l_diversity once, write the result out, print a summary.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records; left untouched by this function
    l         -- l value passed to mondrian_l_diversity (default 5)

    The anonymized result goes to write_to_file(); NCP and the running time
    (in seconds, minutes and hours) are printed.
    """
    print("L=%d" % l)
    # Hand the algorithm a private deep copy. The old post-run rebinding of
    # the local name never restored the caller's list and needed two deep
    # copies.
    working = copy.deepcopy(data)
    result, eval_result = mondrian_l_diversity(att_trees, working, l)
    write_to_file(result)
    print("")
    print("Normalized Certainty Penalty (NCP): %0.2f %%" % eval_result[0])
    seconds = eval_result[1]
    print("Done in %.2f seconds (%.3f minutes (%.2f hours))" % (
        seconds, seconds / 60, seconds / 60 / 60))
Пример #15
0
def get_result_qi(att_trees, data, l=5):
    """
    Change the number of QI while fixing l and the size of the dataset.

    The QI count runs from len(data[0]) - 1 down to 1 and is passed as the
    fourth argument of mondrian_l_diversity; NCP and running time are
    printed for each count.
    """
    pristine = copy.deepcopy(data)
    num_data = len(data[0])
    print("L=%d" % l)
    for qi_count in range(num_data - 1, 0, -1):
        print('#' * 30)
        print("Number of QI=%d" % qi_count)
        _, eval_result = mondrian_l_diversity(att_trees, data, l, qi_count)
        # start the next run from an unmodified copy of the input
        data = copy.deepcopy(pristine)
        ncp_line = "NCP %0.2f" % eval_result[0] + "%"
        time_line = "Running time %0.2f" % eval_result[1] + " seconds"
        print(ncp_line)
        print(time_line)
Пример #16
0
def get_result_qi(att_trees, data, l=5):
    """
    Vary the number of QI, keeping l and the dataset size fixed.

    Counts len(data[0]) - 1, ..., 1 are tried in that order; each count is
    passed as the fourth argument of mondrian_l_diversity and the resulting
    NCP and running time are printed.
    """
    snapshot = copy.deepcopy(data)
    width = len(data[0])
    print("L=%d" % l)
    qi_counts = list(range(1, width))
    qi_counts.reverse()
    for count in qi_counts:
        print("#" * 30)
        print("Number of QI=%d" % count)
        _, eval_result = mondrian_l_diversity(att_trees, data, l, count)
        # reset to the snapshot so every count sees the same input
        data = copy.deepcopy(snapshot)
        ncp, seconds = eval_result[0], eval_result[1]
        print("NCP %0.2f" % ncp + "%")
        print("Running time %0.2f" % seconds + " seconds")
Пример #17
0
def get_result_l(att_trees, data):
    """
    Change l from 2 to 20 while fixing the attribute trees and dataset.

    For each l, prints the NCP and the running time (seconds, minutes and
    hours) reported by mondrian_l_diversity.
    """
    pristine = copy.deepcopy(data)
    for l_value in range(2, 21):
        print('#' * 30)
        print("L=%d" % l_value)
        _, eval_result = mondrian_l_diversity(att_trees, data, l_value)
        # the next l starts from an unmodified copy of the input
        data = copy.deepcopy(pristine)
        print("")
        print("Normalized Certainty Penalty (NCP): %0.2f %%" % eval_result[0])
        timing = (eval_result[1], eval_result[1] / 60,
                  eval_result[1] / 60 / 60)
        print("Done in %.2f seconds (%.3f minutes (%.2f hours))" % timing)
Пример #18
0
def get_result_one(att_trees, data, l=config2.l):
    """
    Run mondrian_l_diversity once, then report and log the statistics.

    att_trees -- passed through to mondrian_l_diversity
    data      -- list of records; left untouched by this function
    l         -- l value passed to mondrian_l_diversity
                 (default: config2.l, read when the function is defined)

    The anonymized result is handed to write_to_file(). NCP, the memory
    use of this process (USS, in MB) and the running time are printed,
    and one summary line is appended to data/result1.data.
    """
    print("L=%d" % l)
    # Run on a private deep copy. Rebinding the local name after the run,
    # as was done before, never restored the caller's list and kept two
    # extra copies of the dataset alive while memory was being measured.
    working = copy.deepcopy(data)
    result, eval_result = mondrian_l_diversity(att_trees, working, l)
    write_to_file(result)
    print("NCP= %0.2f %%" % eval_result[0])
    # Measure once, so the printed and the logged figure always agree.
    process = psutil.Process(os.getpid())
    memory_mb = process.memory_full_info().uss / 1024. / 1024.
    print('内存使用:', memory_mb, 'MB')
    print("Running time %0.2f" % eval_result[1] + " seconds")
    # NOTE(review): the line is labelled "k= " but the value written is l.
    # The label is kept unchanged in case something parses this file.
    res = "k= " + str(l) + ", ncp = " + str(
        eval_result[0]) + "%, memory = " + str(
            memory_mb) + ", runtime = " + str(eval_result[1])
    with open("data/result1.data", "a") as fp:
        fp.write(res + '\n')