Exemplo n.º 1
0
def do_experiment_data_size():
    """Benchmark FP-growth, Eclat and Apriori while the data size shrinks.

    Starting from 980, the size argument handed to ``loadDblpData`` is reduced
    in five equal steps.  For every size the three miners are each run
    ``repeats`` times with ``minSup`` set to 1% of the current size, and the
    mean wall-clock time per run is recorded.

    Returns:
        A ``(x_value, y_value)`` pair.  ``x_value`` lists the data sizes that
        were measured.  ``y_value`` holds three lists (FP-growth, Eclat,
        Apriori, in that order), each with one mean time in seconds per entry
        of ``x_value``.
    """
    data_name = 'unixData8_pro.txt'
    data_num = 980
    num_points = 5
    repeats = 2

    # Floor division keeps data_num an integer count on Python 3; true
    # division would turn it into a float after the first subtraction.
    step = data_num // num_points
    all_time = []
    x_value = []
    for _ in range(num_points):
        # Guard before loading/recording so x_value never gets a point that
        # has no matching timing row.
        if data_num < 0:
            break
        minSup = data_num * 0.010
        # NOTE(review): this call splits on ' ' while
        # do_experiment_min_support reads the same file with ',' -- confirm
        # which separator the file really uses.
        dataSetDict, dataSet = loadDblpData(("dataSet/" + data_name), ' ',
                                            data_num)
        x_value.append(data_num)

        time_fp = 0
        time_et = 0
        time_ap = 0
        for _ in range(repeats):
            ticks0 = time.time()
            freqItems_fp = test_fp_growth(minSup, dataSetDict, dataSet)
            time_fp += time.time() - ticks0
            ticks0 = time.time()
            freqItems_eclat = test_eclat(minSup, dataSetDict, dataSet)
            time_et += time.time() - ticks0
            ticks0 = time.time()
            freqItems_ap = test_apriori(minSup, dataSetDict, dataSet)
            time_ap += time.time() - ticks0

        # Average over the number of runs actually performed.
        use_time = [time_fp / repeats, time_et / repeats, time_ap / repeats]
        print("minSup :", minSup, "      data_num :", data_num,
              "  freqItems_fp:", len(freqItems_fp),
              " freqItems_eclat:", len(freqItems_eclat),
              "  freqItems_ap:", len(freqItems_ap))
        print("fp_growth:", use_time[0], "       eclat:", use_time[1],
              "      apriori:", use_time[2])
        all_time.append(use_time)
        data_num -= step

    # Transpose rows (one per data size) into one series per algorithm.
    y_value = [list(series) for series in zip(*all_time)]
    return x_value, y_value
Exemplo n.º 2
0
def do_experiment_min_support():
    """Benchmark FP-growth, Eclat and Apriori while the minimum support drops.

    The data set is loaded once with a size argument of 980.  ``minSup``
    starts at ``980 / 6`` and is lowered by a fifth of that starting value
    after each of five measurement points.  At every point the three miners
    are each run ``repeats`` times and the mean wall-clock time per run is
    recorded.

    Returns:
        A ``(x_value, y_value)`` pair.  ``x_value`` lists the ``minSup``
        values that were measured.  ``y_value`` holds three lists (FP-growth,
        Eclat, Apriori, in that order), each with one mean time in seconds
        per entry of ``x_value``.
    """
    data_name = 'unixData8_pro.txt'
    data_num = 980
    num_points = 5
    repeats = 10
    minSup = data_num / 6

    dataSetDict, dataSet = loadDblpData(("dataSet/" + data_name), ',',
                                        data_num)
    step = minSup / num_points
    all_time = []
    x_value = []
    for _ in range(num_points):
        # Guard before recording so x_value never gets a point that has no
        # matching timing row.
        if minSup < 0:
            break
        x_value.append(minSup)

        time_fp = 0
        time_et = 0
        time_ap = 0
        freqItems_eclat = {}
        for _ in range(repeats):
            ticks0 = time.time()
            test_fp_growth(minSup, dataSetDict, dataSet)
            time_fp += time.time() - ticks0
            ticks0 = time.time()
            freqItems_eclat = test_eclat(minSup, dataSetDict, dataSet)
            time_et += time.time() - ticks0
            ticks0 = time.time()
            test_apriori(minSup, dataSetDict, dataSet)
            time_ap += time.time() - ticks0

        # One shared repeat count drives both the loop and the average, so
        # the two cannot drift apart.
        use_time = [time_fp / repeats, time_et / repeats, time_ap / repeats]
        print("minSup :", minSup, "      data_num :", data_num,
              " freqItems_eclat:", len(freqItems_eclat))
        print("[time spend] fp_growth:", use_time[0], "       eclat:",
              use_time[1], "      apriori:", use_time[2])
        all_time.append(use_time)
        minSup -= step

    # Transpose rows (one per minSup value) into one series per algorithm.
    y_value = [list(series) for series in zip(*all_time)]
    return x_value, y_value
Exemplo n.º 3
0
def do_dblp_data():
    """Run Eclat once on the DBLP data file and print the outcome.

    Loads ``dataSet/dblpDataAll.txt`` (comma separated, size argument 980),
    calls ``test_eclat`` with a minimum support of 100, prints the elapsed
    wall-clock seconds and then prints each element of the result on its own
    line.
    """
    data_name = 'dblpDataAll.txt'
    record_limit = 980
    support_threshold = 100
    data_dict, data_rows = loadDblpData(
        "dataSet/" + data_name, ',', record_limit)

    started = time.time()
    eclat_items = test_eclat(support_threshold, data_dict, data_rows)
    # Start from 0 as a plain sum so the printed value is the elapsed float.
    elapsed = 0 + (time.time() - started)
    print(elapsed)

    for entry in eclat_items:
        print(entry)
Exemplo n.º 4
0
def do_test():
    """Smoke-run Eclat on ``dataSet/connectPro.txt`` and print the result size.

    The file is loaded comma separated with a size argument of 100 and mined
    with a minimum support of 101.
    """
    data_dict, data_rows = loadDblpData("dataSet/connectPro.txt", ',', 100)
    support_threshold = 101

    # NOTE(review): Eclat is invoked twice with identical arguments and only
    # the second result is used.  The first call is kept in case the helper
    # has side effects -- confirm and drop it if it does not.
    test_eclat(support_threshold, data_dict, data_rows)
    eclat_items = test_eclat(support_threshold, data_dict, data_rows)

    print(len(eclat_items))