def test_01(self):
    """Check that quick_sort orders a small list of distinct integers.

    The assertion inspects the list that was passed in, so the test relies
    on quick_sort reordering its argument in place.
    """
    # Named `values` rather than `list` so the builtin is not shadowed.
    values = [4, 1, 2, 3, 8, 9, 6]
    quick_sort(values)

    # Compare the lists directly instead of their str() renderings: on
    # failure unittest then reports an element-wise diff.
    self.assertEqual([1, 2, 3, 4, 6, 8, 9], values)
Пример #2
0
 def test_string_sort_iterative(self):
     """quick_sort in 'three_way' mode must order strings like the built-in sort.

     The list handed to quick_sort is inspected afterwards, so the test
     relies on the sort happening in place.
     """
     words = [
         "this",
         "is",
         "some",
         "sample",
         "data",
         "for",
         "sorting",
         "test",
     ]
     # Reference ordering produced by Python's own sort, taken before the
     # list under test is touched.
     expected = sorted(words)

     quick_sort(words, 'three_way')

     self.assertEqual(words, expected)
Пример #3
0
 def test_string_sort_recursive(self):
     """quick_sort in 'basic' mode must order strings like the built-in sort.

     The list handed to quick_sort is inspected afterwards, so the test
     relies on the sort happening in place.
     """
     words = [
         "this",
         "is",
         "some",
         "sample",
         "data",
         "for",
         "sorting",
         "test",
     ]
     # Reference ordering produced by Python's own sort, taken before the
     # list under test is touched.
     expected = sorted(words)

     quick_sort(words, 'basic')

     self.assertEqual(words, expected)
def information_gain(data: pd.DataFrame, feature, feature_type):
    """Compute the weighted information gain of splitting ``data`` on ``feature``.

    Row weights are read from the column named by ``weight``; ``weight``,
    ``discrete`` and ``continuously`` are names defined outside this
    function (presumably module-level constants -- verify against the rest
    of the file).

    Args:
        data: Samples to evaluate.
        feature: Column of ``data`` to split on.
        feature_type: ``discrete`` or ``continuously``.

    Returns:
        ``(gain, None)`` for a discrete feature. For a continuous feature,
        ``(best_gain, threshold)`` for the best binary split; this stays
        ``(0, 0)`` when no candidate threshold yields a gain above zero.

    Raises:
        RuntimeError: If ``feature_type`` is neither of the two known kinds.
    """
    base_entropy = __infomation_entropy(data)  # entropy of the full sample set
    total_weight = data[weight].sum()

    if feature_type == discrete:
        # gain = H(data) - sum_i(w_i * H(data where feature == i)), with
        # w_i the share of total weight carried by rows whose feature is i.
        gain = base_entropy
        for level in data[feature].unique():
            subset = data[data[feature] == level]
            share = subset[weight].sum() / total_weight
            gain -= share * __infomation_entropy(subset)
        return gain, None

    if feature_type == continuously:
        data = data.astype("float64")
        distinct_values = set(list(data[feature]))
        candidates = list(distinct_values)
        # The return value is ignored, so quick_sort is expected to order
        # the list in place.
        quick_sort(candidates)

        best_split, best_gain = 0, 0
        # Try the midpoint between every pair of neighbouring values as a
        # binary split threshold.
        for lower, upper in zip(candidates, candidates[1:]):
            threshold = (lower + upper) / 2
            below = data[data[feature] < threshold]
            at_or_above = data[data[feature] >= threshold]
            split_entropy = (
                (below[weight].sum() / total_weight)
                * __infomation_entropy(below)
                + (at_or_above[weight].sum() / total_weight)
                * __infomation_entropy(at_or_above)
            )
            gain = base_entropy - split_entropy
            if gain > best_gain:
                best_gain, best_split = gain, threshold

        return best_gain, best_split

    raise RuntimeError("unknow feature type")
 def test_quick_sort(self):
     """quick_sort must return the given integers in ascending order."""
     expected = [1, 5, 23, 57, 65, 1232]
     actual = quick_sort([1, 5, 65, 23, 57, 1232])
     self.assertEqual(expected, actual)
Пример #6
0
 def test_integer_sort_iterative(self):
     """quick_sort in 'three_way' mode must restore a shuffled 0..99 range.

     The shuffled list itself is checked afterwards, so the test relies on
     the sort happening in place.
     """
     numbers = list(range(100))
     shuffle(numbers)

     quick_sort(numbers, 'three_way')

     ascending = list(range(100))
     self.assertEqual(numbers, ascending)
Пример #7
0
 def test_integer_sort_recursive(self):
     """quick_sort in 'basic' mode must restore a shuffled 0..99 range.

     The shuffled list itself is checked afterwards, so the test relies on
     the sort happening in place.
     """
     numbers = list(range(100))
     shuffle(numbers)

     quick_sort(numbers, 'basic')

     ascending = list(range(100))
     self.assertEqual(numbers, ascending)
from sort.selection_sort import selection_sort
from sort.insertion_sort import insertion_sort
from sort.merge_sort import merge_sort
from sort.quick_sort import quick_sort
from sort.shell_sort import shell_sort

# Demo driver: run each sorting routine on its own fresh copy of the same
# unsorted sample and print the list afterwards. The list is printed after
# each call, so the routines are presumably expected to sort in place.
_UNSORTED = (2, 3, 1, 9, 6, 4, 5, 7, 8)

data = list(_UNSORTED)
print("Lista original:", data)
bubble_sort(data)
print("Ordenado con burbuja clasica", data)

# These three take only the list, so they share one loop.
for _sorter, _label in (
    (selection_sort, "Ordenado con selection sort:"),
    (insertion_sort, "Ordenado con insertion sort:"),
    (merge_sort, "Ordenado con MergeSort:"),
):
    data = list(_UNSORTED)
    _sorter(data)
    print(_label, data)

# quick_sort additionally takes the first and last index of the range to sort.
data = list(_UNSORTED)
quick_sort(data, 0, len(data) - 1)
print("Ordenando con Quicksort:", data)

data = list(_UNSORTED)
shell_sort(data)
print("Ordenando con Shellsort:", data)
 def test_q_sort_fails_with_empty_list(self):
     """quick_sort must signal an empty input by returning -1."""
     outcome = quick_sort([])
     self.assertEqual(outcome, -1)
Пример #10
0
 def test_lager_list_sort(self):
     """quick_sort must return a descending six-element list in ascending order."""
     descending = [50, 40, 30, 20, 10, 1]
     outcome = quick_sort(descending)
     expected = [1, 10, 20, 30, 40, 50]
     self.assertEqual(outcome, expected)
Пример #11
0
 def test_list_sort(self):
     """quick_sort must return a descending four-element list in ascending order."""
     descending = [4, 3, 2, 1]
     outcome = quick_sort(descending)
     expected = [1, 2, 3, 4]
     self.assertEqual(outcome, expected)
Пример #12
0
from sort.bubble_sort import bubble_sort
from sort.heap_sort import heap_sort
from sort.insertion_sort import insertion_sort
from sort.merge_sort import merge_sort
from sort.quick_sort import qsort, quick_sort
from sort.select_sort import select_sort
from sort.shell_sort import shell_sort

if __name__ == "__main__":
    # Manual smoke run: print what quick_sort and heap_sort return for a
    # sample list that contains duplicates and zeros.
    test = [
        2, 1, 3, 4, 2, 5, 7, 3, 6, 10, 5,
        15, 24, 100, 23, 45, 76, 3, 12, 23, 123,
        5432, 12, 2, 0, 234, 0, 122, 3, 7, 8,
        9, 238, 1000, 2345, 5678, 223, 567, 345, 11245, 3345,
        345, 12, 345,
    ]

    # Currently disabled runs of the other algorithms on the same input:
    #   print("bubble", bubble_sort(test))
    #   print("insert", insertion_sort(test))
    #   print("select", select_sort(test))
    #   print("shell", shell_sort(test))
    #   print("merge", merge_sort(test))
    #   print("quick", qsort(test))

    # quick_sort is given the first and last index of the range to sort.
    last_index = len(test) - 1
    quick_result = quick_sort(test, 0, last_index)
    print("quick_sort", quick_result)

    heap_result = heap_sort(test)
    print("heap_sort", heap_result)
Пример #13
0
# Benchmark: time several sort implementations on the same random input.
array = [random.randint(0, length - 1) for _ in range(length)]
print("data length: {}".format(length))

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# replacement intended for measuring short durations.
for _name, _sorter in (
    ("insert", insert_sort),
    ("merge", merge_sort),
    ("bubble", bubble_sort),
    ("heap", heap_basic_sort),
    ("shell", shell_sort),
    ("quick", quick_sort),
):
    # Copy before starting the timer so every algorithm is measured on the
    # sort alone. A shallow copy is enough because the elements are ints.
    _work = copy.copy(array)
    start = time.perf_counter()
    _sorter(_work)
    print("{} sort cost {}s".format(_name, time.perf_counter() - start))

# The built-in sort runs last and in place: it is the only step that
# modifies `array` itself.
start = time.perf_counter()
array.sort()
print("python self sort cost {}s".format(time.perf_counter() - start))
def _write_sorted_csv(path, rows):
    """Write ``rows`` ([total, description, price, shipping] each) to ``path``."""
    fieldnames_for_csv = [
        'item_description', 'item_price', 'item_shipping', 'total_price'
    ]
    # newline="" is what the csv module asks for, and `with` closes the file
    # even when a row fails to serialise.
    with open(path, "w", newline="") as sorted_csv_data:
        sorted_data_writer = csv.DictWriter(sorted_csv_data,
                                            fieldnames=fieldnames_for_csv)
        sorted_data_writer.writeheader()
        for items in rows:
            sorted_data_writer.writerow({
                'item_description': items[1],
                'item_price': items[2],
                'item_shipping': items[3],
                'total_price': items[0]
            })


def readFilesAndSort(filenameToSave, algorithm_choice):
    """Sort the rows of ``<filenameToSave>.csv`` and write them to a new CSV.

    The first line of the input is skipped as a header. Each remaining row
    is loaded as ``[float(col 3), col 0, float(col 1), float(col 2)]``, so
    the fourth column leads each record handed to the sorter.

    Args:
        filenameToSave: Input path without the ``.csv`` extension.
        algorithm_choice: 1 for merge sort, 2 for quick sort, 3 for heap sort.

    Returns:
        Seconds spent reading and sorting (writing the output is not
        included), or ``None`` when ``algorithm_choice`` is not 1, 2 or 3;
        in that case no output file is written.

    Raises:
        ValueError: If a numeric column cannot be parsed as a float.
        IndexError: If a non-blank row has fewer than four columns.
    """
    filename = filenameToSave + '.csv'
    start_time = time.time()

    data = []
    # The file is only read, so plain read mode is enough; `with` makes sure
    # the handle is closed, which the earlier code never did.
    with open(filename, "r", newline="") as unsorted_csv:
        reader = csv.reader(unsorted_csv)
        next(reader, None)  # skip the header line
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; skip them rather
                # than fail with IndexError.
                continue
            data.append([float(row[3]), row[0], float(row[1]),
                         float(row[2])])

    if algorithm_choice == 1:
        merge_sort.merge_sort(data)  # result ignored: relied on to sort in place
        suffix = "_mergesort.csv"
    elif algorithm_choice == 2:
        data = quick_sort.quick_sort(data)  # the returned list is what gets written
        suffix = "_quicksort.csv"
    elif algorithm_choice == 3:
        heap_sort.heap_sort(data)  # result ignored: relied on to sort in place
        suffix = "_heapsort.csv"
    else:
        return None

    end_time = time.time() - start_time

    # The suffix is appended to the name that already ends in ".csv"
    # (e.g. "items.csv_mergesort.csv"); kept so existing output paths
    # do not change.
    _write_sorted_csv(filename + suffix, data)
    return end_time