def bucket_sort(numbers, num_buckets=10):
    """Sort given numbers in place by distributing them into buckets
    representing equal-width subranges, then sorting each bucket and copying
    all buckets back into numbers in sorted order.

    numbers: list of ints or floats; it is mutated and nothing is returned.
    num_buckets: how many subrange buckets to distribute into (at least 1).
    Raises ValueError if num_buckets is less than 1. The check only happens
    when there are at least two numbers, so shorter inputs return untouched.

    Running time: O(n + num_buckets) to distribute the items and copy them
    back, plus the cost of merge_sort on every bucket. In the worst case all
    items land in one bucket, which is a single merge_sort call on n items
    (O(n log n) assuming merge_sort is linearithmic).
    Memory usage: O(n + num_buckets) for the list of buckets, which together
    hold one copy of every item."""
    # Fewer than two items are already sorted
    if len(numbers) < 2:
        return
    if num_buckets < 1:
        raise ValueError(
            'num_buckets must be at least 1, got {!r}'.format(num_buckets))

    # Find range of given numbers (minimum and maximum values)
    minm = min(numbers)
    maxm = max(numbers)
    # The "+ 1" makes each subrange wide enough that maxm still maps to a
    # valid bucket index (at most num_buckets - 1)
    bucket_range = (maxm - minm) // num_buckets + 1

    # Create list of buckets to store numbers in subranges of input range
    buckets = [[] for _ in range(num_buckets)]

    # Loop over given numbers and place each item in appropriate bucket
    last_bucket = num_buckets - 1
    for item in numbers:
        # Floor division of floats yields a float, which cannot index a list,
        # so convert explicitly; clamp in case float rounding overshoots
        index = min(int((item - minm) // bucket_range), last_bucket)
        buckets[index].append(item)

    # Sort each bucket, then write its items back over the input list
    position = 0
    for bucket in buckets:
        merge_sort(bucket)
        for item in bucket:
            numbers[position] = item
            position += 1
def test_merge_sort(self):
    """merge_sort puts a list of strings, duplicates included, into
    ascending order in place."""
    pokemon = [
        'Absol', 'Gyrados', 'Milotic', 'Pikachu',
        'Charizard', 'Mew', 'Misdreavous', 'Absol',
    ]
    expected = [
        'Absol', 'Absol', 'Charizard', 'Gyrados',
        'Mew', 'Milotic', 'Misdreavous', 'Pikachu',
    ]
    merge_sort(pokemon)
    assert pokemon == expected
def bucket_sort(numbers):
    """Sort given numbers in place by distributing into buckets representing
    subranges, then sorting each bucket and writing all buckets back into
    numbers in bucket order.

    Running time: one pass over the buckets plus a merge_sort call on the
    contents of every non-empty bucket. The more values share a bucket, the
    more of the work is done by merge_sort; if every value lands in the same
    bucket the cost is that of running merge_sort on numbers directly, and
    when the values spread out over many buckets the overall cost tends
    towards O(n).
    NOTE(review): how values are assigned to buckets (and what that step
    costs) is decided by make_buckets, which is defined elsewhere -- confirm
    the analysis above against it.

    Memory usage: presumably O(n) -- the original notes describe the bucket
    array as having n slots holding n LinkedList nodes in total; on top of
    that each bucket's items are copied into a temporary list for sorting.
    TODO confirm against make_buckets."""
    buckets = make_buckets(numbers)

    # Sort each non-empty bucket and copy it straight back into numbers.
    # Walking the sorted list directly avoids rebuilding a LinkedList and
    # then fetching its items one index at a time, which on a linked list
    # presumably means re-traversing from the head for every element.
    write_index = 0
    for bucket in buckets:
        if bucket.size > 0:
            values = bucket.items()
            merge_sort(values)  # sorts the list of items in place
            for value in values:
                numbers[write_index] = value
                write_index += 1
def test_sort_on_large_list(self):
    """merge_sort sorts longer integer lists with repeated values in place."""
    cases = [
        ([5, 1, 6, 78, 5, 1, 2, 5, 1],
         [1, 1, 1, 2, 5, 5, 5, 6, 78]),
        ([5, 1, 2, 56, 2, 6, 78, 8, 90, 124],
         [1, 2, 2, 5, 6, 8, 56, 78, 90, 124]),
        ([56, 1, 3, 634, 7, 8, 2, 123, 1, 2],
         [1, 1, 2, 2, 3, 7, 8, 56, 123, 634]),
    ]
    for unsorted, expected in cases:
        merge_sort(unsorted)
        assert unsorted == expected
def test_sort_on_small_list(self):
    """merge_sort sorts short integer lists in place."""
    cases = [
        ([39, 6, 8, 12, 10], [6, 8, 10, 12, 39]),
        ([73, 61, 5, 12], [5, 12, 61, 73]),
        ([20, 20, 91, 24], [20, 20, 24, 91]),
    ]
    for unsorted, expected in cases:
        merge_sort(unsorted)
        assert unsorted == expected
def test_sort_on_decimal_with_negative_list(self):
    """merge_sort sorts lists mixing negative and positive floats in place."""
    cases = [
        ([9.2, -5.3, 2.3, 7.5], [-5.3, 2.3, 7.5, 9.2]),
        ([6.2, -12.12, 75.3, 12.12], [-12.12, 6.2, 12.12, 75.3]),
        ([-9.5, 5.6, -89.1, 12.2], [-89.1, -9.5, 5.6, 12.2]),
    ]
    for unsorted, expected in cases:
        merge_sort(unsorted)
        assert unsorted == expected
# Get sort function by name if len(args) >= 1: sort_name = args[0] # Terrible hack abusing globals if sort_name in globals(): sort_function = globals()[sort_name] return sort_function else: # Don't explode, just warn user and show list of sorting functions print('Sorting function {!r} does not exist'.format(sort_name)) print('Available sorting functions:') for name in globals(): if 'sort' in name: print(' {}'.format(name)) return # If using PyTest, change this variable to the sort function you want to test sort = selection_sort if sort == merge_sort: sorted_items = merge_sort(items) sort = sorted_items[:] if __name__ == '__main__': # Get sort function from command-line argument # FIXME: This is causing unittest to throw an error # sort = get_sort_function() unittest.main()
def test_merge_sort(self):
    """Run SortRecur.merge_sort against 100 generated test cases.

    Each case from _generate_testcase is indexable: element 0 is the list
    handed to merge_sort and element 1 is what it must equal afterwards."""
    remaining = 100
    while remaining > 0:
        case = _generate_testcase()
        SortRecur.merge_sort(case[0])
        assert case[0] == case[1]
        remaining -= 1
def test_merge_sort_strings(self):
    """merge_sort orders a list of strings alphabetically in place."""
    fruits = ["banana", "apple", "orange"]
    expected = ["apple", "banana", "orange"]
    merge_sort(fruits)
    assert fruits == expected
def test_merge_sort_negs(self):
    """merge_sort sorts a list containing negative integers in place."""
    values = [-3, 5, -7, -4]
    expected = [-7, -4, -3, 5]
    merge_sort(values)
    assert values == expected
def test_merge_sort(self):
    """merge_sort sorts a list of positive integers with a repeated value
    in place."""
    values = [1, 3, 7, 4, 6, 10, 5, 4]
    expected = [1, 3, 4, 4, 5, 6, 7, 10]
    merge_sort(values)
    assert values == expected