def KS_test_2(data1, data2):
    """Run a two-sample Kolmogorov-Smirnov style comparison of two samples.

    Both inputs are copied, so the caller's data is left untouched. The
    copies are handed to ``mergesort`` whose return value is discarded, i.e.
    this function relies on it sorting in place.

    Args:
        data1: First sample (anything ``np.copy`` accepts).
        data2: Second sample.

    Returns:
        A ``(D, p_val)`` tuple where ``D`` is the largest absolute gap seen
        between the two empirical step functions and ``p_val`` is
        ``1 - KS_cdf(...)`` of the scaled statistic.
    """
    sample_a = np.copy(data1)
    sample_b = np.copy(data2)
    mergesort(sample_a)
    mergesort(sample_b)

    size_a = len(sample_a)
    size_b = len(sample_b)

    D = 0
    pos_a = pos_b = 0
    frac_a = frac_b = 0

    # Walk both sorted samples in lockstep; on a tie both cursors advance so
    # the gap is only measured after both step functions have jumped.
    while pos_a < size_a and pos_b < size_b:
        value_a = sample_a[pos_a]
        value_b = sample_b[pos_b]

        if value_a <= value_b:
            pos_a += 1
            frac_a = pos_a / size_a
        if value_b <= value_a:
            pos_b += 1
            frac_b = pos_b / size_b

        gap = np.abs(frac_b - frac_a)
        if gap > D:
            D = gap

    # NOTE(review): the 0.12 / 0.11 terms look like the usual small-sample
    # correction applied before evaluating the KS distribution -- confirm
    # against the definition of KS_cdf.
    N_eff_sqrt = np.sqrt((size_a * size_b) / (size_a + size_b))
    p_val = 1 - KS_cdf(D * (N_eff_sqrt + 0.12 + 0.11 / N_eff_sqrt))
    return D, p_val
def median_of_medians(a):
    """Pick a pivot candidate from the medians of consecutive groups of five.

    ``a`` is cut into consecutive slices of at most five elements, each slice
    is sorted with ``mergesort`` (its return value is indexed, so it must
    return the sorted sequence) and the middle element of each sorted slice
    is collected.

    The value returned is the element sitting in the middle *position* of
    that list of group medians. The list itself is not sorted and the
    function does not recurse (see the TODO below).

    Works on both Python 2 and Python 3: groups are built by slicing
    directly and indices use floor division.

    Args:
        a: A sliceable sequence supporting ``len``.

    Returns:
        One element of ``a``.

    Raises:
        IndexError: If ``a`` is empty (there are no group medians to pick).
    """
    n = len(a)
    # Slicing clamps at the end of the sequence, so the last group simply
    # holds the 1-5 leftover elements.
    groups = [a[start:start + 5] for start in range(0, n, 5)]
    medians = [mergesort(group)[len(group) // 2] for group in groups]
    # TODO: make this call recursive
    return medians[len(medians) // 2]
def search(arr, item):
    """Report whether ``item`` occurs in ``arr`` using binary search.

    The input is first ordered with ``mergesort`` (the sorted result is taken
    from its return value), then the search window is halved until the item
    is found or the window is empty.

    >>> search([5, 4, 1, 6, 2, 3, 9, 7], 2)
    True
    >>> search([5, 4, 1, 6, 2, 3, 9, 7], 8)
    False
    """
    ordered = mergesort(arr)
    low = 0
    high = len(ordered) - 1

    while low <= high:
        middle = (low + high) // 2
        if ordered[middle] == item:
            return True
        if item < ordered[middle]:
            # Target can only be in the lower half.
            high = middle - 1
        else:
            # Target can only be in the upper half.
            low = middle + 1

    return False
def KS_test(data, cdf):
    """Run a one-sample Kolmogorov-Smirnov style test against ``cdf``.

    The data is copied before being passed to ``mergesort``; the return
    value of ``mergesort`` is ignored, so it is relied upon to sort in place.

    Args:
        data: Sample values (anything ``np.copy`` accepts).
        cdf: Callable evaluated once per sample value; its result is compared
            with the empirical step function.

    Returns:
        A ``(D, p_val)`` tuple: ``D`` is the largest absolute gap between the
        empirical step function and ``cdf`` (checked on both sides of every
        step), ``p_val`` is ``1 - KS_cdf(...)`` of the scaled statistic.
    """
    ordered = np.copy(data)
    mergesort(ordered)
    N = len(ordered)

    D = 0
    step_before = 0
    for rank, value in enumerate(ordered, start=1):
        step_after = rank / N
        model = cdf(value)
        # The empirical function jumps at each sample, so the model must be
        # compared with the level just after and just before the jump.
        gap = max(np.abs(step_after - model), np.abs(step_before - model))
        if gap > D:
            D = gap
        step_before = step_after

    p_val = 1 - KS_cdf(D * (N**0.5 + 0.12 + 0.11 * N**-0.5))
    return D, p_val
def Kuipers_test(data, cdf):
    """Run a Kuiper style test of ``data`` against ``cdf``.

    Original author's caveat: the returned p-values are only accurate when
    they are small (within two decimal places for p < 0.74). A rejection of
    H0 is therefore real, but the p-values are not distributed as one would
    otherwise expect.

    The data is copied before being passed to ``mergesort``; the return
    value of ``mergesort`` is ignored, so it is relied upon to sort in place.

    Args:
        data: Sample values (anything ``np.copy`` accepts).
        cdf: Callable evaluated once per sample value.

    Returns:
        A ``(D, p_val)`` tuple: ``D`` is the sum of the largest excess of the
        empirical step function over ``cdf`` and the largest excess of
        ``cdf`` over the empirical step function; ``p_val`` is the value of
        ``Kuipers_cdf`` at the scaled statistic.
    """
    ordered = np.copy(data)
    mergesort(ordered)
    N = len(ordered)

    D_plus = 0
    D_minus = 0
    step_before = 0
    for rank, value in enumerate(ordered, start=1):
        step_after = rank / N
        model = cdf(value)

        above = step_after - model   # empirical level above the model
        below = model - step_before  # model above the empirical level

        if above > D_plus:
            D_plus = above
        if below > D_minus:
            D_minus = below

        step_before = step_after

    D = D_minus + D_plus
    p_val = Kuipers_cdf(D * (N**0.5 + 0.155 + 0.24 * N**-0.5))
    return D, p_val
def test_min_mergesort():
    """Shuffled items with keys 0..99 must come back in ascending key order.

    ``mergesort`` is called for its effect on the list (its return value is
    not used), so the check is made on the list that was passed in.
    """
    expected = [KeyedItem(key=k) for k in range(100)]
    shuffled = list(expected)
    random.shuffle(shuffled)

    mergesort(shuffled)

    assert shuffled == expected
def test_max_mergesort():
    """Shuffled items with keys 99..0 must come back in descending key order.

    ``mergesort`` is called with ``order='max'`` and only for its effect on
    the list (its return value is not used).
    """
    expected = [KeyedItem(key=k) for k in range(99, -1, -1)]
    shuffled = list(expected)
    random.shuffle(shuffled)

    mergesort(shuffled, order='max')

    assert shuffled == expected
# Toolbar: one button per sorting algorithm, placed left to right in grid
# row 0. Each command is wrapped in a lambda so that `sorting`, `value_arry`
# and `rect_arry` are looked up when the button is pressed, not when the
# widget is created.
selection_sort = Button(
    root,
    text="Selection sort",
    command=lambda: sorting.selectionsort(),
)
selection_sort.grid(row=0, column=0)

quick_sort = Button(
    root,
    text="Quick sort",
    command=lambda: sorting.quickSort_high_pivot(0, len(value_arry) - 1),
)
quick_sort.grid(row=0, column=1)

merge_sort = Button(
    root,
    text="Merge sort",
    command=lambda: sorting.mergesort(value_arry, rect_arry, 0),
)
merge_sort.grid(row=0, column=2)

insertion_sort = Button(
    root,
    text="Insertion sort",
    command=lambda: sorting.insertionsort(),
)
insertion_sort.grid(row=0, column=3)

bubble_sort = Button(
    root,
    text="Bubble sort",
    command=lambda: sorting.bubblesort(),
)
bubble_sort.grid(row=0, column=4)

# Horizontal slider (10 to 200) placed after the buttons.
# NOTE(review): the name suggests it selects the array size -- confirm where
# `array_size` is read.
array_size = Scale(
    root,
    from_=10,
    to=200,
    orient=HORIZONTAL,
    length=400,
)
array_size.grid(row=0, column=5)
def test_mergesort():
    """A random list passed through ``sorting.mergesort`` must end up sorted.

    The return value of ``sorting.mergesort`` is not used; the check is made
    on the list object that was passed in.
    """
    values = generateRandomList()
    sorting.mergesort(values)
    assert is_sorted(values)
def test_mergesort(self):
    """``sorting.mergesort`` must leave ``self.array`` in sorted order.

    The expected ordering is taken from a copy sorted with its own
    ``sort()`` method before the function under test touches ``self.array``.
    """
    expected = self.array[:]
    expected.sort()

    sorting.mergesort(self.array)

    self.assertEqual(self.array, expected)
def continousGini(column, itsLabel):
    """Find the split threshold of a numeric column with the lowest Gini score.

    The column and its labels are copied and passed to ``mergesort``
    (presumably sorting the values in place and reordering the labels in
    parallel -- confirm against its definition). Candidate thresholds are:

    * the first sorted value,
    * ``int((v[i] + v[i + 1]) / 2)`` for every adjacent pair of sorted values,
    * the last sorted value.

    For each candidate, rows with ``value <= threshold`` go to the "yes" side
    and the rest to the "no" side. Each side's impurity is
    ``1 - sum(p_label ** 2)`` over the labels in the module-level
    ``types_lables``, rounded to 4 places (a side with no such rows scores
    0). The candidate's score is the sum of the two impurities weighted by
    each side's share of all rows, rounded to 4 places.

    The final lookup no longer indexes ``dict.keys()`` / ``dict.values()``,
    so the function also runs on Python 3; the key selected is the same
    (the first one, in dict iteration order, holding the minimum score).

    Args:
        column: Sliceable sequence of numeric values.
        itsLabel: Sliceable sequence of labels, parallel to ``column``.

    Returns:
        ``(min_gini, split_value)``: the lowest weighted score and the
        threshold that produced it.

    Raises:
        IndexError: If ``column`` is empty.
    """
    values = column[:]
    labels = itsLabel[:]
    mergesort(values, labels)
    row_count = float(len(values))

    # A dict is used so that duplicate candidates collapse to one entry.
    avg_gini = {}
    avg_gini[values[0]] = {}
    for i in range(len(values) - 1):
        avg_gini[int((values[i] + values[i + 1]) / 2)] = {}
    avg_gini[values[len(values) - 1]] = {}

    # Every candidate starts with a zero count for each known label on both
    # sides, so the per-label lookups below never miss.
    for x in avg_gini:
        avg_gini[x] = {
            'yes': {z: 0 for z in types_lables},
            'no': {z: 0 for z in types_lables},
        }

    # Tally each row on the appropriate side of every candidate threshold.
    for i, value in enumerate(values):
        label = labels[i]
        for x in avg_gini:
            side = avg_gini[x]['yes'] if value <= x else avg_gini[x]['no']
            side[label] = side.get(label, 0) + 1

    avg_gini_val = {}
    for x in avg_gini:
        yes_dic = avg_gini[x]['yes']
        no_dic = avg_gini[x]['no']

        # Totals over the known labels only; used for the probabilities.
        yes_tot = 0.0
        no_tot = 0.0
        for y in types_lables:
            yes_tot += yes_dic[y]
            no_tot += no_dic[y]

        yes_psum = 0.0
        no_psum = 0.0
        for y in types_lables:
            if yes_tot != 0:
                yes_psum += pow(float(yes_dic[y]) / float(yes_tot), 2)
            if no_tot != 0:
                no_psum += pow(float(no_dic[y]) / float(no_tot), 2)

        # A zero sum means the side had no rows; its impurity stays 0.
        if yes_psum != 0:
            yes_psum = round(1.0 - yes_psum, 4)
        if no_psum != 0:
            no_psum = round(1.0 - no_psum, 4)

        # Side weights count every tallied row, including labels that are
        # not listed in types_lables.
        yes_count = sum(yes_dic.values())
        no_count = sum(no_dic.values())
        total_sum = yes_psum * (float(yes_count) / row_count) + \
            no_psum * (float(no_count) / row_count)
        avg_gini_val[x] = round(total_sum, 4)

    # min() returns the first minimal key in iteration order, matching the
    # original "index of the first minimum value" lookup.
    split_value = min(avg_gini_val, key=avg_gini_val.get)
    min_gini = avg_gini_val[split_value]
    return min_gini, split_value
def test_mergesort():
    """The result of ``sorting.mergesort`` must equal ``sorted`` of the input.

    The expected ordering is captured *before* calling the function under
    test. Python evaluates comparison operands left to right, so computing
    ``sorted(the_list)`` on the right-hand side of the assert would run after
    ``sorting.mergesort``; an implementation that mutates its argument (for
    example by dropping elements) and returns that same list could then pass
    by being compared with itself.
    """
    the_list = fill_random_list()
    expected = sorted(the_list)
    assert sorting.mergesort(the_list) == expected