def greedy_alg(self):
    """Greedily assign the unlabelled points in ``self.F`` to set A or set B.

    ``self.F`` is shuffled in place and consumed from its end. Every
    candidate is first offered to set A: a hull is built from the current A
    points plus the candidate and the points inside it are fetched with
    ``get_points_inside_convex_hull``. If that closure does not intersect
    set B it becomes the new ``self.C_A``; otherwise the same is attempted
    for set B. A candidate that fits neither set is dropped and keeps the
    label -1.

    Returns:
        ``(oracle_calls, labels, True)`` once ``self.F`` is exhausted, where
        ``labels`` is a ``(self.size_E, 1)`` array holding 1 for set A, 0
        for set B and -1 for unassigned points; ``([], [], False)`` when the
        initial sets already intersect.
    """
    # Candidates are popped from the end of the shuffled list.
    random.shuffle(self.F)

    # 1 -> set A, 0 -> set B, -1 -> not classified
    labels = -1 * np.ones(shape=(self.size_E, 1))
    for idx in self.C_A:
        labels[idx] = 1
    for idx in self.C_B:
        labels[idx] = 0

    if intersect(self.C_A, self.C_B):
        return [], [], False

    oracle_calls = 0
    while self.F:
        candidate = self.F.pop()

        # First attempt: grow set A by the candidate.
        hull_a = ConvexHull(self.E[self.C_A], 1)
        hull_a.add_points([self.get_point(candidate)], 1)
        grown_a, added = get_points_inside_convex_hull(
            self.E, hull_a, self.C_A, self.F + self.C_B, candidate)
        oracle_calls += 1
        if not intersect(grown_a, self.C_B):
            self.hull_C_A = hull_a
            self.C_A = grown_a
            self.F = list(set(self.F) - set(added))
            for point in added:
                labels[point] = 1
                self.colors_E[point] = "orange"
            continue

        # Second attempt: grow set B by the candidate.
        hull_b = ConvexHull(self.E[self.C_B], 1)
        hull_b.add_points([self.get_point(candidate)], 1)
        grown_b, added = get_points_inside_convex_hull(
            self.E, hull_b, self.C_B, self.F + self.C_A, candidate)
        oracle_calls += 1
        if intersect(self.C_A, grown_b):
            # Fits neither set: the candidate stays unclassified.
            continue
        self.hull_C_B = hull_b
        self.C_B = grown_b
        self.F = list(set(self.F) - set(added))
        for point in added:
            labels[point] = 0
            self.colors_E[point] = "violet"

    return oracle_calls, labels, True
def training_sets_disjoint(openmlData, mcs):
    """Return True when no two class training sets in ``mcs.S`` intersect.

    Every unordered pair of class indices below ``openmlData.class_number``
    is checked with ``intersect``; the first overlapping pair ends the scan.
    """
    class_pairs = itertools.combinations(range(openmlData.class_number), 2)
    return not any(
        intersect(mcs.S[first], mcs.S[second])
        for first, second in class_pairs)
def generalized_algorithm(self):
    """Extend the initial class sets ``self.S`` to the unlabelled points.

    Candidates are taken from the back of an ordering ``F``: the keys of
    ``self.pre_sorting``, or a shuffled copy of ``self.F`` when
    ``self.is_greedy`` is set. For each candidate the classes are tried in
    turn (in the per-point order stored in ``self.pre_sorting`` when not
    greedy). The candidate, together with every point that falls into the
    enlarged closure, is assigned to the first class whose closure contains
    no point already labelled with a different class. A candidate that fits
    no class keeps the label -1.

    The closure is computed with ``get_points_inside_convex_hull`` on a copy
    of the class hull when ``self.linprog_calc`` is None, and with
    ``get_points_inside_convex_hull_linprog`` otherwise.

    Returns:
        ``(oracle_calls, prediction, ok)``. ``prediction`` is an integer
        array of length ``self.size_E`` holding the class index of every
        labelled point and -1 elsewhere. ``ok`` is False, with
        ``oracle_calls == 0``, when the initial sets are not pairwise
        disjoint.
    """
    # The initial sets must be pairwise disjoint. Comparing the summed
    # per-class sizes with the size of the union detects an overlap between
    # any two classes (not only an element common to all of them) and also
    # works when ``self.S`` is empty.
    class_sets = [set(members) for members in self.S]
    if sum(len(s) for s in class_sets) != len(set().union(*class_sets)):
        # Builtin ``int`` is what the removed ``np.int`` alias referred to.
        prediction = np.empty((self.size_E, ), dtype=int)
        prediction.fill(-1)
        # NOTE(review): this branch labels from ``self.training_sets`` while
        # the main path uses ``self.S`` - confirm both hold the same data.
        for class_id, class_data in enumerate(self.training_sets):
            for elem in class_data:
                prediction[elem] = class_id
        return 0, prediction, False

    oracle_calls = 0
    if self.is_greedy:
        order = list(self.F)
        random.shuffle(order)
    else:
        order = list(self.pre_sorting.keys())
    F = np.asarray(order).astype(dtype=int)

    # F_indices[p] is the position of point p inside F, or -1 when p is not
    # a candidate, so stray indices can be filtered out instead of reading
    # uninitialised memory.
    F_indices = np.empty((self.size_E, ), dtype=int)
    F_indices.fill(-1)
    F_indices[F] = np.arange(0, len(F))
    elements_remaining = len(F)

    prediction = np.empty((self.size_E, ), dtype=int)
    prediction.fill(-1)
    for class_id, class_data in enumerate(self.S):
        for elem in class_data:
            prediction[elem] = class_id

    # One flag decides both how the closure is computed and whether a hull
    # has to be stored afterwards, so ``hull`` is never read undefined.
    use_hull = self.linprog_calc is None

    while elements_remaining > 0:
        element_start = time.time()
        print(elements_remaining)
        # Consumed slots of F are marked with -1; take the last live one.
        next_point = F[np.max(np.where(F != -1)[0])]
        F[F_indices[next_point]] = -1
        elements_remaining -= 1

        for i in range(self.class_number):
            if self.is_greedy:
                current_class = i
            else:
                current_class = self.pre_sorting[next_point][1][i]
            current_class_elements = np.where(
                prediction == current_class)[0]
            current_class_outside = np.where(
                prediction != current_class)[0]

            start = time.time()
            if use_hull:
                hull = copy.copy(self.current_hulls[current_class])
                hull.add_points([self.E[next_point]], 1)
                if self.print_runtimes:
                    print("Hull add point", time.time() - start)
                start = time.time()
                new_closed, added = get_points_inside_convex_hull(
                    self.E, hull, current_class_elements,
                    current_class_outside)
                if self.print_runtimes:
                    print("Get Inside points", time.time() - start)
            else:
                new_closed, added = get_points_inside_convex_hull_linprog(
                    self.E, np.append(current_class_elements, next_point),
                    current_class_outside, next_point)
                if self.print_runtimes:
                    print("Linprog", time.time() - start)
            oracle_calls += 1

            start = time.time()
            # Points that already carry a label of some other class.
            labelled_elsewhere = np.setdiff1d(
                np.where((prediction >= 0))[0], current_class_elements)
            if not intersect(new_closed, labelled_elsewhere):
                positions = F_indices[added]
                F[positions[positions >= 0]] = -1
                elements_remaining = len(np.where(F != -1)[0])
                prediction[added] = current_class
                if use_hull:
                    self.current_hulls[current_class] = hull
                if self.print_runtimes:
                    print("Intersection", time.time() - start)
                break
            if self.print_runtimes:
                print("No Intersection", time.time() - start)

        if self.print_runtimes:
            print("Element Added Total Time", time.time() - element_start)

    return oracle_calls, prediction, True
def optimal_runtime_alg(self):
    """Assign the points remaining in ``self.F`` to one of two growing sets.

    Each pass asks ``self.decide_farthest()`` which side to try first (set A
    when it returns a falsy value, set B otherwise), takes the last key of
    that side's distance mapping as the candidate and tries to add it to
    that side. If the enlarged side intersects the other one, the side is
    rebuilt from its points and the other side is tried. A candidate that
    fits neither side is removed from ``self.F`` and from both distance
    mappings.

    Returns:
        The number of ``get_points_inside_convex_hull`` calls made.

    NOTE(review): ``self.C_A`` / ``self.C_B`` are used here both as hull
    objects (``add_points``) and as collections of point indices (``len``,
    iteration, ``in``), and ``get_points_inside_convex_hull`` is called with
    two or three arguments and a single result, whereas the other routines
    in this file pass four or five arguments and unpack two results.
    Confirm this routine still matches the helper before relying on it.
    """
    oracle_calls = 0
    end = False  # never set to True below
    counter = 0
    while len(self.F) > 0 and end == False:
        if not self.decide_farthest():
            # Try set A first.
            items = list(self.convex_A_distances.items())
            if len(items) > 0:
                # Candidate is the key of the last entry of the mapping.
                next_point = items[len(items) - 1][0]
                # Hull1 is an alias of self.C_A, so add_points mutates it.
                Hull1 = self.C_A
                Hull1.add_points([self.get_point(next_point)], 1)
                inside1 = get_points_inside_convex_hull(self.E, Hull1)
                oracle_calls += 1
                if not intersect(inside1, self.C_B):
                    self.C_A.add_points([self.get_point(next_point)], 1)
                    # Newly enclosed points leave the candidate pool.
                    for x in inside1:
                        if x not in self.C_A:
                            self.colors_E[x] = "orange"
                            self.F.remove(x)
                            del self.convex_A_distances[x]
                            del self.convex_B_distances[x]
                    self.C_A = inside1
                else:
                    # Renew first half space: rebuild the hull of set A from
                    # its points (two coordinates per point are assumed).
                    PointsH_1 = np.ndarray(shape=(len(self.C_A), 2))
                    counter = 0
                    for i in self.C_A:
                        PointsH_1[counter] = self.get_point(i)
                        counter += 1
                    self.C_A = ConvexHull(PointsH_1, 1)
                    # Test second half space
                    Hull2 = self.C_B
                    Hull2.add_points([self.get_point(next_point)], 1)
                    inside2 = get_points_inside_convex_hull(self.E, Hull2)
                    oracle_calls += 1
                    if not intersect(self.C_A, inside2):
                        self.C_B.add_points([self.get_point(next_point)], 1)
                        for x in inside2:
                            if x not in self.C_B:
                                self.colors_E[x] = "violet"
                                self.F.remove(x)
                                del self.convex_A_distances[x]
                                del self.convex_B_distances[x]
                        self.C_B = inside2
                    else:
                        # Renew second half space
                        PointsH_2 = np.ndarray(shape=(len(self.C_B), 2))
                        counter = 0
                        for i in self.C_B:
                            PointsH_2[counter] = self.get_point(i)
                            counter += 1
                        self.C_B = ConvexHull(PointsH_2, 1)
                        # The candidate fits neither set: discard it.
                        self.F.remove(next_point)
                        del self.convex_A_distances[next_point]
                        del self.convex_B_distances[next_point]
        else:
            # Try set B first.
            items = list(self.convex_B_distances.items())
            if len(items) > 0:
                next_point = items[len(items) - 1][0]
                Hull2 = self.C_B
                Hull2.add_points([self.get_point(next_point)], 1)
                inside2 = get_points_inside_convex_hull(self.E, Hull2, self.C_B)
                oracle_calls += 1
                if not intersect(inside2, self.C_A):
                    self.C_B.add_points([self.get_point(next_point)], 1)
                    for x in inside2:
                        if x not in self.C_B:
                            self.colors_E[x] = "violet"
                            self.F.remove(x)
                            del self.convex_A_distances[x]
                            del self.convex_B_distances[x]
                    self.C_B = inside2
                else:
                    # Renew second half space
                    PointsH_2 = np.ndarray(shape=(len(self.C_B), 2))
                    counter = 0
                    for i in self.C_B:
                        PointsH_2[counter] = self.get_point(i)
                        counter += 1
                    self.C_B = ConvexHull(PointsH_2, 1)
                    # Test first half space
                    Hull1 = self.C_A
                    Hull1.add_points([self.get_point(next_point)], 1)
                    inside1 = get_points_inside_convex_hull(self.E, Hull1)
                    oracle_calls += 1
                    if not intersect(self.C_B, inside1):
                        self.C_A.add_points([self.get_point(next_point)], 1)
                        for x in inside1:
                            if x not in self.C_A:
                                self.colors_E[x] = "orange"
                                self.F.remove(x)
                                del self.convex_A_distances[x]
                                del self.convex_B_distances[x]
                        self.C_A = inside1
                    else:
                        # Renew first half space
                        PointsH_1 = np.ndarray(shape=(len(self.C_A), 2))
                        counter = 0
                        for i in self.C_A:
                            PointsH_1[counter] = self.get_point(i)
                            counter += 1
                        self.C_A = ConvexHull(PointsH_1, 1)
                        # The candidate fits neither set: discard it.
                        self.F.remove(next_point)
                        del self.convex_A_distances[next_point]
                        del self.convex_B_distances[next_point]
    return oracle_calls
def optimal_hull_alg(self):
    """Grow two sets point by point, ordered by the hull-distance mappings.

    Both initial sets are first replaced by the points inside their hulls.
    While either ``self.convex_A_hull_distances`` or
    ``self.convex_B_hull_distances`` is non-empty,
    ``self.decide_nearest_hull()`` selects the side to try first; the
    candidate is popped from that side's mapping and offered to that side,
    then to the other side if the first attempt intersects. A candidate that
    fits neither side is removed from both outside lists.

    Returns:
        ``(oracle_calls, labels, True)`` where ``labels`` is a
        ``(self.size_E, 1)`` array with 1 for set A, 0 for set B and -1 for
        points never assigned; ``([], [], False)`` when the two initial
        closures intersect.

    NOTE(review): ``self.C_A`` / ``self.C_B`` are used both as hull objects
    (``add_points``) and as collections of point indices (iteration,
    ``len``); ``Hull1`` / ``Hull2`` are aliases of them, not copies.
    ``time_step`` presumably reports the elapsed time and returns a new
    time stamp - confirm against its definition.
    """
    time_point = time.time()
    oracle_calls = 0
    counter = 0
    # 1 -> set A, 0 -> set B, -1 -> not classified
    labels = -1 * np.ones(shape=(self.size_E, 1))

    # Points that currently do not belong to set A / set B respectively.
    outside_points_1 = [x for x in self.convex_A_hull_distances.keys()] + self.C_B
    outside_points_2 = [x for x in self.convex_B_hull_distances.keys()] + self.C_A

    # add labels
    for i in self.C_A:
        labels[i] = 1
    for i in self.C_B:
        labels[i] = 0

    # check if initial_hulls are intersecting
    Hull1 = self.C_A
    inside1, added = get_points_inside_convex_hull(self.E, Hull1, self.C_A, outside_points_1)
    self.C_A = inside1

    Hull2 = self.C_B
    inside2, added = get_points_inside_convex_hull(self.E, Hull2, self.C_B, outside_points_2)
    self.C_B = inside2

    if not intersect(inside1, inside2):
        while (len(self.convex_A_hull_distances) > 0 or len(self.convex_B_hull_distances) > 0):
            added = []

            # First set is nearer to nearest not classified point
            if self.decide_nearest_hull():
                time_point = time_step("Find Neighbour:", time_point)
                if len(self.convex_A_hull_distances) > 0:
                    next_point = self.convex_A_hull_distances.popitem()[0]
                    Hull1 = self.C_A
                    Hull1.add_points([self.get_point(next_point)], 1)
                    time_point = time_step("Adding Convex Hull E 1:", time_point)

                    inside1, added = get_points_inside_convex_hull(self.E, Hull1, self.C_A, outside_points_1)
                    oracle_calls += 1
                    time_point = time_step("Getting inside E:", time_point)

                    # if there is no intersection the point can be added to the first convex set
                    if not intersect(inside1, self.C_B):
                        time_point = time_step("Intersection Test:", time_point)
                        self.C_A = Hull1
                        time_point = time_step("Adding Convex Hull E:", time_point)

                        for x in added:
                            # add to labels
                            labels[x] = 1
                            self.colors_E[x] = "orange"
                            if x in self.convex_A_hull_distances.keys():
                                del self.convex_A_hull_distances[x]
                            if x in self.convex_B_hull_distances.keys():
                                del self.convex_B_hull_distances[x]
                            outside_points_1.remove(x)
                        self.C_A = inside1
                        time_point = time_step("Update arrays:", time_point)

                    # if there is an intersection we have to check if it can be added to the second set
                    else:
                        time_point = time_step("Intersection Test:", time_point)
                        # Renew first half space: rebuild the hull of set A
                        # from its points, undoing the tentative add_points.
                        PointsH_1 = np.ndarray(shape=(len(self.C_A), self.dimension_E))
                        counter = 0
                        for i in self.C_A:
                            PointsH_1[counter] = self.get_point(i)
                            counter += 1
                        self.C_A = ConvexHull(PointsH_1, 1)

                        # Test second half space
                        Hull2 = self.C_B
                        Hull2.add_points([self.get_point(next_point)], 1)
                        inside2, added = get_points_inside_convex_hull(self.E, Hull2, self.C_B, outside_points_2)
                        oracle_calls += 1

                        # the point can be added to the second set,
                        # if we reach this point the first time all the other E which are classified did not change the optimal margin
                        if not intersect(self.C_A, inside2):
                            self.C_B = Hull2
                            for x in added:
                                # add to labels
                                labels[x] = 0
                                self.colors_E[x] = "violet"
                                if x in self.convex_A_hull_distances.keys():
                                    del self.convex_A_hull_distances[x]
                                if x in self.convex_B_hull_distances.keys():
                                    del self.convex_B_hull_distances[x]
                                outside_points_2.remove(x)
                            self.C_B = inside2

                        # the point cannot be added to any set
                        else:
                            # Renew second half space
                            PointsH_2 = np.ndarray(shape=(len(self.C_B), self.dimension_E))
                            counter = 0
                            for i in self.C_B:
                                PointsH_2[counter] = self.get_point(i)
                                counter += 1
                            self.C_B = ConvexHull(PointsH_2, 1)
                            if next_point in outside_points_1:
                                outside_points_1.remove(next_point)
                            if next_point in outside_points_2:
                                outside_points_2.remove(next_point)
                    time_point = time_step("Point add Hull:", time_point)

            # Second set is nearer to nearest not classified point
            else:
                time_point = time_step("Find Neighbour:", time_point)
                if len(self.convex_B_hull_distances) > 0:
                    next_point = self.convex_B_hull_distances.popitem()[0]
                    Hull2 = self.C_B
                    Hull2.add_points([self.get_point(next_point)], 1)
                    inside2, added = get_points_inside_convex_hull(self.E, Hull2, self.C_B, outside_points_2)
                    oracle_calls += 1

                    # we can add the new point to the second, the nearer set
                    if not intersect(inside2, self.C_A):
                        self.C_B = Hull2
                        for x in added:
                            # add to labels
                            labels[x] = 0
                            self.colors_E[x] = "violet"
                            if x in self.convex_A_hull_distances.keys():
                                del self.convex_A_hull_distances[x]
                            if x in self.convex_B_hull_distances.keys():
                                del self.convex_B_hull_distances[x]
                            outside_points_2.remove(x)
                        self.C_B = inside2

                    # we check if we can add the point to the first set
                    # if we reach this point the first time all the other E which are classified did not change the optimal margin
                    else:
                        # Renew second half space
                        PointsH_2 = np.ndarray(shape=(len(self.C_B), self.dimension_E))
                        counter = 0
                        for i in self.C_B:
                            PointsH_2[counter] = self.get_point(i)
                            counter += 1
                        self.C_B = ConvexHull(PointsH_2, 1)

                        # Test first half space
                        Hull1 = self.C_A
                        Hull1.add_points([self.get_point(next_point)], 1)
                        inside1, added = get_points_inside_convex_hull(self.E, Hull1, self.C_A, outside_points_1)
                        oracle_calls += 1

                        # the point can be classified to the first set
                        if not intersect(self.C_B, inside1):
                            self.C_A = Hull1
                            for x in added:
                                # add to labels
                                labels[x] = 1
                                self.colors_E[x] = "orange"
                                if x in self.convex_A_hull_distances.keys():
                                    del self.convex_A_hull_distances[x]
                                if x in self.convex_B_hull_distances.keys():
                                    del self.convex_B_hull_distances[x]
                                outside_points_1.remove(x)
                            self.C_A = inside1

                        # we cannot classify the point
                        else:
                            # Renew first half space
                            PointsH_1 = np.ndarray(shape=(len(self.C_A), self.dimension_E))
                            counter = 0
                            for i in self.C_A:
                                PointsH_1[counter] = self.get_point(i)
                                counter += 1
                            self.C_A = ConvexHull(PointsH_1, 1)
                            if next_point in outside_points_1:
                                outside_points_1.remove(next_point)
                            if next_point in outside_points_2:
                                outside_points_2.remove(next_point)
                    time_point = time_step("Point add Hull:", time_point)
    else:
        return ([], [], False)
    return (oracle_calls, labels, True)
def optimal_alg(self):
    """Grow two sets point by point, ordered by the point-distance mappings.

    While either ``self.convex_A_distances`` or ``self.convex_B_distances``
    is non-empty, ``self.decide_nearest()`` selects the side to try first.
    The candidate is popped from that side's mapping; a fresh hull of the
    side's points plus the candidate is built and the points inside it are
    fetched. If that closure does not intersect the other set it is
    accepted, otherwise the same is attempted for the other set. A candidate
    that fits neither set is removed from both outside lists and keeps the
    label -1.

    Returns:
        ``(oracle_calls, labels, True)`` where ``labels`` is a
        ``(self.size_E, 1)`` array with 1 for set A, 0 for set B and -1 for
        points never assigned; ``([], [], False)`` when the initial sets
        intersect.

    NOTE(review): when ``decide_nearest()`` picks a side whose mapping is
    already empty nothing is popped in that pass - confirm that
    ``decide_nearest`` cannot keep selecting an empty side.
    """
    time_point = time.time()
    oracle_calls = 0
    counter = 0  # not used in this routine
    # 1 -> set A, 0 -> set B, -1 -> not classified
    labels = -1 * np.ones(shape=(self.size_E, 1))

    # Points that currently do not belong to set A / set B respectively.
    outside_points_1 = [x for x in self.convex_A_distances.keys()] + self.C_B
    outside_points_2 = [x for x in self.convex_B_distances.keys()] + self.C_A

    # add labels
    for i in self.C_A:
        labels[i] = 1
    for i in self.C_B:
        labels[i] = 0

    if not intersect(self.C_A, self.C_B):
        while (len(self.convex_A_distances) > 0 or len(self.convex_B_distances) > 0):
            added = []

            # First set is nearer to nearest not classified point
            if self.decide_nearest():
                time_point = time_step("Find Neighbour:", time_point)
                if len(self.convex_A_distances) > 0:
                    next_point = self.convex_A_distances.popitem()[0]
                    # Build a trial hull so self.hull_C_A stays untouched
                    # unless the candidate is accepted.
                    new_hull_C_A = ConvexHull(self.E[self.C_A], 1)
                    new_hull_C_A.add_points([self.get_point(next_point)], 1)
                    time_point = time_step("Adding Convex Hull E 1:", time_point)

                    new_C_A, added = get_points_inside_convex_hull(self.E, new_hull_C_A, self.C_A, outside_points_1)
                    oracle_calls += 1
                    time_point = time_step("Getting inside E:", time_point)

                    # if there is no intersection the point can be added to the first convex set
                    if not intersect(new_C_A, self.C_B):
                        time_point = time_step("Intersection Test:", time_point)
                        self.C_A = new_C_A
                        self.hull_C_A = new_hull_C_A

                        for x in added:
                            # add to labels
                            labels[x] = 1
                            self.colors_E[x] = "orange"
                            if x in self.convex_A_distances.keys():
                                del self.convex_A_distances[x]
                            if x in self.convex_B_distances.keys():
                                del self.convex_B_distances[x]
                        outside_points_1 = list(set(outside_points_1) - set(added))
                        time_point = time_step("Update arrays:", time_point)

                    # if there is an intersection we have to check if it can be added to the second set
                    else:
                        # Test second half space
                        new_hull_C_B = ConvexHull(self.E[self.C_B], 1)
                        new_hull_C_B.add_points([self.get_point(next_point)], 1)
                        new_C_B, added = get_points_inside_convex_hull(self.E, new_hull_C_B, self.C_B, outside_points_2)
                        oracle_calls += 1

                        # the point can be added to the second set,
                        # if we reach this point the first time all the other E which are classified did not change the optimal margin
                        if not intersect(self.C_A, new_C_B):
                            self.C_B = new_C_B
                            self.hull_C_B = new_hull_C_B
                            for x in added:
                                # add to labels
                                labels[x] = 0
                                self.colors_E[x] = "violet"
                                if x in self.convex_A_distances.keys():
                                    del self.convex_A_distances[x]
                                if x in self.convex_B_distances.keys():
                                    del self.convex_B_distances[x]
                            outside_points_2 = list(set(outside_points_2) - set(added))

                        # the point cannot be added to any set
                        else:
                            if next_point in outside_points_1:
                                outside_points_1.remove(next_point)
                            if next_point in outside_points_2:
                                outside_points_2.remove(next_point)
                    time_point = time_step("Point add Hull:", time_point)

            # Second set is nearer to nearest not classified point
            else:
                time_point = time_step("Find Neighbour:", time_point)
                if len(self.convex_B_distances) > 0:
                    next_point = self.convex_B_distances.popitem()[0]
                    new_hull_C_B = ConvexHull(self.E[self.C_B], 1)
                    new_hull_C_B.add_points([self.get_point(next_point)], 1)
                    new_C_B, added = get_points_inside_convex_hull(self.E, new_hull_C_B, self.C_B, outside_points_2)
                    oracle_calls += 1

                    # we can add the new point to the second, the nearer set
                    if not intersect(new_C_B, self.C_A):
                        self.C_B = new_C_B
                        self.hull_C_B = new_hull_C_B
                        for x in added:
                            # add to labels
                            labels[x] = 0
                            self.colors_E[x] = "violet"
                            if x in self.convex_A_distances.keys():
                                del self.convex_A_distances[x]
                            if x in self.convex_B_distances.keys():
                                del self.convex_B_distances[x]
                        outside_points_2 = list(set(outside_points_2) - set(added))

                    # we check if we can add the point to the first set
                    # if we reach this point the first time all the other E which are classified did not change the optimal margin
                    else:
                        # Test first half space
                        new_hull_C_A = ConvexHull(self.E[self.C_A], 1)
                        new_hull_C_A.add_points([self.get_point(next_point)], 1)
                        new_C_A, added = get_points_inside_convex_hull(self.E, new_hull_C_A, self.C_A, outside_points_1)
                        oracle_calls += 1

                        # the point can be classified to the first set
                        if not intersect(self.C_B, new_C_A):
                            self.hull_C_A = new_hull_C_A
                            self.C_A = new_C_A
                            for x in added:
                                # add to labels
                                labels[x] = 1
                                self.colors_E[x] = "orange"
                                if x in self.convex_A_distances.keys():
                                    del self.convex_A_distances[x]
                                if x in self.convex_B_distances.keys():
                                    del self.convex_B_distances[x]
                            outside_points_1 = list(set(outside_points_1) - set(added))

                        # we cannot classify the point
                        else:
                            if next_point in outside_points_1:
                                outside_points_1.remove(next_point)
                            if next_point in outside_points_2:
                                outside_points_2.remove(next_point)
                    time_point = time_step("Point add Hull:", time_point)
    else:
        return [], [], False
    return oracle_calls, labels, True
def greedy_alg2(self):
    """Greedy variant that picks the first set to try at random per point.

    ``self.F`` is shuffled in place. Both initial sets are closed with
    ``get_points_inside_convex_hull`` first; if the closures intersect the
    routine gives up. Otherwise points are popped from ``self.F`` and, based
    on ``random.randint(0, 1)``, offered to set A then set B, or to set B
    then set A. A point that fits neither set keeps the label -1.

    Returns:
        ``(oracle_calls, labels, True)`` where ``labels`` is a
        ``(self.size_E, 1)`` array with 1 for set A, 0 for set B and -1 for
        points never assigned; ``([], [], False)`` when the initial
        closures intersect.

    NOTE(review): ``self.C_A`` / ``self.C_B`` are used both as hull objects
    (``add_points``) and as collections of point indices (iteration,
    ``len``, list concatenation); ``Hull1`` / ``Hull2`` are aliases of them,
    not copies. Confirm the intended types before reusing this routine.
    """
    end = False  # never set to True below
    oracle_calls = 0
    random.shuffle(self.F)

    # 1 -> set A, 0 -> set B, -1 -> not classified
    labels = -1 * np.ones(shape=(self.size_E, 1))
    # Not read again in this routine.
    outside_1 = self.F + self.C_B
    outside_2 = self.F + self.C_A

    # labels of initial labelled data
    for i in self.C_A:
        labels[i] = 1
    for i in self.C_B:
        labels[i] = 0

    # closure of the initial set A
    Hull1 = self.C_A
    inside1, added = get_points_inside_convex_hull(self.E, Hull1, self.C_A, self.F + self.C_B)
    oracle_calls += 1
    if not intersect(inside1, self.C_B):
        for x in added:
            labels[x] = 1
            self.colors_E[x] = "orange"
        self.C_A = inside1

    # closure of the initial set B
    Hull2 = self.C_B
    inside2, added = get_points_inside_convex_hull(self.E, Hull2, self.C_B, self.F + self.C_A)
    oracle_calls += 1
    if not intersect(inside2, self.C_A):
        for x in added:
            labels[x] = 0
            self.colors_E[x] = "violet"
        self.C_B = inside2

    if not intersect(inside1, inside2):
        # Points already covered by a set are no longer candidates.
        for x in self.C_A:
            if x in self.F:
                self.F.remove(x)
        for x in self.C_B:
            if x in self.F:
                self.F.remove(x)

        while len(self.F) > 0 and end == False:
            next_point = self.F.pop()

            # Coin flip: truthy -> try set A first, otherwise set B first.
            if (random.randint(0, 1)):
                Hull1 = self.C_A
                Hull1.add_points([self.get_point(next_point)], 1)
                inside1, added = get_points_inside_convex_hull(self.E, Hull1, self.C_A, self.F + self.C_B, next_point)
                oracle_calls += 1
                if not intersect(inside1, self.C_B):
                    self.C_A.add_points([self.get_point(next_point)], 1)
                    for x in added:
                        labels[x] = 1
                        self.colors_E[x] = "orange"
                        if x in self.F:
                            self.F.remove(x)
                    self.C_A = inside1
                else:
                    # rebuild the hull of set A from its points
                    PointsH_1 = np.ndarray(shape=(len(self.C_A), self.dimension_E))
                    counter = 0
                    for i in self.C_A:
                        PointsH_1[counter] = self.get_point(i)
                        counter += 1
                    self.C_A = ConvexHull(PointsH_1, 1)

                    Hull2 = self.C_B
                    Hull2.add_points([self.get_point(next_point)], 1)
                    inside2, added = get_points_inside_convex_hull(self.E, Hull2, self.C_B, self.F + self.C_A, next_point)
                    oracle_calls += 1
                    if not intersect(self.C_A, inside2):
                        self.C_B.add_points([self.get_point(next_point)], 1)
                        for x in added:
                            labels[x] = 0
                            self.colors_E[x] = "violet"
                            if x in self.F:
                                self.F.remove(x)
                        self.C_B = inside2
                    else:
                        # rebuild the hull of set B from its points
                        PointsH_2 = np.ndarray(shape=(len(self.C_B), self.dimension_E))
                        counter = 0
                        for i in self.C_B:
                            PointsH_2[counter] = self.get_point(i)
                            counter += 1
                        self.C_B = ConvexHull(PointsH_2, 1)
            else:
                Hull2 = self.C_B
                Hull2.add_points([self.get_point(next_point)], 1)
                inside2, added = get_points_inside_convex_hull(self.E, Hull2, self.C_B, self.F + self.C_A, next_point)
                oracle_calls += 1
                if not intersect(inside2, self.C_A):
                    self.C_B.add_points([self.get_point(next_point)], 1)
                    for x in added:
                        labels[x] = 0
                        self.colors_E[x] = "violet"
                        if x in self.F:
                            self.F.remove(x)
                    self.C_B = inside2
                else:
                    # rebuild the hull of set B from its points
                    PointsH_2 = np.ndarray(shape=(len(self.C_B), self.dimension_E))
                    counter = 0
                    for i in self.C_B:
                        PointsH_2[counter] = self.get_point(i)
                        counter += 1
                    self.C_B = ConvexHull(PointsH_2, 1)

                    Hull1 = self.C_A
                    Hull1.add_points([self.get_point(next_point)], 1)
                    inside1, added = get_points_inside_convex_hull(self.E, Hull1, self.C_A, self.F + self.C_B, next_point)
                    oracle_calls += 1
                    if not intersect(self.C_B, inside1):
                        self.C_A.add_points([self.get_point(next_point)], 1)
                        for x in added:
                            labels[x] = 1
                            self.colors_E[x] = "orange"
                            if x in self.F:
                                self.F.remove(x)
                        self.C_A = inside1
                    else:
                        # rebuild the hull of set A from its points
                        PointsH_1 = np.ndarray(shape=(len(self.C_A), self.dimension_E))
                        counter = 0
                        for i in self.C_A:
                            PointsH_1[counter] = self.get_point(i)
                            counter += 1
                        self.C_A = ConvexHull(PointsH_1, 1)
    else:
        return ([], [], False)
    return (oracle_calls, labels, True)
def greedy_fast_alg(self):
    """Greedy two-set assignment driven by ``get_inside_points``.

    ``self.F`` is shuffled in place. Both initial sets are closed first; a
    closure is only adopted when ``get_inside_points`` reports no
    intersection with the other set. If the two closures intersect the
    routine gives up. Otherwise points are popped from the end of
    ``self.F`` and offered to set A, then to set B; a point that fits
    neither set keeps the label -1.

    Returns:
        ``(oracle_calls, labels, True)`` where ``labels`` is a
        ``(self.size_E, 1)`` array with 1 for set A, 0 for set B and -1 for
        points never assigned; ``([], [], False)`` when the initial
        closures intersect.
    """
    random.shuffle(self.F)

    # 1 -> set A, 0 -> set B, -1 -> not classified
    labels = -1 * np.ones(shape=(self.size_E, 1))
    for i in self.C_A:
        labels[i] = 1
    for i in self.C_B:
        labels[i] = 0

    # Closure of the initial set A, checked against set B.
    inside1, added, intersection = get_inside_points(
        self.E, self.C_A, self.F, CheckSet=self.C_B)
    oracle_calls = 1
    if not intersection:
        for x in added:
            labels[x] = 1
            self.colors_E[x] = "orange"
        self.C_A = inside1

    # Closure of the initial set B, checked against set A.
    inside2, added, intersection = get_inside_points(
        self.E, self.C_B, self.F, CheckSet=self.C_A)
    oracle_calls += 1
    if not intersection:
        for x in added:
            labels[x] = 0
            self.colors_E[x] = "violet"
        self.C_B = inside2

    if intersect(inside1, inside2):
        return ([], [], False)

    # Points already covered by a set are no longer candidates.
    for x in self.C_A:
        if x in self.F:
            self.F.remove(x)
    for x in self.C_B:
        if x in self.F:
            self.F.remove(x)

    while self.F:
        next_point = self.F.pop()

        # Offer the point to set A first.
        inside1, added, intersection = get_inside_points(
            self.E, self.C_A, self.F, next_point, self.C_B)
        oracle_calls += 1
        if not intersection:
            for x in added:
                labels[x] = 1
                self.colors_E[x] = "orange"
                if x in self.F:
                    self.F.remove(x)
            self.C_A = inside1
            continue

        # Set A would collide with set B: offer the point to set B.
        inside2, added, intersection = get_inside_points(
            self.E, self.C_B, self.F, next_point, self.C_A)
        oracle_calls += 1
        if not intersection:
            for x in added:
                labels[x] = 0
                self.colors_E[x] = "violet"
                if x in self.F:
                    self.F.remove(x)
            self.C_B = inside2

    return (oracle_calls, labels, True)