def compute(self, size):
    """Sample a weighted coreset of `size` points from self.points.

    self.points is an (n, d) array, self.weights the per-point weights,
    self.k the number of clusters.  A weighted k-means run provides the
    bicriteria centers; each point's sensitivity is its normalized
    distance to the nearest center plus a term inversely proportional to
    its cluster's weighted size, and points are sampled with probability
    proportional to sensitivity.

    NOTE(review): indentation was reconstructed from a collapsed paste;
    confirm against the original file that the whole tail of the method
    really sits inside the `if self.weights is not None:` suite.
    """
    # Weighted k-means (10 rounds) to get the bicriteria centers.
    e = w_kmeans.Kmeans(self.points, np.expand_dims(self.weights, axis=0), self.k, 10)
    bi = e.compute()
    # Distance of each point to its nearest center.
    dist = utils.get_dist_to_centers(self.points, bi)
    if self.weights is not None:  # NOTE(review): original comment says this is always true
        dist /= np.sum(dist)  # normalize the distances
        dist *= 2  # constant from the sensitivity formula
        c = utils.get_centers(self.points, bi)  # index of each point's nearest center
        # Weighted size of the cluster each point belongs to.
        c = self.find_cluester_size_weighted(c, W=self.weights)
        dist += ((4.0)/(c))  # add the cluster-size term of the formula
        t = np.sum(dist*self.weights)  # normalization constant t of the formula
        weights = 1/(dist*size)  # coreset weight of a sampled point: t / (sens * size)
        weights *= t
        dist *= self.weights
        dist /= np.sum(dist)
        prob = dist  # the sampling probability per point
        points, weights = utils.sample(self.points, prob, size, weights=weights)
        return points, weights
def __init__(self, base_network, source_iter, num_classes):
    """Initialize clustering state and precompute the source-class centers."""
    self.base_network = base_network
    self.num_classes = num_classes
    # Tolerance and stop flag for the clustering loop.
    self.eps = 0.0001
    self.stop = False
    # Class centers computed once from the source iterator.
    self.source_ctr = utils.get_centers(base_network, source_iter, num_classes)
    # Accumulators for target-domain features and their cluster assignments.
    self.target_features = []
    self.clustered_targets = {}
def compute(self):
    """Run `self.e` rounds of weighted Lloyd iterations.

    self.p holds the points, self.w their weights, self.k the number of
    centers and self.e the number of iterations.  Returns the array of
    centers (also stored on self.centers).
    """
    # k initial centers chosen at random.
    self.centers, _ = utils.sample(self.p, None, n=self.k)
    # BUG FIX: np.reshape returns a NEW array; the original call discarded
    # the result, so the "just fix the shape" intent never took effect.
    # Assign it back, and use -1 to infer the point dimension instead of
    # the hard-coded 2.
    self.centers = np.reshape(self.centers, (self.k, -1))
    assignment = utils.get_centers(self.p, self.centers)  # nearest-center index per point
    points = self.p
    weights = self.w.T
    for _ in range(self.e):
        for i in range(self.k):
            mask = assignment == i  # points currently assigned to cluster i
            w = weights[mask]
            if np.sum(w) == 0:
                continue  # empty cluster: keep its previous center
            # Weighted mean of the cluster's points.
            new_center = np.sum(points[mask] * w, axis=0, keepdims=1)
            new_center /= np.sum(w)
            self.centers[i] = new_center
        # Reassign points to the updated centers for the next round.
        assignment = utils.get_centers(self.p, self.centers)
    return self.centers
def compute(self, size, grnds=10, ginit=1):
    """Sample a coreset of `size` weighted points.

    Runs the weighted k-means bicriteria approximation (`grnds` rounds,
    `ginit` initializations), derives each point's sensitivity — its
    normalized squared distance to the nearest center plus a
    cluster-size term — then samples with probability proportional to
    sensitivity and returns (points, weights).
    """
    # Bicriteria centers from weighted k-means.
    centers = w_KMeans.KMeans(self.p, np.expand_dims(self.w, axis=0), self.k, grnds, ginit).compute()
    sq_dists = utils.get_sq_distances(self.p, centers)  # squared distance point -> each center
    # Normalized squared distance of every point to its own center.
    sens = utils.get_dist_to_centers(d=sq_dists)
    sens /= np.sum(sens)
    sens *= 2  # constant from the paper's sensitivity bound
    # Size of the cluster each point falls into.
    cluster_sizes = self._find_cluster_size(utils.get_centers(d=sq_dists))
    sens = sens + 4.0 / cluster_sizes  # the 4 is from the paper
    total = np.sum(sens * self.w)  # the paper's normalizer t
    new_weights = total / (sens * size)  # weight of each sampled coreset point
    sample_prob = sens * self.w / total  # sampling distribution
    return utils.sample(self.p, size, prob=sample_prob, weights=new_weights)
def solve(list_k, examples, out_path, centers_=None, conf_num=None, out_minus=True, out_iterations=False):
    """Solve the clustering problem with the EM algorithm (Gaussian mixture).

    For every k in `list_k`, runs EM until the log-likelihood changes by
    less than EPS, then writes result files under `out_path`.

    Each example is expected to be [features, label, cluster_index]
    (cluster_index is mutated in place).  `centers_` optionally supplies
    starting centers (a "configuration" run reported to em-konf.dat);
    `out_iterations` switches to per-iteration logging instead.

    NOTE(review): indentation was reconstructed from a collapsed paste --
    confirm loop/branch nesting against the original file.
    NOTE(review): `centers_ == None` should be `centers_ is None`.
    """
    dimension = len(examples[0][0])  # feature dimension
    N = len(examples)                # number of examples
    for k in list_k:
        # h[i][j] = responsibility of component j for example i.
        h = np.zeros((N, k), dtype=np.float64)
        # Mixing coefficients, initialized uniform.
        pi_k = np.zeros(k, dtype=np.float64)
        pi_k.fill(1/float(k))
        configuration = False
        if centers_ == None:
            centers = utils.get_centers(k, examples)
        else:
            centers = centers_
            configuration = True
        # Covariances start as identity matrices, one per component.
        matrix = np.identity(dimension, dtype=np.float64)
        matrices = np.array([matrix] * k)
        log = calc_log(examples, centers, matrices, pi_k)  # initial log-likelihood
        num_iter = 0
        # groups[j] collects (label, probability) for members of component j.
        groups = []
        for i in range(k):
            groups.append([])
        probabilities = dict()  # winning (max-responsibility) probability per example
        if out_iterations:
            iter_out = open(out_path + "em-kmeans.dat", "w")
            iter_out.write("#iteracije: log-izglednost\n")
            iter_out.write("--\n")
            iter_out.write("#0: %.2lf\n" % log)
        while True:
            num_iter += 1
            group_count = [0] * k   # members per component (hard assignment)
            group_dicts = []        # per-component counts of the true labels
            for i in range(k):
                group_dicts.append({})
            # ---- E step: compute responsibilities and hard assignments ----
            for i in range(N):
                x = examples[i][0]
                mi_j = centers[0][0]
                sigma_j = matrices[0]
                # Start by assuming component 0 is the winner.
                p = utils.multivariate_probability(x, mi_j, sigma_j, dimension) * pi_k[0]
                h[i][0] = p
                sum_h = h[i][0]
                probabilities[i] = p
                group = examples[i][1]
                if group not in group_dicts[0]:
                    group_dicts[0][group] = 1
                else:
                    group_dicts[0][group] += 1
                group_count[0] += 1
                examples[i][2] = 0
                # Compare against the remaining components, moving the
                # example's hard assignment whenever one scores higher.
                for j in range(1,k):
                    mi_j = centers[j][0]
                    sigma_j = matrices[j]
                    p2 = utils.multivariate_probability(x, mi_j, sigma_j, dimension) * pi_k[j]
                    if p2 > p:
                        # Undo the bookkeeping for the previous winner.
                        group_count[examples[i][2]] -= 1
                        group_dicts[examples[i][2]][group] -= 1
                        if group not in group_dicts[j]:
                            group_dicts[j][group] = 1
                        else:
                            group_dicts[j][group] += 1
                        examples[i][2] = j
                        group_count[j] += 1
                        p = p2
                        probabilities[i] = p
                    h[i][j] = p2
                    sum_h += h[i][j]
                # Normalize responsibilities for this example.
                h[i] /= sum_h
                probabilities[i] /= sum_h
            # ---- M step: re-estimate means, covariances, mixing weights ----
            for i in range(k):
                new_mi = np.zeros(dimension, dtype=np.float64)
                sum_h = 0.
                new_sigma = np.zeros((dimension, dimension), dtype=np.float64)
                for j in range(N):
                    x = examples[j][0]
                    sum_h += h[j][i]
                    new_mi += h[j][i] * x
                new_mi /= sum_h
                # centers entries are (mean, ...) triples; only the mean changes.
                centers[i] = (new_mi, centers[i][1], centers[i][2])
                mi = new_mi
                for j in range(N):
                    x = examples[j][0]
                    diff = np.matrix(x - mi)
                    new_sigma += h[j][i] * (diff.T * diff)
                new_sigma /= sum_h
                matrices[i] = new_sigma
                new_pi = sum_h / float(N)
                pi_k[i] = new_pi
            new_log = calc_log(examples, centers, matrices, pi_k)
            # Converged when the log-likelihood barely moves.
            if abs(new_log - log) < EPS:
                break
            log = new_log
            if out_iterations:
                iter_out.write("#%d: %.2lf\n" % (num_iter, log))
        # ---- Output ----
        if not configuration and not out_iterations:
            if k == 4:
                # Per-group membership listing, sorted by probability.
                for i in range(N):
                    groups[examples[i][2]].append((examples[i][1], probabilities[i]))
                with open(out_path + "/em-k4.dat", "a") as f:
                    for i in range(k):
                        groups[i] = sorted(groups[i], key=lambda tup: tup[1], reverse=True)
                        f.write("Grupa %d:\n" % (i+1))
                        for j in range(len(groups[i])):
                            f.write(groups[i][j][0] + " %.2lf\n" % groups[i][j][1])
                        if i < k-1:
                            f.write("--\n")
            # Summary for every k: centers, group sizes, iterations, likelihood.
            with open(out_path + "/em-all.dat", "a") as f:
                f.write("K = %d\n" % k)
                for i in range(k):
                    f.write("c%d:" % (i+1))
                    for j in range(dimension):
                        f.write(" %.2lf" % centers[i][0][j])
                    f.write("\n")
                    f.write("grupa %d: %d primjera\n" % ((i+1), group_count[i]))
                f.write("#iter: %d\n" % num_iter)
                f.write("log-izglednost: %.2lf\n" % log)
                if k < 5:
                    f.write("--\n")
        elif not out_iterations:
            # Configuration run: report likelihood and iteration count only.
            with open(out_path + "/em-konf.dat", "a") as f:
                f.write("Konfiguracija %d:\n" % conf_num)
                f.write("log-izglednost: %.2lf\n" % log)
                f.write("#iteracija: %d\n" % num_iter)
                if out_minus == True:
                    f.write("--\n")
        else:
            # Iteration-logging run: dump per-group label counts.
            iter_out.write("--\n")
            for i in range(len(group_dicts)):
                iter_out.write("Grupa %d:" % (i+1))
                buff = ""
                for key in group_dicts[i]:
                    buff += " " + key + " %d" % group_dicts[i][key] + ","
                buff = buff[:-1]
                iter_out.write(buff + "\n")
def solve(list_k, examples, out_path):
    """Run k-means for every k in `list_k` on the given examples.

    Each example is [features, label, cluster_index]; the cluster index
    is mutated in place.  Writes kmeans-all.dat (summary per k) and
    kmeans-k4.dat (per-iteration J plus group membership for k == 4)
    under `out_path`, and returns the centers of the k == 4 run.

    NOTE(review): indentation was reconstructed from a collapsed paste --
    confirm loop/branch nesting against the original file.
    NOTE(review): `a / len(classes[i])` divides by zero if a cluster
    loses all its points.
    """
    ret_centers = []
    size = len(examples[0][0])  # feature dimension
    out_all = open(out_path + "/kmeans-all.dat", "w")
    out_k4 = open(out_path + "/kmeans-k4.dat", "w")
    buffer_ = ""                            # accumulates the all-k summary
    buffer_2 = "#iteracije: J\n--\n"        # accumulates the k == 4 report
    for k in list_k:
        centers = utils.get_centers(k, examples)
        num_iter = 0
        while True:
            num_iter += 1
            changed = 0
            # classes[i] collects the examples assigned to center i.
            classes = []
            for cent in centers:
                classes.append([])
            # Assignment step: put each example with its nearest center.
            for count in range(len(examples)):
                ex = examples[count]
                bk = 0
                dist = utils.euclid(ex[0], centers[0][0])
                for i in range(1, len(classes)):
                    dist2 = utils.euclid(ex[0], centers[i][0])
                    if dist2 < dist:
                        bk = i
                        dist = dist2
                classes[bk].append(ex)
                if not bk == ex[2]:
                    changed = 1  # at least one example switched clusters
                examples[count][2] = bk
            if k == 4:
                # Log the distortion J for this iteration.
                buffer_2 += "#%d: " % (num_iter-1) + "%.2lf"\
                    % (utils.calc_error(k, classes, centers)) + "\n"
            # Converged when no example changed its cluster.
            if changed == 0:
                break
            # Update step: move each center to the mean of its cluster.
            for i in range(k):
                a = np.zeros(size, dtype=np.float64)
                for j in range(len(classes[i])):
                    a = a + classes[i][j][0]
                a = a / len(classes[i])
                centers[i][0] = a
        sum_ = utils.calc_error(k, classes, centers)  # final distortion J
        if k == 4:
            buffer_2 += "--\n"
        buffer_ += "K = " + str(k) + "\n"
        for i in range(k):
            buffer_ += ("c%d: " % (i+1))
            if k == 4:
                # Count the true labels inside group i, most common first.
                buffer_2 += "Grupa %d: " % (i+1)
                count = dict()
                for j in range(len(classes[i])):
                    if classes[i][j][1] not in count:
                        count[classes[i][j][1]] = 1
                    else:
                        count[classes[i][j][1]] += 1
                count = sorted(count.items(), key=lambda x:x[1], reverse=True)
                for (key, val) in count:
                    buffer_2 += str(key) + " " + str(val) + ", "
                buffer_2 = buffer_2[:-2] + "\n"
            # Center coordinates, two decimals, space separated.
            for j in range(size):
                buffer_ += "%.2lf" % centers[i][0][j]
                buffer_ += " "
            buffer_ = buffer_[:-1] + "\n"
            buffer_ += "grupa %d: " % (i+1)
            buffer_ += str(len(classes[i])) + " primjera\n"
        buffer_ += ("#iter: " + str(num_iter) + "\n")
        buffer_ += ("J: %.2lf" % (sum_) + "\n")
        buffer_ += ("--\n")
        if k == 4:
            out_k4.write(buffer_2)
            ret_centers = centers[:]  # centers of the k == 4 run are returned
    out_all.write(buffer_[:-3])  # drop the trailing "--\n"
    return ret_centers
if __name__ == '__main__':
    # Demo entry point: segment and visualize one random image.
    # NOTE(review): this script block may continue beyond the visible chunk.
    img_dir = "./images"
    img_fp = os.path.join(img_dir, random.choice(os.listdir(img_dir)))
    print(img_fp)
    filter_polygon = True
    kuzu_seg = KuzuSegment()
    kuzu_cls = KuzuClassify()
    # Run segmentation: predicted bounding-box map and center map.
    img, origin_image, origin_h, origin_w = kuzu_seg.load_image(img_fp)
    pred_bbox, pred_center = kuzu_seg.predict(img)
    # Get all polygon areas in the image.
    polygon_contours = make_contours(pred_bbox)
    # Get all center points by the contour method.
    center_coords = get_centers(pred_center.astype(np.uint8))
    no_center_points = len(center_coords)
    final_center = vis_pred_center(center_coords, rad=2)
    # Optionally keep only polygons that contain a detected center point.
    if filter_polygon:
        filtered_contours = filter_polygons_points_intersection(polygon_contours, center_coords)  # noqa
        pred_bbox = vis_pred_bbox_polygon(pred_bbox, filtered_contours)
    final_bbox = vis_pred_bbox(pred_bbox, center_coords, width=2)
    # Scale factors back to the original resolution
    # (assumes the model works on 512x512 inputs -- confirm in load_image).
    y_ratio = origin_h / 512
    x_ratio = origin_w / 512
    # Overlay canvas for drawing recognized characters on the original image.
    pil_img = Image.fromarray(origin_image).convert('RGBA')
    char_canvas = Image.new('RGBA', pil_img.size)
    char_draw = ImageDraw.Draw(char_canvas)