def predict(self, X):
    """Predict one value per query row in ``X``.

    The tensor returned by ``self.__eucledian_distance`` is sorted row by
    row and the first ``self.k`` entries of each sorted row are kept. For
    every query, those entries are located in the unsorted row with
    ``R.find_indices``, the matching entries of ``self.Y`` are gathered,
    and their ``R.mean`` is appended to the prediction tensor.

    Args:
        X: Sequence of query rows. It is wrapped in ``R.Tensor`` and kept
            on ``self._X``.

    Returns:
        The ``R`` tensor created with ``name="prediction"`` after one mean
        per query has been concatenated onto it. The same object is stored
        on ``self._label``.
    """
    import time

    def wait_for(op):
        # Poll with a short sleep instead of spinning on ``pass`` so that
        # waiting for a result does not pin a CPU core.
        while op.status != "computed":
            time.sleep(0.01)
        return op

    n_q = len(X)
    self._X = R.Tensor(X)
    d_list = self.__eucledian_distance(self._X)
    fe = d_list.foreach(operation='sort')
    sl = wait_for(fe.foreach(operation='slice', begin=0, size=self.k))

    pred = R.Tensor([], name="prediction")
    for i in range(n_q):
        row = R.gather(d_list, R.Tensor([i])).reshape(shape=[self.n])
        values = wait_for(
            sl.gather(R.Tensor([i])).reshape(shape=[self.k]))
        ind = wait_for(R.find_indices(row, values))
        # Keep only the first index reported for each neighbour value.
        ind = ind.foreach(operation='slice', begin=0, size=1)
        y_neighbours = wait_for(
            R.gather(self.Y, ind).reshape(shape=[self.k]))
        pred = wait_for(
            pred.concat(R.mean(y_neighbours).expand_dims(axis=0)))
        print(pred)

    # Still required when ``X`` is empty: the loop above never waits then.
    wait_for(pred)
    self._label = pred
    return pred
def fit(self, X):
    """Print the gathered ``self.Y`` entries for every row of ``X``.

    ``X`` is wrapped in ``Tensor`` and stored on ``self.X``; its length is
    stored on ``self.n_q``. The tensor returned by
    ``self.eucledian_distance`` is sorted row by row and the first
    ``self.k`` entries of each sorted row are read back as a Python list.
    For every row, those values are located in the unsorted row with
    ``R.find_indices`` and the matching entries of ``self.Y`` are gathered
    and printed.

    Args:
        X: Sequence of rows.

    Returns:
        None. The method only sets ``self.n_q`` / ``self.X`` and prints.
    """
    import time

    def wait_for(op):
        # Poll with a short sleep instead of spinning on ``pass`` so that
        # waiting for a result does not pin a CPU core.
        while op.status != "computed":
            time.sleep(0.01)
        return op

    self.n_q = len(X)
    self.X = Tensor(X)
    d_list = wait_for(self.eucledian_distance(self.X))

    fe = d_list.foreach(operation='sort')
    sl = wait_for(fe.foreach(operation='slice', begin=0, size=self.k))
    li = sl.output.tolist()

    for i in range(self.n_q):
        row = wait_for(
            R.gather(d_list, Tensor([i])).reshape(shape=[self.n]))
        ind = wait_for(R.find_indices(row, values=li[i]))
        # Keep only the first index reported for each neighbour value.
        ind = ind.foreach(operation='slice', begin=0, size=1)
        y_neighbours = wait_for(R.gather(self.Y, ind))
        print(y_neighbours)
def update_centroids(self):
    """Rebuild ``self.centroids`` from the current label assignment.

    For each cluster index ``0 .. self.k - 1`` the entries of
    ``self.points`` whose position is reported by ``R.find_indices`` for
    that label are gathered and reduced with ``mean(axis=1)``. The
    per-cluster results are concatenated in index order and reshaped to
    ``[self.k, len(self.points.output[0])]``. ``inform_server()`` is called
    once the new centroids are assigned.
    """
    # Cluster 0 seeds the accumulator; the rest are appended in order.
    first_members = R.find_indices(self.label, values=[0])
    stacked = R.gather(self.points, first_members).mean(axis=1)

    for cluster in range(1, self.k):
        members = R.find_indices(self.label, values=[cluster])
        cluster_mean = R.gather(self.points, members).mean(axis=1)
        stacked = R.concat(stacked, cluster_mean)

    n_columns = len(self.points.output[0])
    self.centroids = stacked.reshape(shape=[self.k, n_columns])
    inform_server()
def update_centroids(self):
    """Rebuild ``self.centroids`` from the current label assignment.

    For each cluster index ``0 .. self.k - 1`` the entries of
    ``self._points`` whose position is reported by ``R.find_indices`` for
    that label are gathered and reduced with ``mean(axis=1)``. The
    per-cluster results are concatenated in index order and reshaped to
    ``[self.k, len(self._points.output[0])]``.
    """
    # Cluster 0 seeds the accumulator; the rest are appended in order.
    first_members = R.find_indices(self._label, Tensor([0]))
    stacked = self._points.gather(first_members).mean(axis=1)

    for cluster in range(1, self.k):
        members = R.find_indices(self._label, Tensor([cluster]))
        cluster_mean = R.gather(self._points, members).mean(axis=1)
        stacked = R.concat(stacked, cluster_mean)

    n_columns = len(self._points.output[0])
    self.centroids = stacked.reshape(shape=[self.k, n_columns])
def update_centroids(self, points, label):
    """Return updated centroids for ``points`` given their ``label`` op.

    For each cluster index ``0 .. self.k - 1``:

    * if the index occurs in ``label.output``, the matching entries of
      ``points`` are gathered and reduced with ``mean(axis=1)``;
    * otherwise the existing entry of ``self.centroids`` is reused
      (gathered and given a leading axis with ``expand_dims(axis=0)``).

    Args:
        points: ``R`` tensor the labelled entries are gathered from.
        label: ``R`` op holding one label per entry of ``points``; the
            method waits for it to be computed before reading its output.

    Returns:
        The concatenated per-cluster results reshaped to
        ``[self.k, len(self.points.output[0])]``.
    """
    import time

    def wait_for(op):
        # Poll with a short sleep instead of spinning on ``pass`` so that
        # waiting for a result does not pin a CPU core.
        while op.status != "computed":
            time.sleep(0.01)
        return op

    def centroid_for(index):
        # One rule for every cluster, including cluster 0.
        if index in label.output:
            members = R.find_indices(label, values=[index])
            return R.gather(points, members).mean(axis=1)
        return R.gather(self.centroids, Tensor([index])).expand_dims(axis=0)

    wait_for(label)

    gather = centroid_for(0)
    for i in range(1, self.k):
        gat = centroid_for(i)
        gather = R.concat(gather, gat)
        wait_for(gat)

    # NOTE(review): the column count is read from ``self.points`` rather
    # than the ``points`` argument - presumably both share the same width;
    # confirm against the caller.
    return gather.reshape(shape=[self.k, len(self.points.output[0])])
def predict(self, X):
    """Predict a label for every query row in ``X``.

    The tensor returned by ``self.__euclidean_distance`` is sorted row by
    row and the first ``self._k`` entries of each sorted row are kept. For
    every query, those entries are located in the unsorted row with
    ``R.find_indices``, the matching entries of ``self._y`` are gathered,
    and their ``R.mode`` is concatenated onto the label tensor.

    Args:
        X: Sequence of query rows; wrapped in ``Tensor`` before use.

    Returns:
        The ``R`` tensor created with ``name="label"`` after one mode per
        query has been concatenated onto it. The same object is stored on
        ``self._labels``.
    """
    query_count = len(X)
    queries = Tensor(X)
    distances = self.__euclidean_distance(queries)
    nearest = distances.foreach(operation='sort').foreach(
        operation='slice', begin=0, size=self._k)

    label = R.Tensor([], name="label")
    for q in range(query_count):
        row = R.gather(distances, Tensor([q])).reshape(shape=[self._n])
        values = nearest.gather(Tensor([q])).reshape(shape=[self._k])
        print(values, row)
        # Keep only the first index reported for each neighbour value.
        positions = R.find_indices(row, values).foreach(
            operation='slice', begin=0, size=1)
        y_neighbours = R.gather(self._y, positions).reshape(
            shape=[self._k])
        label = label.concat(R.mode(y_neighbours))

    # Keep the result on the instance as well as returning it.
    self._labels = label
    return label
def find_split(self, X, y):
    """Search every feature for the split with the lowest weighted Gini.

    The parent impurity is ``1 - sum((class_count / n) ** 2)``. For each
    column of ``X`` the rows are re-ordered using the indices of the
    sorted unique column values, then each split position ``i`` in
    ``1 .. n - 1`` is scored as
    ``(i * gini_left + (n - i) * gini_right) / n``. A candidate replaces
    the current best when ``gini.less(best_gini)`` evaluates to 1 and the
    ``decision1`` check passes (see the review note in the body).

    Args:
        X: ``R`` tensor of observations; it is transposed so that columns
            can be gathered by feature index.
        y: ``R`` tensor of class ids; its first dimension is read as the
            number of observations.

    Returns:
        ``(ideal_col, ideal_threshold)``. Both are ``None`` when there is
        at most one observation or no candidate was accepted. Otherwise
        ``ideal_col`` is the integer column index and ``ideal_threshold``
        is the ``R`` op for the midpoint of two adjacent threshold values.
    """
    import time

    def wait_for(op):
        # Poll with a short sleep instead of spinning on ``pass`` so that
        # waiting for a result does not pin a CPU core.
        while op.status != "computed":
            time.sleep(0.01)
        return op

    ideal_col = None
    ideal_threshold = None

    num_observations = wait_for(y.shape_().gather(R.Scalar(0)))
    num_observations = int(num_observations.output)
    if num_observations <= 1:
        return ideal_col, ideal_threshold

    y = y.reshape(shape=[num_observations])

    # Per-class counts in the parent node.
    count_in_parent = R.Tensor([])
    for c in range(self.num_classes):
        count_in_parent = count_in_parent.concat(
            R.sum(R.equal(y, R.Scalar(c))).expand_dims())
    gini = R.square(
        count_in_parent.foreach(operation='div', params=num_observations))
    best_gini = R.sub(R.Scalar(1.0), R.sum(gini))

    temp_y = y.reshape(shape=[num_observations, 1])

    for col in range(self.num_features):
        temp_X = R.gather(
            R.transpose(X),
            R.Scalar(col)).reshape(shape=[num_observations, 1])
        all_data = R.concat(temp_X, temp_y, axis=1)

        column = R.gather(R.transpose(X), R.Scalar(col))
        ind = wait_for(column.find_indices(R.sort(R.unique(column))))
        inform_server()

        # Rebuild (feature value, class) rows in the order given by the
        # computed indices; the original author marked this as a
        # stop-gap way to sort.
        sorted_data = R.Tensor([])
        for i in ind.output:
            sorted_data = sorted_data.concat(all_data.gather(R.Tensor(i)))
        sorted_data_tpose = sorted_data.transpose()
        thresholds = sorted_data_tpose.gather(R.Scalar(0)).gather(
            R.Scalar(0))
        obs_classes = sorted_data_tpose.gather(R.Scalar(1)).gather(
            R.Scalar(0))

        # Class counts on each side; one observation moves from right to
        # left per iteration.
        num_left = R.Tensor([0] * self.num_classes)
        num_right = count_in_parent
        for i in range(1, num_observations):
            class_ = R.gather(obs_classes, R.Tensor([i - 1]))
            classencoding = R.one_hot_encoding(
                class_, depth=self.num_classes).gather(R.Scalar(0))
            num_left = num_left.add(classencoding)
            num_right = num_right.sub(classencoding)

            gini_left = R.sub(
                R.Scalar(1),
                R.sum(
                    R.square(
                        R.foreach(num_left, operation='div', params=i))))
            gini_right = R.sub(
                R.Scalar(1),
                R.sum(
                    R.square(
                        R.foreach(num_right, operation='div',
                                  params=num_observations - i))))
            gini = R.div(
                R.add(
                    R.multiply(R.Scalar(i), gini_left),
                    R.multiply(R.Scalar(num_observations - i),
                               gini_right)),
                R.Scalar(num_observations))

            # NOTE(review): ``decision1`` is an op built with
            # ``logical_and`` and is compared to 1 below without waiting
            # for it or reading ``.output``, unlike ``decision2``. It
            # looks like the intent was to skip candidates whose adjacent
            # thresholds are equal - confirm before changing.
            decision1 = R.logical_and(
                thresholds.gather(R.Tensor([i])),
                thresholds.gather(R.Tensor([i - 1])))
            decision2 = wait_for(gini.less(best_gini))
            print(decision2.output == 1)
            if decision2.output == 1 and decision1 != 1:
                best_gini = gini
                ideal_col = col
                ideal_threshold = R.div(
                    R.add(thresholds.gather(R.Tensor([i])),
                          thresholds.gather(R.Tensor([i - 1]))),
                    R.Scalar(2))

    print(ideal_col, ideal_threshold)
    return ideal_col, ideal_threshold