def predict_raw(self, D):
    """ returns dictionary of raw results

    Runs the wrapped network on D, wraps the predictions in a Frame, and
    assigns cluster memberships chunk-by-chunk (at most 5000 rows at a
    time) before concatenating them back into the Frame.
    """
    network = self.network_shim()
    prY = np.squeeze(network.predict(D)['Y'])
    pr = Frame(D.items(), Y=prY)

    collected = defaultdict(list)
    n_chunks = int(np.ceil(pr.N / 5000))
    chunks = np.array_split(np.arange(pr.N), n_chunks)
    for chunk_no, chunk in enumerate(chunks, start=1):
        # \r keeps the progress message on a single console line
        print("\rworking on split {i} of {n}...".format(i=chunk_no, n=n_chunks),
              end="")
        labels = self.cluster_est.predict(self._pipe_transform(pr[chunk]))
        collected["memberships"].append(labels)
    print()

    pr.update([(key, np.concatenate(parts)) for key, parts in collected.items()])
    return pr
def predict_raw(self, D, skip_closest=0):
    """ returns dictionary of raw results

    Runs the wrapped network on D, then augments the resulting Frame with
    k-nearest-neighbor statistics computed against the fitted reference set
    (mean/std of residuals, targets and predictions in each point's
    neighborhood), processing at most 5000 rows per chunk to bound memory.

    Parameters
    ----------
    D : dataset accepted by the network shim's ``predict``.
    skip_closest : int
        Number of closest neighbors to discard (e.g. to drop the query
        point itself when evaluating on training data).

    Raises
    ------
    RuntimeError
        If the estimator has not been fitted (no ``_pipe`` attribute).
    """
    network = self.network_shim()
    prY = np.squeeze(network.predict(D)['Y'])
    pr = Frame(D.items(), Y=prY)

    if not hasattr(self, "_pipe"):
        raise RuntimeError(
            "Estimator is not fitted yet. The neural network shim should decide if training is necessary or not."
        )

    X = self._pipe_transform(pr)
    refX = self._pipe_transform(self.ref)

    if self.method == 'mahalanobis':
        # Warm-up call on a tiny slice caches the inverse covariance
        # matrix in self._mahalanobis_params (takes a long time).
        if not self.silent:
            print("caching inverse covariance matrix ...")
        self.mahalanobis_uncertainty(X[:2], refX[:2], refX, n_jobs=self.n_jobs)

    k = self.neighbors
    knn = self.knn
    knn.set_params(n_neighbors=k)

    result = defaultdict(list)
    splits = int(np.ceil(len(pr) / 5000))
    for i, idx in enumerate(np.array_split(np.arange(len(pr)), splits)):
        if not self.silent:
            print("working on split {i} of {n}...".format(i=i + 1, n=splits),
                  end="\r")
        # Drop the skip_closest nearest columns from both arrays.
        distances, neighbors = (A[:, skip_closest:]
                                for A in knn.kneighbors(X[idx]))
        knnR = self.ref_results['R'][neighbors]  # signed residuals in neighborhood
        knnY = self.ref['Y'][neighbors]          # predictions in neighborhood
        knnT = self.ref['T'][neighbors]          # labels (targets) in neighborhood
        # shapes: (n_points_in_split, n_neighbors)

        # Hoisted locals: meanR feeds stdR below, varT feeds both varT and
        # stdT (previously np.var was computed twice per chunk).
        meanR = knnR.mean(axis=1)
        varT = np.var(knnT, axis=1, ddof=1)

        result['avgDist'] += [distances.mean(axis=1)]
        result['meanR'] += [meanR]
        result['meanY'] += [knnY.mean(axis=1)]
        result['meanT'] += [knnT.mean(axis=1)]
        # NOTE(review): divisor is (k - 1) even when skip_closest > 0 leaves
        # only k - skip_closest neighbors — inconsistent with ddof=1 used
        # for varT. Kept as-is to preserve behavior; confirm intent.
        result['stdR'] += [
            (((knnR - meanR[:, None])**2).sum(axis=1) / (k - 1))**0.5
        ]
        result['absR'] += [np.abs(knnR).mean(axis=1)]
        result['varT'] += [varT]        # cs_knnV
        result['stdT'] += [varT**0.5]   # cs_knnV**0.5
        result['sqR'] += [(knnR**2).mean(axis=1)]  # cs_knnE

        if self.method == 'mahalanobis':
            # takes a long time, exceeds memory limits if not split like this
            VI = self._mahalanobis_params['VI']  # precomputed above
            dists = Parallel(self.n_jobs, 'threading', verbose=0)(
                delayed(run)(self._mahalanobis_uncertainty_job,
                             X[idx[j]][None, :], refX[neighbors[j]], VI=VI)
                for j in range(len(idx)))
            result['mn-dist'] += [np.concatenate(dists)]
    if not self.silent:
        print()
    # 'key' (not 'k') so the neighbor count above is not shadowed.
    pr.update([(key, np.concatenate(v)) for key, v in result.items()])
    return pr