Example #1
0
    def predict_raw(self, D):
        """Predict on D and attach batched cluster memberships.

        Runs the wrapped network on D, wraps the predictions in a Frame,
        then predicts cluster memberships in chunks of ~5000 rows to keep
        memory bounded. Returns the Frame with a "memberships" column added.
        """
        net = self.network_shim()
        predictions = np.squeeze(net.predict(D)['Y'])
        frame = Frame(D.items(), Y=predictions)

        collected = defaultdict(list)
        n_chunks = int(np.ceil(frame.N / 5000))
        row_chunks = np.array_split(np.arange(frame.N), n_chunks)
        for chunk_no, rows in enumerate(row_chunks):
            # progress indicator; "\r" keeps it on a single console line
            print("\rworking on split {i} of {n}...".format(i=chunk_no + 1,
                                                            n=n_chunks),
                  end="")
            features = self._pipe_transform(frame[rows])
            collected["memberships"].append(self.cluster_est.predict(features))
        print()
        # stitch per-chunk arrays back into full-length columns
        frame.update([(key, np.concatenate(parts))
                      for key, parts in collected.items()])

        return frame
Example #2
0
File: knn.py  Project: qk/unn
    def predict_raw(self, D, skip_closest=0):
        """Predict on D and attach per-point kNN uncertainty statistics.

        Runs the wrapped network on D, then for each prediction looks up its
        k nearest reference points (in pipe-transformed feature space) and
        accumulates neighborhood statistics (mean/std of residuals, targets,
        predictions, distances) in chunks of ~5000 rows to bound memory.

        Parameters:
            D: network input; must support .items() for Frame construction.
            skip_closest: number of nearest neighbors to drop from the front
                of each neighbor list (e.g. to exclude a point matching
                itself when D overlaps the reference set).

        Returns:
            Frame with the raw predictions plus the accumulated statistics
            columns ('avgDist', 'meanR', 'stdR', 'meanT', ... and, for the
            mahalanobis method, 'mn-dist').

        Raises:
            RuntimeError: if the estimator has not been fitted (no _pipe).
        """
        network = self.network_shim()
        prY = np.squeeze(network.predict(D)['Y'])
        pr = Frame(D.items(), Y=prY)

        # Fitted-state guard: _pipe is created during fitting elsewhere.
        if not hasattr(self, "_pipe"):
            raise RuntimeError(
                "Estimator is not fitted yet. The neural network shim should decide if training is necessary or not."
            )
        X = self._pipe_transform(pr)
        refX = self._pipe_transform(self.ref)

        if self.method == 'mahalanobis':  # takes a long time
            # Warm-up call on a 2-row slice: presumably done only to populate
            # self._mahalanobis_params['VI'] (inverse covariance), which the
            # per-split loop below reads — TODO confirm against
            # mahalanobis_uncertainty's implementation.
            if not self.silent: print("caching inverse covariance matrix ...")
            self.mahalanobis_uncertainty(X[:2],
                                         refX[:2],
                                         refX,
                                         n_jobs=self.n_jobs)

        k = self.neighbors
        knn = self.knn
        knn.set_params(n_neighbors=k)

        # Per-statistic lists of per-split arrays; concatenated at the end.
        result = defaultdict(list)
        # Process in chunks of ~5000 rows to keep the (rows x k) neighbor
        # arrays from exceeding memory.
        splits = int(np.ceil(len(pr) / 5000))
        for i, idx in enumerate(np.array_split(np.arange(len(pr)), splits)):
            if not self.silent:
                print("working on split {i} of {n}...".format(i=i + 1,
                                                              n=splits),
                      end="\r")
            # Drop the skip_closest nearest neighbors from both arrays.
            distances, neighbors = (A[:, skip_closest:]
                                    for A in knn.kneighbors(X[idx]))
            knnR = self.ref_results['R'][
                neighbors]  # signed residuals in neighborhood
            knnY = self.ref['Y'][neighbors]  # predictions in neighborhood
            knnT = self.ref['T'][neighbors]  # labels (targets) in neighborhood
            # print(knnY.shape, "knnY shape") # (n_points_in_split, n_neighbors)
            result['avgDist'] += [distances.mean(axis=1)]
            result['meanR'] += [knnR.mean(axis=1)]  # for stdR
            result['meanY'] += [knnY.mean(axis=1)]
            result['meanT'] += [knnT.mean(axis=1)]
            # Sample std of residuals around the just-appended mean.
            # NOTE(review): denominator is (k - 1) even when skip_closest > 0
            # leaves only k - skip_closest neighbors — confirm whether this
            # bias is intentional.
            result['stdR'] += [
                (((knnR - result['meanR'][-1][:, None])**2).sum(axis=1) /
                 (k - 1))**0.5
            ]
            result['absR'] += [np.abs(knnR).mean(axis=1)]
            result['varT'] += [np.var(knnT, axis=1, ddof=1)]  # cs_knnV
            result['stdT'] += [np.var(knnT, axis=1,
                                      ddof=1)**0.5]  # cs_knnV**0.5
            result['sqR'] += [(knnR**2).mean(axis=1)]  # cs_knnE
            if self.method == 'mahalanobis':  # takes a long time, exceeds memory limits if not split like this
                VI = self._mahalanobis_params['VI']  # precomputed above
                # Per-point mahalanobis distance to its own neighborhood,
                # fanned out over threads; the generator's `i` is local to it
                # and does not clobber the split index above.
                dists = Parallel(self.n_jobs, 'threading', verbose=0)(
                    delayed(run)(self._mahalanobis_uncertainty_job,
                                 X[idx[i]][None, :],
                                 refX[neighbors[i]],
                                 VI=VI) for i in range(len(idx)))
                result['mn-dist'] += [np.concatenate(dists)]

        if not self.silent: print()
        # Stitch per-split arrays into full-length columns (reuses name k).
        pr.update([(k, np.concatenate(v)) for k, v in result.items()])
        return pr