Example #1
def local_src_features(X, D, sparse_coder, n_class_atoms, n_jobs=1):
    # X holds one sample per column; D is the dictionary whose atoms are grouped
    # by class, with n_class_atoms[c] atoms belonging to class c.
    n_samples = X.shape[1]
    n_classes = len(n_class_atoms)
    data = [X]
    args = [D, n_class_atoms, sparse_coder]
    Z_final = run_parallel(func=local_error, data=data, args=args, batched_args=None,
                           result_shape=(n_classes, n_samples), n_batches=100, mmap=False,
                           msg="building global SRC features", n_jobs=n_jobs)

    # normalize the columns of the feature matrix before returning
    Z_final = norm_cols(Z_final)
    return Z_final
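
For context, the `local_error` worker itself is not shown in this example. Below is a minimal sketch of the kind of per-class reconstruction error such a worker could compute for SRC features; the helper name `src_error_features_sketch`, the `code_fn` callback, and the exact semantics of lyssa's `local_error` are assumptions, not the library's actual code.

import numpy as np

def src_error_features_sketch(X, D, n_class_atoms, code_fn):
    # Illustrative only: per-class reconstruction errors, shape (n_classes, n_samples).
    # code_fn(X, D) is assumed to return a coefficient matrix Z with D @ Z ~= X.
    n_classes, n_samples = len(n_class_atoms), X.shape[1]
    Z = code_fn(X, D)
    E = np.zeros((n_classes, n_samples))
    bounds = np.concatenate(([0], np.cumsum(n_class_atoms)))
    for c in range(n_classes):
        s, e = bounds[c], bounds[c + 1]
        # reconstruct each sample using only the atoms of class c
        R = X - D[:, s:e].dot(Z[s:e, :])
        E[c, :] = np.linalg.norm(R, axis=0)
    return E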
Example #2
    def extract(self, imgs, pyramid_feat_extractor=None, save=True, n_jobs=4):

        if self.D is None:
            self.D = self.workspace.load("dict.npy")

        n_imgs = len(imgs)
        levels = (1, 2, 4)
        n_atoms = self.D.shape[1]
        n_features = np.sum(np.array(levels) ** 2) * n_atoms

        Z = run_parallel(func=pyramid_feat_extract, data=imgs, args=(pyramid_feat_extractor, self.D),
                         result_shape=(n_features, n_imgs), n_batches=100, mmap=self.mmap,
                         msg="building ScSPM features", n_jobs=n_jobs)

        if save:
            self.workspace.save("features.npy", Z)
        return Z
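
The feature dimensionality above follows directly from the pyramid: levels (1, 2, 4) give 1 + 4 + 16 = 21 pooling cells, and each cell contributes one pooled vector of length n_atoms. A quick check (the value of n_atoms here is hypothetical):

import numpy as np

levels = (1, 2, 4)
n_atoms = 1024                              # hypothetical dictionary size
n_cells = np.sum(np.array(levels) ** 2)     # 1 + 4 + 16 = 21 pyramid cells
n_features = n_cells * n_atoms              # 21 * 1024 = 21504 ScSPM dimensions
print(n_cells, n_features)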
Example #3
    def __call__(self, X, D):
        from lyssa.utils import set_openblas_threads
        if self.verbose:
            msg = "feature encoding"
        else:
            msg = None

        n_atoms = D.shape[1]
        n_samples = X.shape[1]

        n_batches = 100
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        if self.algorithm == 'soft_thresholding':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = None
            batched_args = None
            func = partial(
                soft_thresholding,
                nonzero_percentage=self.params.get('nonzero_percentage'),
                n_nonzero_coefs=self.params.get('n_nonzero_coefs'))

        if self.n_jobs > 1:
            # disable OpenBLAS to
            # avoid hanging problem
            set_openblas_threads(1)

        Z = run_parallel(func=func,
                         data=data,
                         args=args,
                         batched_args=batched_args,
                         result_shape=(n_atoms, n_samples),
                         n_batches=n_batches,
                         mmap=self.mmap,
                         msg=msg,
                         n_jobs=self.n_jobs)

        # restore the previous setting
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        return Z
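
The `soft_thresholding` worker applied to the columns of `Alpha = D.T @ X` is not shown here. A minimal sketch of one way such a column could be thresholded is below; treating `nonzero_percentage` as the fraction of entries to keep is an assumption about the parameter's meaning, and this is not lyssa's implementation.

import numpy as np

def soft_threshold_column_sketch(alpha, nonzero_percentage=None, n_nonzero_coefs=None):
    # Illustrative only: keep the largest-magnitude entries of one code vector
    # and shrink them toward zero; everything else becomes exactly 0.
    if n_nonzero_coefs is None:
        n_nonzero_coefs = max(1, int(round(nonzero_percentage * alpha.size)))
    mags = np.abs(alpha)
    order = np.argsort(mags)[::-1]          # indices sorted by decreasing magnitude
    # threshold = magnitude of the largest *discarded* entry (0 if nothing is discarded)
    thr = mags[order[n_nonzero_coefs]] if n_nonzero_coefs < alpha.size else 0.0
    return np.sign(alpha) * np.maximum(mags - thr, 0.0)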
Example #4
    def __call__(self, X, D):
        from lyssa.utils import set_openblas_threads
        if self.verbose:
            msg = "feature encoding"
        else:
            msg = None

        n_atoms = D.shape[1]
        n_samples = X.shape[1]

        n_batches = 100
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        if self.algorithm == 'soft_thresholding':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = None
            batched_args = None
            func = partial(soft_thresholding, nonzero_percentage=self.params.get('nonzero_percentage'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))

        if self.n_jobs > 1:
            # disable OpenBLAS to
            # avoid hanging problem
            set_openblas_threads(1)

        Z = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                         result_shape=(n_atoms, n_samples), n_batches=n_batches,
                         mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)

        # restore the previous setting
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        return Z
Example #5
    def __call__(self, X, D):
        # assume X has data points in columns.
        # use self.params.get('key') because it does not raise an exception
        # when the key does not exist; it just returns None.
        from lyssa.utils import set_openblas_threads

        n_samples = X.shape[1]
        n_atoms = D.shape[1]
        n_batches = 100

        if self.params.get('lambda') is not None:
            assert self.params.get('lambda') <= n_atoms

        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        batched_args = None

        if self.algorithm == 'omp':
            Gram = fast_dot(D.T, D)
            args = [D, Gram]
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            data = X
            func = partial(omp,
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           tol=self.params.get('tol'))

        elif self.algorithm == 'bomp':
            Gram = fast_dot(D.T, D)
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            args = [D, Gram]
            data = X
            func = partial(batch_omp,
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           tol=self.params.get('tol'))

        elif self.algorithm == 'thresh':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = []
            func = partial(
                thresholding,
                n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                nonzero_percentage=self.params.get('nonzero_percentage'))

        elif self.algorithm == "nnomp":
            args = [D]
            data = X
            func = partial(nn_omp,
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           tol=self.params.get('tol'))

        elif self.algorithm == 'group_omp':
            Gram = fast_dot(D.T, D)
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            data = X
            func = partial(group_omp,
                           groups=self.params.get('groups'),
                           n_groups=self.params.get('n_groups'))
            args = [D, Gram]

        elif self.algorithm == 'sparse_group_omp':
            # group_omp(X,D,Gram,groups=None,n_groups=None)
            Gram = fast_dot(D.T, D)
            data = X
            # sparse_group_omp(X,D,Gram,groups=None,n_groups=None,n_nonzero_coefs=None)
            func = partial(sparse_group_omp,
                           groups=self.params.get('groups'),
                           n_groups=self.params.get('n_groups'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
            args = [D, Gram]

        elif self.algorithm == 'somp':
            Gram = fast_dot(D.T, D)
            data = X
            func = partial(somp,
                           data_groups=self.params.get('data_groups'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
            args = [D, Gram]

        elif self.algorithm == 'iht':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = []
            func = partial(
                thresholding,
                n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                nonzero_percentage=self.params.get('nonzero_percentage'))

            Z0 = run_parallel(func=func,
                              data=data,
                              args=args,
                              batched_args=batched_args,
                              result_shape=(n_atoms, n_samples),
                              n_batches=n_batches,
                              mmap=self.mmap,
                              n_jobs=self.n_jobs)

            R0 = fast_dot(D, Z0) - X
            data = X
            batched_args = [Z0, R0]
            args = [D]
            # iterative_hard_thresh(X,Z0,Alpha,D,eta=None,n_nonzero_coefs=None,n_iter=None)
            func = partial(iterative_hard_thresh,
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           eta=self.params.get('eta'),
                           n_iter=self.params.get('n_iter'))
            """params = sparse_coder['iterative_hard_thresh']
            learning_rate = params[0]
            threshold = params[1]
            max_iter = params[2]
            Z = iterative_hard_thresh(X,D,Z,learning_rate=learning_rate,threshold = threshold,max_iter = max_iter)
            """

        elif self.algorithm == 'lasso':
            return lasso(self.params.get('lambda'), self.n_jobs)(X, D)

        elif self.algorithm == 'llc':
            func = partial(llc, knn=self.params.get('knn'))
            data = X
            args = [D]
        else:
            raise ValueError("Sparse optimizer not found.")

        if self.verbose:
            msg = "sparse coding"
        else:
            msg = None

        if self.n_jobs > 1:
            # disable OpenBLAS to
            # avoid the hanging problem
            set_openblas_threads(1)

        Z = run_parallel(func=func,
                         data=data,
                         args=args,
                         batched_args=batched_args,
                         result_shape=(n_atoms, n_samples),
                         n_batches=n_batches,
                         mmap=self.mmap,
                         msg=msg,
                         n_jobs=self.n_jobs)

        # restore the previous setting
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        return Z
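
Most branches above simply select a worker and let `run_parallel` split the columns of `X` into batches. For readers who have not seen the `omp` worker (its body is not included here), a minimal, illustrative orthogonal matching pursuit for a single sample is sketched below; it is not lyssa's implementation, which uses the precomputed `Gram` and `Alpha` matrices for speed.

import numpy as np

def omp_single_sketch(x, D, n_nonzero_coefs, tol=None):
    # Illustrative greedy OMP for one sample x; the columns of D are assumed
    # to be L2-normalized and n_nonzero_coefs >= 1.
    residual = x.copy()
    support = []
    z = np.zeros(D.shape[1])
    for _ in range(n_nonzero_coefs):
        # pick the atom most correlated with the current residual
        j = int(np.argmax(np.abs(D.T.dot(residual))))
        if j in support:                    # no new atom improves the fit
            break
        support.append(j)
        # re-fit the coefficients of the selected atoms by least squares
        coef, *_ = np.linalg.lstsq(D[:, support], x, rcond=None)
        residual = x - D[:, support].dot(coef)
        if tol is not None and residual.dot(residual) < tol:
            break
    z[support] = coef
    return z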
Example #6
    def build_layer(self, layer=0):

        feature_maps_path = "layer" + str(layer) + "/feature_maps"
        output_img_path = os.path.join(self.workspace.base_path, "layer" + str(layer + 1) + "/imgs")
        if not os.path.exists(
                os.path.join(self.workspace.base_path, os.path.join("layer" + str(layer), "feature_map_dims.npy"))):
            self.build_feature_maps(layer)

        self.feature_maps[layer] = self.workspace.load(feature_maps_path, sparse="2D", online=True)
        levels = self.spm_levels
        features_path = os.path.join("layer" + str(layer), "features.npy")
        feature_map_dims = self.workspace.load(os.path.join("layer" + str(layer), "feature_map_dims.npy"), online=False)
        feature_map_dims = feature_map_dims.astype(int)
        n_imgs = len(self.feature_maps[layer])
        # make the zero-padded (formatted) indices
        idxs = [(len(str(n_imgs)) - len(str(i))) * '0' + str(i) for i in range(n_imgs)]
        spp = False
        conv = False
        pooling_step = None
        pooling_size = None
        if (not self.workspace.contains(features_path)) or self.rebuild_spp:
            n_cells = np.array(levels) ** 2
            n_total_cells = np.sum(n_cells)
            n_features = self.n_atoms[layer]
            # pre-allocate
            Z_final = np.zeros((n_total_cells * n_features, n_imgs))
            patch_size = self.filter_sizes[layer]

            spp = True

        if not os.path.exists(output_img_path):

            if layer < self.n_layers - 1:
                conv = True
                pooling_step = self.pooling_steps[layer]
                pooling_size = self.pooling_sizes[layer]
                os.makedirs(output_img_path)
            else:
                conv = False
                output_img_path = None

        if spp:
            print("building SPP layer on top of layer{0}".format(layer))
        if conv:
            print("building CONV layer on top of layer{0}".format(layer))

        if spp or conv:
            # if the layer is not done yet
            func = partial(pool_proc, pooling_size=pooling_size, spp_pooler=self.spp_pooler,
                           pooling_step=pooling_step, spp=spp, conv=conv, normalizer=self.spm_normalizer,
                           levels=self.spm_levels)
            n_batches = 100
            msg = "building next layer"
            data = self.feature_maps[layer]
            if spp:

                Z_final = run_parallel(func=func, data=data, args=[output_img_path],
                                       batched_args=[feature_map_dims.T, idxs],
                                       result_shape=(n_total_cells * n_features, n_imgs), n_batches=n_batches,
                                       mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)
                self.workspace.save(features_path, Z_final)
            else:

                run_parallel(func=func, data=data, args=[output_img_path], batched_args=[feature_map_dims.T, idxs],
                             result_shape=None, n_batches=n_batches,
                             mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)

            if conv:
                img_dims = np.zeros((3, n_imgs)).astype(int)
                img_dim_files = [os.path.join(output_img_path, f) for f in os.listdir(output_img_path)
                                 if os.path.isfile(os.path.join(output_img_path, f)) and f.startswith("img_dim")]
                img_dim_files.sort()
                for i, img_dim_file in enumerate(img_dim_files):
                    dims = np.load(img_dim_file)
                    img_dims[:, i] = dims
                    os.remove(img_dim_file)
                self.workspace.save(os.path.join(output_img_path, "dims.npy"), img_dims)

        if layer < self.n_layers - 1:
            self.imgs[layer + 1] = online_reader(path=output_img_path, sparse="3D", prefix="img", suffix="npz")
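
The hand-rolled zero padding used to build `idxs` above is equivalent to `str.zfill`; a quick check (the image count is hypothetical):

n_imgs = 1200
width = len(str(n_imgs))
idxs_manual = [(width - len(str(i))) * '0' + str(i) for i in range(n_imgs)]
idxs_zfill = [str(i).zfill(width) for i in range(n_imgs)]
assert idxs_manual == idxs_zfill                # e.g. '0007', '0123', '1199'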
Example #7
    def __call__(self, X, D):
        # assume X has data points in columns.
        # use self.params.get('key') because it does not raise an exception
        # when the key does not exist; it just returns None.
        from lyssa.utils import set_openblas_threads

        n_samples = X.shape[1]
        n_atoms = D.shape[1]
        n_batches = 100

        if self.params.get('lambda') is not None:
            assert self.params.get('lambda') <= n_atoms

        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        batched_args = None

        if self.algorithm == 'omp':
            Gram = fast_dot(D.T, D)
            args = [D, Gram]
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            data = X
            func = partial(omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'), tol=self.params.get('tol'))

        elif self.algorithm == 'bomp':
            Gram = fast_dot(D.T, D)
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            args = [D, Gram]
            data = X
            func = partial(batch_omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'), tol=self.params.get('tol'))

        elif self.algorithm == 'thresh':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = []
            func = partial(thresholding, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           nonzero_percentage=self.params.get('nonzero_percentage'))

        elif self.algorithm == "nnomp":
            args = [D]
            data = X
            func = partial(nn_omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'), tol=self.params.get('tol'))

        elif self.algorithm == 'group_omp':
            Gram = fast_dot(D.T, D)
            Alpha = fast_dot(D.T, X)
            batched_args = [Alpha]
            data = X
            func = partial(group_omp, groups=self.params.get('groups'), n_groups=self.params.get('n_groups'))
            args = [D, Gram]

        elif self.algorithm == 'sparse_group_omp':
            # group_omp(X,D,Gram,groups=None,n_groups=None)
            Gram = fast_dot(D.T, D)
            data = X
            # sparse_group_omp(X,D,Gram,groups=None,n_groups=None,n_nonzero_coefs=None)
            func = partial(sparse_group_omp, groups=self.params.get('groups'), n_groups=self.params.get('n_groups'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
            args = [D, Gram]

        elif self.algorithm == 'somp':
            Gram = fast_dot(D.T, D)
            data = X
            func = partial(somp, data_groups=self.params.get('data_groups'),
                           n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
            args = [D, Gram]

        elif self.algorithm == 'iht':
            Alpha = fast_dot(D.T, X)
            data = Alpha
            args = []
            func = partial(thresholding, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           nonzero_percentage=self.params.get('nonzero_percentage'))

            Z0 = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                              result_shape=(n_atoms, n_samples), n_batches=n_batches,
                              mmap=self.mmap, n_jobs=self.n_jobs)

            R0 = fast_dot(D, Z0) - X
            data = X
            batched_args = [Z0, R0]
            args = [D]
            # iterative_hard_thresh(X,Z0,Alpha,D,eta=None,n_nonzero_coefs=None,n_iter=None)
            func = partial(iterative_hard_thresh, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                           eta=self.params.get('eta'), n_iter=self.params.get('n_iter'))
            """params = sparse_coder['iterative_hard_thresh']
            learning_rate = params[0]
            threshold = params[1]
            max_iter = params[2]
            Z = iterative_hard_thresh(X,D,Z,learning_rate=learning_rate,threshold = threshold,max_iter = max_iter)
            """

        elif self.algorithm == 'lasso':
            return lasso(self.params.get('lambda'), self.n_jobs)(X, D)

        elif self.algorithm == 'llc':
            func = partial(llc, knn=self.params.get('knn'))
            data = X
            args = [D]
        else:
            raise ValueError("Sparse optimizer not found.")

        if self.verbose:
            msg = "sparse coding"
        else:
            msg = None

        if self.n_jobs > 1:
            # disable OpenBLAS to
            # avoid the hanging problem
            set_openblas_threads(1)

        Z = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                         result_shape=(n_atoms, n_samples), n_batches=n_batches,
                         mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)

        # restore the previous setting
        if self.n_jobs > 1:
            set_openblas_threads(self.n_jobs)

        return Z
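
In the `iht` branch, the initial estimate `Z0` from thresholding is refined by `iterative_hard_thresh`, whose body is not shown. A minimal sketch of the underlying iteration (a gradient step on the residual followed by hard thresholding, with `eta` assumed to be the step size) could look like this; it is not lyssa's implementation:

import numpy as np

def iterative_hard_thresh_sketch(X, D, Z0, eta, n_nonzero_coefs, n_iter):
    # Illustrative IHT: Z <- H_k(Z - eta * D.T @ (D @ Z - X)), where H_k keeps
    # the n_nonzero_coefs largest-magnitude entries of each column.
    Z = Z0.copy()
    for _ in range(n_iter):
        R = D.dot(Z) - X                        # current residual
        Z = Z - eta * D.T.dot(R)                # gradient step on 0.5 * ||X - D Z||^2
        for j in range(Z.shape[1]):
            keep = np.argsort(np.abs(Z[:, j]))[-n_nonzero_coefs:]
            mask = np.zeros(Z.shape[0], dtype=bool)
            mask[keep] = True
            Z[~mask, j] = 0.0                   # hard-threshold the column
    return Z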
Example #8
    def build_layer(self, layer=0):

        feature_maps_path = "layer" + str(layer) + "/feature_maps"
        output_img_path = os.path.join(self.workspace.base_path, "layer" + str(layer + 1) + "/imgs")
        if not os.path.exists(
                os.path.join(self.workspace.base_path, os.path.join("layer" + str(layer), "feature_map_dims.npy"))):
            self.build_feature_maps(layer)

        self.feature_maps[layer] = self.workspace.load(feature_maps_path, sparse="2D", online=True)
        levels = self.spm_levels
        features_path = os.path.join("layer" + str(layer), "features.npy")
        feature_map_dims = self.workspace.load(os.path.join("layer" + str(layer), "feature_map_dims.npy"), online=False)
        feature_map_dims = feature_map_dims.astype(int)
        n_imgs = len(self.feature_maps[layer])
        # make the zero-padded (formatted) indices
        idxs = [(len(str(n_imgs)) - len(str(i))) * '0' + str(i) for i in range(n_imgs)]
        spp = False
        conv = False
        pooling_step = None
        pooling_size = None
        if (not self.workspace.contains(features_path)) or self.rebuild_spp:
            n_cells = np.array(levels) ** 2
            n_total_cells = np.sum(n_cells)
            n_features = self.n_atoms[layer]
            # pre-allocate
            Z_final = np.zeros((n_total_cells * n_features, n_imgs))

            spp = True

        if not os.path.exists(output_img_path):

            if layer < self.n_layers - 1:
                conv = True
                pooling_step = self.pooling_steps[layer]
                pooling_size = self.pooling_sizes[layer]
                os.makedirs(output_img_path)
            else:
                conv = False
                output_img_path = None

        if spp:
            print("building SPP layer on top of layer{0}".format(layer))
        if conv:
            print("building CONV layer on top of layer{0}".format(layer))

        if spp or conv:
            # if the layer is not done yet
            func = partial(pool_proc, pooling_size=pooling_size, spp_pooler=self.spp_pooler,
                           pooling_step=pooling_step, spp=spp, conv=conv, normalizer=self.spm_normalizer,
                           levels=self.spm_levels)
            n_batches = 100
            msg = "building next layer"
            data = self.feature_maps[layer]
            if spp:

                Z_final = run_parallel(func=func, data=data, args=[output_img_path],
                                       batched_args=[feature_map_dims.T, idxs],
                                       result_shape=(n_total_cells * n_features, n_imgs), n_batches=n_batches,
                                       mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)
                self.workspace.save(features_path, Z_final)
            else:

                run_parallel(func=func, data=data, args=[output_img_path], batched_args=[feature_map_dims.T, idxs],
                             result_shape=None, n_batches=n_batches,
                             mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)

            if conv:
                img_dims = np.zeros((3, n_imgs)).astype(int)
                img_dim_files = [os.path.join(output_img_path, f) for f in os.listdir(output_img_path)
                                 if os.path.isfile(os.path.join(output_img_path, f)) and f.startswith("img_dim")]
                img_dim_files.sort()
                for i, img_dim_file in enumerate(img_dim_files):
                    dims = np.load(img_dim_file)
                    img_dims[:, i] = dims
                    os.remove(img_dim_file)
                self.workspace.save(os.path.join(output_img_path, "dims.npy"), img_dims)

        if layer < self.n_layers - 1:
            self.imgs[layer + 1] = online_reader(path=output_img_path, sparse="3D", prefix="img", suffix="npz")
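
`pool_proc` and `self.spp_pooler` are not included in this example. As a rough sketch of what max pooling over a spatial pyramid does to a single feature map of sparse codes with shape (n_atoms, h, w), under the assumption that the pooler takes the per-atom maximum inside each pyramid cell:

import numpy as np

def spatial_pyramid_max_pool_sketch(codes, levels=(1, 2, 4)):
    # Illustrative only: max-pool a (n_atoms, h, w) map over an SPM pyramid and
    # return a vector of length sum(l * l for l in levels) * n_atoms.
    # Assumes h and w are at least max(levels) so that no cell is empty.
    n_atoms, h, w = codes.shape
    pooled = []
    for l in levels:
        ys = np.linspace(0, h, l + 1).astype(int)   # l x l grid of cell boundaries
        xs = np.linspace(0, w, l + 1).astype(int)
        for yi in range(l):
            for xi in range(l):
                cell = codes[:, ys[yi]:ys[yi + 1], xs[xi]:xs[xi + 1]]
                pooled.append(cell.max(axis=(1, 2)))
    return np.concatenate(pooled)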