def local_src_features(X, D, sparse_coder, n_class_atoms, n_jobs=1):
    n_samples = X.shape[1]
    n_classes = len(n_class_atoms)
    data = [X]
    args = [D, n_class_atoms, sparse_coder]
    Z_final = run_parallel(func=local_error, data=data, args=args,
                           batched_args=None, result_shape=(n_classes, n_samples),
                           n_batches=100, mmap=False,
                           msg="building local SRC features", n_jobs=n_jobs)
    Z_final = norm_cols(Z_final)
    return Z_final
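
# Usage sketch (illustrative, not part of the source): local_src_features expects
# datapoints in the columns of X, a dictionary D whose atoms are grouped per class,
# and n_class_atoms giving the atom count of each class. The coder below is a
# hypothetical stand-in; the assumption is that local_error invokes it as
# Z = sparse_coder(X, D), so any callable with that signature should slot in.
import numpy as np

coder = lambda X, D: np.dot(np.linalg.pinv(D), X)  # least-squares "codes" (dense, not sparse)

X = np.random.randn(64, 500)               # 500 datapoints of dimension 64
D = np.random.randn(64, 300)               # 3 classes x 100 atoms per class
n_class_atoms = np.array([100, 100, 100])

Z = local_src_features(X, D, coder, n_class_atoms, n_jobs=1)
# Z has shape (n_classes, n_samples) = (3, 500) and unit-norm columns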
def extract(self, imgs, pyramid_feat_extractor=None, save=True, n_jobs=4):
    if self.D is None:
        self.D = self.workspace.load("dict.npy")
    n_imgs = len(imgs)
    levels = (1, 2, 4)
    n_atoms = self.D.shape[1]
    n_features = np.sum(np.array(levels) ** 2) * n_atoms
    Z = run_parallel(func=pyramid_feat_extract, data=imgs,
                     args=(pyramid_feat_extractor, self.D),
                     result_shape=(n_features, n_imgs),
                     n_batches=100, mmap=self.mmap,
                     msg="building ScSPM features", n_jobs=n_jobs)
    if save:
        self.workspace.save("features.npy", Z)
    return Z
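
# Dimension check for the pyramid features above: with levels = (1, 2, 4) the
# spatial pyramid has 1 + 4 + 16 = 21 cells, and each cell contributes one pooled
# n_atoms-dimensional code, so every image yields 21 * n_atoms features. For
# example (the 1024-atom dictionary is illustrative):
import numpy as np

levels = (1, 2, 4)
n_atoms = 1024
n_features = np.sum(np.array(levels) ** 2) * n_atoms
assert n_features == 21 * 1024 == 21504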
def __call__(self, X, D):
    from lyssa.utils import set_openblas_threads
    if self.verbose:
        msg = "feature encoding"
    else:
        msg = None
    n_atoms = D.shape[1]
    n_samples = X.shape[1]
    n_batches = 100
    if self.n_jobs > 1:
        set_openblas_threads(self.n_jobs)
    if self.algorithm == 'soft_thresholding':
        Alpha = fast_dot(D.T, X)
        data = Alpha
        args = None
        batched_args = None
        func = partial(soft_thresholding,
                       nonzero_percentage=self.params.get('nonzero_percentage'),
                       n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
    else:
        # only soft_thresholding is handled here; fail fast instead of
        # hitting a NameError on func/data below
        raise ValueError("Feature encoder '{0}' not supported.".format(self.algorithm))
    if self.n_jobs > 1:
        # disable OpenBLAS to avoid the hanging problem
        set_openblas_threads(1)
    Z = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                     result_shape=(n_atoms, n_samples), n_batches=n_batches,
                     mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)
    # restore the previous setting
    if self.n_jobs > 1:
        set_openblas_threads(self.n_jobs)
    return Z
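
# For reference, a textbook soft-thresholding (shrinkage) operator in plain NumPy.
# This is independent of the module's soft_thresholding helper, whose exact
# selection rule (via nonzero_percentage / n_nonzero_coefs) is not shown here;
# the threshold t below is an illustrative parameter.
import numpy as np

def soft_threshold_sketch(Alpha, t):
    # shrink every coefficient toward zero: sign(a) * max(|a| - t, 0)
    return np.sign(Alpha) * np.maximum(np.abs(Alpha) - t, 0.0)

Alpha = np.random.randn(256, 100)      # e.g. Alpha = D.T X
Z = soft_threshold_sketch(Alpha, t=0.5)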
def __call__(self, X, D):
    # assume X has datapoints in columns.
    # self.params.get('key') is used because it does not raise
    # when the key is missing; it just returns None.
    from lyssa.utils import set_openblas_threads
    n_samples = X.shape[1]
    n_atoms = D.shape[1]
    n_batches = 100
    if self.params.get('lambda') is not None:
        assert self.params.get('lambda') <= n_atoms
    if self.n_jobs > 1:
        set_openblas_threads(self.n_jobs)
    batched_args = None
    if self.algorithm == 'omp':
        Gram = fast_dot(D.T, D)
        args = [D, Gram]
        Alpha = fast_dot(D.T, X)
        batched_args = [Alpha]
        data = X
        func = partial(omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                       tol=self.params.get('tol'))
    elif self.algorithm == 'bomp':
        Gram = fast_dot(D.T, D)
        Alpha = fast_dot(D.T, X)
        batched_args = [Alpha]
        args = [D, Gram]
        data = X
        func = partial(batch_omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                       tol=self.params.get('tol'))
    elif self.algorithm == 'thresh':
        Alpha = fast_dot(D.T, X)
        data = Alpha
        args = []
        func = partial(thresholding, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                       nonzero_percentage=self.params.get('nonzero_percentage'))
    elif self.algorithm == "nnomp":
        args = [D]
        data = X
        func = partial(nn_omp, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                       tol=self.params.get('tol'))
    elif self.algorithm == 'group_omp':
        Gram = fast_dot(D.T, D)
        Alpha = fast_dot(D.T, X)
        batched_args = [Alpha]
        data = X
        func = partial(group_omp, groups=self.params.get('groups'),
                       n_groups=self.params.get('n_groups'))
        args = [D, Gram]
    elif self.algorithm == 'sparse_group_omp':
        # sparse_group_omp(X, D, Gram, groups=None, n_groups=None, n_nonzero_coefs=None)
        Gram = fast_dot(D.T, D)
        data = X
        func = partial(sparse_group_omp, groups=self.params.get('groups'),
                       n_groups=self.params.get('n_groups'),
                       n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
        args = [D, Gram]
    elif self.algorithm == 'somp':
        Gram = fast_dot(D.T, D)
        data = X
        func = partial(somp, data_groups=self.params.get('data_groups'),
                       n_nonzero_coefs=self.params.get('n_nonzero_coefs'))
        args = [D, Gram]
    elif self.algorithm == 'iht':
        # initialize with plain thresholding, then refine iteratively
        Alpha = fast_dot(D.T, X)
        data = Alpha
        args = []
        func = partial(thresholding, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                       nonzero_percentage=self.params.get('nonzero_percentage'))
        Z0 = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                          result_shape=(n_atoms, n_samples), n_batches=n_batches,
                          mmap=self.mmap, n_jobs=self.n_jobs)
        R0 = fast_dot(D, Z0) - X
        data = X
        batched_args = [Z0, R0]
        args = [D]
        # iterative_hard_thresh(X, Z0, R0, D, eta=None, n_nonzero_coefs=None, n_iter=None)
        func = partial(iterative_hard_thresh, n_nonzero_coefs=self.params.get('n_nonzero_coefs'),
                       eta=self.params.get('eta'), n_iter=self.params.get('n_iter'))
    elif self.algorithm == 'lasso':
        return lasso(self.params.get('lambda'), self.n_jobs)(X, D)
    elif self.algorithm == 'llc':
        func = partial(llc, knn=self.params.get('knn'))
        data = X
        args = [D]
    else:
        raise ValueError("Sparse coding algorithm '{0}' not found.".format(self.algorithm))
    if self.verbose:
        msg = "sparse coding"
    else:
        msg = None
    if self.n_jobs > 1:
        # disable OpenBLAS to avoid the hanging problem
        set_openblas_threads(1)
    Z = run_parallel(func=func, data=data, args=args, batched_args=batched_args,
                     result_shape=(n_atoms, n_samples), n_batches=n_batches,
                     mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)
    # restore the previous setting
    if self.n_jobs > 1:
        set_openblas_threads(self.n_jobs)
    return Z
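
# A note on the args / batched_args split used throughout __call__ above: the
# Gram matrix D.T D does not depend on the datapoints, so it is computed once and
# shared by every batch (args), whereas Alpha = D.T X has one column per
# datapoint and is sliced along with X (batched_args). A shape sketch with
# illustrative sizes:
import numpy as np

n_dim, n_atoms, n_samples = 64, 256, 1000
D = np.random.randn(n_dim, n_atoms)
X = np.random.randn(n_dim, n_samples)

Gram = np.dot(D.T, D)    # (n_atoms, n_atoms): batch-independent -> args
Alpha = np.dot(D.T, X)   # (n_atoms, n_samples): split per batch -> batched_args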
def build_layer(self, layer=0):
    feature_maps_path = "layer" + str(layer) + "/feature_maps"
    output_img_path = os.path.join(self.workspace.base_path,
                                   "layer" + str(layer + 1) + "/imgs")
    if not os.path.exists(
            os.path.join(self.workspace.base_path,
                         os.path.join("layer" + str(layer), "feature_map_dims.npy"))):
        self.build_feature_maps(layer)
    self.feature_maps[layer] = self.workspace.load(feature_maps_path, sparse="2D", online=True)
    levels = self.spm_levels
    features_path = os.path.join("layer" + str(layer), "features.npy")
    feature_map_dims = self.workspace.load(os.path.join("layer" + str(layer), "feature_map_dims.npy"),
                                           online=False)
    feature_map_dims = feature_map_dims.astype(int)
    n_imgs = len(self.feature_maps[layer])
    # make the formatted (zero-padded) indices
    idxs = [(len(str(n_imgs)) - len(str(i))) * '0' + str(i) for i in range(n_imgs)]
    spp = False
    conv = False
    pooling_step = None
    pooling_size = None
    if (not self.workspace.contains(features_path)) or self.rebuild_spp:
        n_cells = np.array(levels) ** 2
        n_total_cells = np.sum(n_cells)
        n_features = self.n_atoms[layer]
        # pre-allocate
        Z_final = np.zeros((n_total_cells * n_features, n_imgs))
        patch_size = self.filter_sizes[layer]
        spp = True
    if not os.path.exists(output_img_path):
        if layer < self.n_layers - 1:
            conv = True
            pooling_step = self.pooling_steps[layer]
            pooling_size = self.pooling_sizes[layer]
            os.makedirs(output_img_path)
    else:
        conv = False
        output_img_path = None
    if spp:
        print "building SPP layer on top of layer{0}".format(layer)
    if conv:
        print "building CONV layer on top of layer{0}".format(layer)
    if spp or conv:
        # the layer is not done yet
        func = partial(pool_proc, pooling_size=pooling_size, spp_pooler=self.spp_pooler,
                       pooling_step=pooling_step, spp=spp, conv=conv,
                       normalizer=self.spm_normalizer, levels=self.spm_levels)
        n_batches = 100
        msg = "building next layer"
        data = self.feature_maps[layer]
        if spp:
            Z_final = run_parallel(func=func, data=data, args=[output_img_path],
                                   batched_args=[feature_map_dims.T, idxs],
                                   result_shape=(n_total_cells * n_features, n_imgs),
                                   n_batches=n_batches, mmap=self.mmap, msg=msg,
                                   n_jobs=self.n_jobs)
            self.workspace.save(features_path, Z_final)
        else:
            run_parallel(func=func, data=data, args=[output_img_path],
                         batched_args=[feature_map_dims.T, idxs],
                         result_shape=None, n_batches=n_batches,
                         mmap=self.mmap, msg=msg, n_jobs=self.n_jobs)
        if conv:
            # gather the per-image dimension files into one dims.npy
            img_dims = np.zeros((3, n_imgs)).astype(int)
            img_dim_files = [os.path.join(output_img_path, f) for f in os.listdir(output_img_path)
                             if os.path.isfile(os.path.join(output_img_path, f)) and f.startswith("img_dim")]
            img_dim_files.sort()
            for i, img_dim_file in enumerate(img_dim_files):
                dims = np.load(img_dim_file)
                img_dims[:, i] = dims
                os.remove(img_dim_file)
            self.workspace.save(os.path.join(output_img_path, "dims.npy"), img_dims)
    if layer < self.n_layers - 1:
        self.imgs[layer + 1] = online_reader(path=output_img_path, sparse="3D",
                                             prefix="img", suffix="npz")
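
# The idxs comprehension in build_layer zero-pads each image index to the width
# of n_imgs so that the per-image files sort lexicographically; an equivalent,
# more direct form uses str.zfill:
n_imgs = 1000
idxs = [str(i).zfill(len(str(n_imgs))) for i in range(n_imgs)]
assert idxs[7] == "0007" and idxs[42] == "0042"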