def learn_dictionary(self, images, npatches=50000, niter=1000, njobs=-1):
    """ Learn a Sparse Code dictionary for this ScSPM.

    This method trains a sparse codes dictionary for the ScSPM descriptor
    object. This only needs to be run once before multiple calls to the
    extract() method can be made.

    Arguments:
        images: list, a list of paths to images to use for training.
        npatches: int (default 50000), number of SIFT patches to extract
            from the images to use for training the dictionary.
        niter: int (default 1000), the number of iterations of dictionary
            learning (Lasso) to perform.
        njobs: int (default -1), the number of threads to use. -1 means the
            number of threads will be equal to the number of cores.
    """
    # Get SIFT training patches
    print('Getting training patches...')
    patches = sw.training_patches(images, npatches, self.psize, self.maxdim,
                                  verbose=True)
    patches = pch.norm_patches(patches)
    print('{0} patches requested, {1} patches found.'.format(
        npatches, patches.shape[0]))
    time.sleep(3)  # Give people a chance to see this message

    # Learn dictionary
    print('Learning dictionary...')
    self.dic = trainDL(np.asfortranarray(patches.T, np.float64), mode=0,
                       K=self.dsize, lambda1=0.15, iter=niter,
                       numThreads=njobs)
    print('done.')
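# Input-layout sketch (illustrative, not from the original source): trainDL
# expects a Fortran-ordered float array with one sample per column, which is
# why the patch matrix above is transposed before training. Shapes and
# parameter values below are assumptions for demonstration only.
import numpy as np
import spams

patches = np.random.rand(50000, 128)                 # one SIFT patch per row
X = np.asfortranarray(patches.T, dtype=np.float64)   # features x samples, Fortran order
D = spams.trainDL(X, mode=0, K=512, lambda1=0.15, iter=1000, numThreads=-1)
print(D.shape)                                       # (128, 512): one atom per column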
def run_multiprocessing_queue_spams_trainDL(out_queue, *args, **kwargs):
    """ Designed to run spams.trainDL in a separate process.

    It is necessary to run SPAMS in a separate process as segmentation
    faults have been discovered in later parts of the Python code dependent
    on whether SPAMS has run or not. It is suspected that spams may
    interfere with the interpreter. Thus, it should be sandboxed (run in a
    different Python interpreter) so that it doesn't damage what happens in
    this one.

    This particular version uses a multiprocessing.Queue to return the
    resulting dictionary.

    Args:
        out_queue(multiprocessing.Queue): what will take the returned
            dictionary from spams.trainDL.
        *args(list): a list of positional arguments to pass to spams.trainDL.
        **kwargs(dict): a dictionary of keyword arguments to pass to
            spams.trainDL.
    """
    # spams is imported here as it is not needed outside of calling
    # spams.trainDL. Also, it takes a long time to load this module.
    import spams

    result = spams.trainDL(*args, **kwargs)
    out_queue.put(result)
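# Usage sketch (illustrative): sandboxing trainDL in a child process with the
# queue-based runner above. The data shape and parameters are assumptions.
import multiprocessing
import numpy as np

if __name__ == '__main__':
    X = np.asfortranarray(np.random.rand(64, 10000))
    out_queue = multiprocessing.Queue()
    p = multiprocessing.Process(
        target=run_multiprocessing_queue_spams_trainDL,
        args=(out_queue, X),
        kwargs={'K': 100, 'lambda1': 0.15, 'iter': 100, 'numThreads': 1},
    )
    p.start()
    D = out_queue.get()   # fetch before join() so large results don't block the pipe
    p.join()
    print(D.shape)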
def call_spams_trainDL(*args, **kwargs):
    """ Encapsulates the call to spams.trainDL.

    Ensures a copy of the result occurs just in case. Designed to be like
    the multiprocessing calls.

    Args:
        *args(list): a list of positional arguments to pass to spams.trainDL.
        **kwargs(dict): a dictionary of keyword arguments to pass to
            spams.trainDL.

    Note:
        For legacy.
    """
    # spams is imported here as it is not needed outside of calling
    # spams.trainDL. Also, it takes a long time to load this module.
    import spams

    result = spams.trainDL(*args, **kwargs)
    result = result.copy()

    return result
def getstainMat(I, param, i_0):
    # I : Patch for W estimation
    V, VforW = BLtrans(I, i_0)  # Beer-Lambert law
    # Step 2: Sparse NMF factorization (learning W; V = WH)
    out = suppress_stdout()
    Ws = spams.trainDL(np.asfortranarray(np.transpose(VforW)), **param)
    suppress_stdout(out)
    return Ws
def get_staincolor_sparsenmf(v):
    # Learn the stain color basis with sparse NMF; `param` and the
    # blockPrint/enablePrint helpers come from the enclosing module.
    blockPrint()
    D = spams.trainDL(np.transpose(v), **param)
    enablePrint()

    # Order the dictionary columns by the second coordinate.
    a_arg = np.argsort(np.transpose(D)[:, 1])
    return np.transpose(np.transpose(D)[a_arg])
def __init__(self, X, n_components, alpha, save=None):
    X1 = np.asfortranarray(X.T)
    self.X = X1
    self.lambda1 = alpha
    self.d = spams.trainDL(X1, K=n_components, mode=3, modeD=0,
                           numThreads=-1, lambda1=alpha, return_model=False)
    self.d = np.asfortranarray(self.d)
    if save:
        np.save(save, self.d)
def test_trainDL_Memory():
    img_file = 'lena.png'
    try:
        img = Image.open(img_file)
    except Exception:
        print("Cannot load image %s : skipping test" % img_file)
        return None
    I = np.array(img) / 255.
    if I.ndim == 3:
        A = np.asfortranarray(I.reshape((I.shape[0], I.shape[1] * I.shape[2])))
        rgb = True
    else:
        A = np.asfortranarray(I)
        rgb = False

    m = 8
    n = 8
    X = spams.im2col_sliding(A, m, n, rgb)
    X = X - np.tile(np.mean(X, 0), (X.shape[0], 1))
    X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)), (X.shape[0], 1)))
    X = np.asfortranarray(X[:, np.arange(0, X.shape[1], 10)], dtype=myfloat)
    param = {'K': 200,  # learns a dictionary with 200 elements
             'lambda1': 0.15,
             'numThreads': 4,
             'iter': 100}

    ############# FIRST EXPERIMENT ##################
    tic = time.time()
    D = spams.trainDL_Memory(X, **param)
    tac = time.time()
    t = tac - tic
    print('time of computation for Dictionary Learning: %f' % t)

    print('Evaluating cost function...')
    lparam = _extract_lasso_param(param)
    alpha = spams.lasso(X, D=D, **lparam)
    xd = X - D * alpha
    R = np.mean(0.5 * (xd * xd).sum(axis=0) + param['lambda1'] * np.abs(alpha).sum(axis=0))
    print("objective function: %f" % R)

    ############# SECOND EXPERIMENT ##################
    tic = time.time()
    D = spams.trainDL(X, **param)
    tac = time.time()
    t = tac - tic
    print('time of computation for Dictionary Learning: %f' % t)

    print('Evaluating cost function...')
    alpha = spams.lasso(X, D=D, **lparam)
    xd = X - D * alpha
    R = np.mean(0.5 * (xd * xd).sum(axis=0) + param['lambda1'] * np.abs(alpha).sum(axis=0))
    print("objective function: %f" % R)
    return None
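# _extract_lasso_param above is a small helper from the SPAMS test suite; a
# plausible sketch of it (an assumption, not the original code) simply copies
# the lasso-relevant keys out of the trainDL parameter dict:
def _extract_lasso_param(f_param):
    keys = ('L', 'lambda1', 'lambda2', 'mode', 'pos', 'ols', 'numThreads',
            'length_path', 'verbose')
    l_param = {'return_reg_path': False}
    for key in keys:
        if key in f_param:
            l_param[key] = f_param[key]
    return l_param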
def learn_D(self, segment_list, k, a=None, batch=False, iter=-5):
    # Stack the training segments horizontally
    temp = numpy.hstack(segment_list)
    if a is None:
        a = 1.0 / math.sqrt(temp.shape[0])

    # Learn dictionary
    D = spams.trainDL(numpy.asfortranarray(temp), K=k, lambda1=a,
                      batch=batch, iter=iter, posAlpha=True)
    return D
def DicL(i, output_dir, dicl_dim, migp_dim):
    # Load MIGP data, a subject-by-feature matrix
    ff = output_dir + '/DicL' + str(dicl_dim) + '/DLdata_mod' + ('%02d' % (i + 1)) + '.npy'
    if not os.path.exists(ff):
        dd = np.load(output_dir + '/MIGP' + str(migp_dim) + '/PCAdata_mod' + ('%02d' % (i + 1)) + '.npy')
        # Result is dicl_dim-by-migp_dim; `param` comes from the enclosing module.
        D = spams.trainDL(np.asfortranarray(dd.T, dtype='float64'), **param)
        np.save(ff, D)
    else:
        D = np.load(ff)
    return D
def getDictionary(Img, patch_size, **param):
    I = np.array(Img) / 255.
    A = np.asfortranarray(I)
    rgb = False
    # Alternative patch extraction:
    # X = spams.im2col_sliding(A, patch_size, patch_size, rgb)
    X = im2col(Img, (patch_size, patch_size))
    X = X - np.tile(np.mean(X, 0), (X.shape[0], 1))
    X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)), (X.shape[0], 1)),
                          dtype=float)
    D = spams.trainDL(X, **param)
    return D
def get_staincolor_sparsenmf(v, **param):
    # Learn W through sparse NMF
    Ws = spams.trainDL(v.T, **param)

    # Arrange the H stain color vector as the first column and the E stain
    # color vector as the second.
    Ws = Ws.T
    Ws = Ws[Ws[:, 1].argsort()]
    return Ws.T
def learn_dict(vecs, factor):
    print('X shape: ' + str(vecs.shape))
    X = vecs
    n_components = len(X)
    X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)), (X.shape[0], 1)),
                          dtype='float32')
    param = {'K': factor * n_components,  # learns a dictionary with K elements
             'lambda1': 0.15,
             'numThreads': -1,
             'batchsize': 50,
             'iter': 1000}
    D = spams.trainDL(X, **param)
    print('Q shape: ' + str(D.shape))
    return D
def getstainMat(I, param):
    # Source input
    # I : Patch for W estimation
    V, VforW = BLtrans(I)  # Beer-Lambert law
    # Step 2: Sparse NMF factorization (learning W; V = WH)
    out = suppress_stdout()
    Ws = spams.trainDL(np.asfortranarray(np.transpose(VforW)), **param)
    suppress_stdout(out)
    # Label the columns to be Hematoxylin and Eosin
    tmp = np.transpose(Ws)
    Ws = np.transpose(tmp[tmp[:, 2].argsort(), ])
    return Ws
def fit(x, num):
    # mode and lambda1 are selected together by indexing a two-element list
    # with the boolean use_lasso flag.
    return spams.trainDL(
        K=num,
        numThreads=2,
        X=np.asfortranarray(x.T),
        mode=[4, 2][self.use_lasso],
        lambda1=[self.l0_max, self.l1_dictionary][self.use_lasso],
        iter=self.num_iterations,
        verbose=self.verbose,
        posAlpha=self.positive_coefficients,
        batchsize=self.minibatch_size,
    )
def dictionary_learning(patch_feature, lambda1=1, dictionary_size=100,
                        batchsize=100, posD=True):
    # Input shape: (feature size, sample size)
    X_patch = np.asfortranarray(patch_feature)
    param = {'K': dictionary_size,  # learns a dictionary with dictionary_size elements
             'mode': 0,
             'lambda1': lambda1,
             'numThreads': -1,
             'batchsize': batchsize,
             'posD': posD,
             'verbose': False}
    D = spams.trainDL(X_patch, **param)
    return D
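# Usage sketch (illustrative data): a nonnegative dictionary learned from
# random nonnegative features, as dictionary_learning above configures with
# posD=True. Shapes and values are assumptions.
import numpy as np

features = np.abs(np.random.randn(64, 5000))       # (feature size, sample size)
D = dictionary_learning(features, lambda1=0.15, dictionary_size=100)
print(D.shape)                                     # (64, 100)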
def get_staincolor_sparsenmf(v, param):
    # Learn the stain color matrix through sparse NMF,
    # e.g. param = {'K': 2, 'lambda1': 0.02, 'numThreads': 4, 'posAlpha': True}
    Ws = spams.trainDL(v.T, **param)
    # Sort the columns by descending second row
    Ws = Ws[:, np.argsort(-Ws[1, :])]
    return Ws
def train_SC(database, param):
    num = database.shape[0]
    X = database.reshape([num, 80, 59])
    X = np.swapaxes(X, axis1=0, axis2=2)
    D_list = []
    patches_num = X.shape[1]
    for i in range(patches_num):
        x = np.asfortranarray(X[:, i, :])
        D = spams.trainDL(x, **param)
        D_list.append(D)
    return D_list
def alignfirst_dico(dataset, N0, J, init=None, save=False, directory=None, verbose=False):
    '''Performs (real) dictionary learning on the dataset, after it is
    optimally rotated along its mean. Relies on the SPAMS toolbox of
    Mairal et al.
    '''
    K = len(dataset)
    dataset = align_rot(dataset)
    dataset_r = multi_complex2real(dataset)
    X = sqrtPsi @ dataset_r.T
    X = np.asfortranarray(X)  # necessary for using the spams toolbox
    D = spams.trainDL(X, K=J, D=init, mode=3, modeD=0, lambda1=N0, verbose=verbose)
    A = spams.omp(X, D=D, L=N0)
    Ad = np.array(A.todense()).reshape(J, K)

    D_c = multi_real2complex((sqrtPsi_inv @ D).T).T
    drawMany(D_c.T, show=False)
    plt.title('Align-first dictionary N0 = {} J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_alignfirst.png', dpi=200)
    plt.show()

    if verbose:
        DA = D_c @ A
        for k in test_k_set:
            display_res(dataset, DA, k, save=save, directory=directory)

    diffs = dataset.T - D_c @ Ad
    if K < 10000:
        E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
    else:
        E = 0
        for k in range(K):
            E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
    print('final loss : ', E)
    print('RMSE :', np.sqrt(E / K))
    if save:
        text_file = open(directory + '/readme_alignfirst.txt', 'a')
        text_file.write('Final loss: {}\n'.format(E))
        text_file.write('Final RMSE: {}\n'.format(np.sqrt(E / K)))
        text_file.close()
    return D_c, Ad, E
def online_learning(Xdev, Ydev, X, Y, covar, display=False, K=1, method='mean'):
    """Uses online dictionary learning on the dev matrices to build and
    estimate Y_hat from the given X."""
    X_init = np.array(Xdev, dtype=np.float64, order="F")
    Y_init = np.array(Ydev, dtype=np.float64, order="F")

    # f here is the number of features;
    # F is the magnitude spectrum frequency number.
    (f, T) = X_init.shape
    (F, T) = Y_init.shape

    # Learning a rank-f model: initialize the model
    A = np.array(np.eye(F), dtype=np.float64, order="F")
    B = np.array(X_init, dtype=np.float64, order="F")
    prev_model = {'A': A, 'B': B, 'iter': T}

    (D, model) = spams.trainDL(Y_init, return_model=True, model=prev_model,
                               iter=40, lambda1=1, posAlpha=False, K=f)
    print(D.shape, X.shape, model['A'].shape)

    Y_hat = np.dot(D, np.dot(model['A'], X))
    print(Y_hat.shape)

    if display:
        plt.figure()
        plt.subplot(121)
        plt.imshow(np.log(Y))
        plt.title('Original response')
        plt.colorbar()
        plt.subplot(122)
        plt.imshow(np.log(Y_hat))
        plt.colorbar()
        plt.title('Predicted response')
        plt.show()

    return Y_hat, D
def getW(self, V):
    W = spams.trainDL(np.asfortranarray(V), K=self.STAIN_NUM,
                      lambda1=self.LAMBDA1, iter=self.ITER, mode=2,
                      modeD=0, posAlpha=True, posD=True, verbose=False)
    W = W / np.linalg.norm(W, axis=0)[None, :]
    if W[0, 0] < W[0, 1]:
        W = W[:, [1, 0]]
    return W
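# Standalone sketch of the same recipe (illustrative values, synthetic data):
# a nonnegative two-column stain matrix learned from optical-density pixels
# with posAlpha/posD, then column-normalized as in getW above.
import numpy as np
import spams

od_pixels = np.abs(np.random.randn(3, 20000))        # fake OD data, 3 channels
W = spams.trainDL(np.asfortranarray(od_pixels), K=2, lambda1=0.1,
                  iter=100, mode=2, modeD=0, posAlpha=True, posD=True,
                  verbose=False)
W = W / np.linalg.norm(W, axis=0)[None, :]
print(W)                                             # columns: stain vectors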
def getstainMat(I, param, i_0):
    # Beer-Lambert transform
    V, VforW = BLtrans(I, i_0)
    out = suppress_stdout()
    # Sparse NMF (learning W; V = WH).
    # W is learnt only from VforW, i.e. by ignoring the white pixels;
    # change VforW to V to estimate W using all pixels.
    Ws = spams.trainDL(np.asfortranarray(np.transpose(VforW)), **param)
    suppress_stdout(out)
    return Ws
def selectByChai2016(nGuide, fileName, parallel, nFrame, initD=None):
    if parallel:
        # X: len(u_s) x nHair, float64
        X, hairHeader, Data = SCGetMatrixAndHeaderMP(fileName, readEachFrame, nFrame)
    else:
        X, hairHeader, Data = SCGetMatrixAndHeader(fileName, readEachFrame, nFrame)

    lambda1 = para.lambda1
    Us = np.asfortranarray(X, 'd')
    params = {'lambda1': lambda1, 'lambda2': 0, 'return_model': True,
              'model': None, 'posAlpha': True}
    # D: len(u_s) x nGuide
    D, ABi = spams.trainDL(Us, D=initD, K=nGuide, iter=100, batchsize=10, **params)

    guide, nGuide = pickGuideHair(D, X)
    print("Got %d guide hairs" % nGuide)
    return guide, nGuide
def get_stain_matrix(I, threshold=0.8, lamda=0.1):
    """
    Get the 2x3 stain matrix: first row H, second row E.

    :param I: image RGB uint8.
    :param threshold: mask threshold for non-white pixels.
    :param lamda: sparsity penalty (lambda1) passed to trainDL.
    :return: the row-normalized stain matrix.
    """
    mask = ut.notwhite_mask(I, thresh=threshold).reshape((-1,))
    OD = ut.RGB_to_OD(I).reshape((-1, 3))
    OD = OD[mask]
    dictionary = spams.trainDL(OD.T, K=2, lambda1=lamda, mode=2, modeD=0,
                               posAlpha=True, posD=True, verbose=False).T
    if dictionary[0, 0] < dictionary[1, 0]:
        dictionary = dictionary[[1, 0], :]
    dictionary = ut.normalize_rows(dictionary)
    return dictionary
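# The ut.* helpers above are external; a minimal sketch of what they likely
# compute (an assumption about the helpers, not their actual source), using
# the standard Beer-Lambert optical-density form:
import numpy as np

def RGB_to_OD(I):
    # Map uint8 RGB to optical density; clamp to 1 to avoid log(0).
    I = np.maximum(I.astype(np.float64), 1.0)
    return -np.log(I / 255.0)

def normalize_rows(A):
    # Scale each row to unit l2 norm.
    return A / np.linalg.norm(A, axis=1, keepdims=True)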
def learn_semantic_atoms(self, matrix, corpus_file, squared_norms, w2i,
                         params, initial_D=None, file_id=None):
    # Reuse a cached dictionary if one was saved for this file_id.
    if file_id is not None and os.path.exists('{}.dict.gz'.format(file_id)):
        D = np.loadtxt('{}.dict.gz'.format(file_id))
        return np.asfortranarray(D)
    D = np.asfortranarray(spams.trainDL(matrix.T, D=initial_D, **params))
    if file_id is not None:
        np.savetxt('{}.dict.gz'.format(file_id), D)
    return D
def sample_test(driverNum, sampleNum, w_len=40, k=500):
    path = '/media/shih/新增磁碟區/ZiWen_packup/drivers/drivers'
    bow_sp = bow.BoW_sp()
    folder = os.listdir(path)
    sample = [int(i) for i in folder]
    sample.remove(driverNum)
    rand_driver = [random.choice(sample) for i in range(0, sampleNum)]
    a = 1.0 / math.sqrt(w_len)
    trajectory = list()

    # Iteratively process drivers' data: load one random trajectory
    # (of the 200 available) for each sampled driver.
    for i, driver in enumerate(rand_driver):
        j = random.randint(1, 200)
        temp = numpy.genfromtxt(os.path.join(path, str(driver), str(j) + '.csv'),
                                delimiter=',', skip_header=True)
        temp = dataTransformation.trip_diff(temp)
        trajectory.append(numpy.asarray(temp))

    trajectory = bow_sp.segment(trajectory, w_len=40)
    print([len(trajectory), numpy.shape(numpy.hstack(trajectory))])
    D = spams.trainDL(numpy.asfortranarray(numpy.hstack(trajectory)),
                      K=k, lambda1=a, posAlpha=True, iter=-3)
    sparseCode = bow_sp.coding_series(trajectory, D, a=a, iter=-3)
    # No need to normalize: trajectories always have different lengths.
    return sparseCode
def get_stain_matrix(self, source_image):
    """
    OD = SV. Get V.

    Parameters
    ----------
    source_image: array_like
        np.uint8 array of rgb values

    Returns
    -------
    stain_matrix: array_like
        2 x M stain matrix for an N x M matrix
    """
    OD = RGB2OD(source_image)
    OD = OD.reshape((-1, 3))
    if self.maskout_white:
        nonwhite_mask = get_nonwhite_mask(
            source_image, self.nonwhite_threshold).reshape((-1,))
        OD = OD[nonwhite_mask]
    OD = OD[(OD > self.beta).any(axis=1), :]
    self.OD = OD

    # Objective: ||X - Da||_2^2 + lambda1 * ||a||_1 + lambda2 * ||a||_2^2
    param = {
        "K": 2,
        "lambda1": self.lambda1,
        "lambda2": self.lambda2,
        "gamma1": self.gamma1,
        "mode": 2,
        "modeD": 1,
        "posD": True,
        "posAlpha": True,
        "verbose": False,
    }
    stain_matrix = spams.trainDL(OD.T, **param).T
    if stain_matrix[0, 0] < stain_matrix[1, 0]:
        stain_matrix = stain_matrix[[1, 0], :]
    return stain_matrix
def selectByChai2016New(nGuide, fileName, parallel, nFrame, initD=None):
    if parallel:
        # X: len(u_s) x nHair, float64
        X, hairHeader, Data = SCGetMatrixAndHeaderMP(fileName, readEachFrameNoDir, nFrame)
    else:
        X, hairHeader, Data = SCGetMatrixAndHeader(fileName, readEachFrameNoDir, nFrame)

    offset = hairHeader.factor * 3
    X0 = X[:offset, :]
    X = X[offset:, :] - np.tile(X0, (nFrame - 1, 1))

    lambda1 = para.lambda1
    Us = np.asfortranarray(X, 'd')
    params = {'lambda1': lambda1, 'lambda2': 0, 'return_model': True,
              'model': None, 'posAlpha': True}
    # D: len(u_s) x nGuide
    D, ABi = spams.trainDL(Us, D=initD, K=nGuide, iter=100, batchsize=10, **params)

    norm = lambda x: np.linalg.norm(x)
    guide, nGuide = pickGuideHair(D, X, norm, norm)
    print("Got %d guide hairs" % nGuide)
    return guide, nGuide
def learn_D(segment_list, k, lambda1=None, batch=False, iter1=-5):
    """Learn a dictionary from the given series with the input parameters.

    Args:
        segment_list(list): each item contains m subsequences sliced from an
            original time series instance
        k(int): size of the dictionary
        lambda1(float): lambda coefficient in sparse coding,
            |X - D*a|^2 + lambda*|a|^1. For more information, see the spams
            package:
            http://spams-devel.gforge.inria.fr/doc-python/html/doc_spams004.html#sec5
        batch(bool): online learning or batch learning for sparse coding
            (see the spams docs above)
        iter1(int): learning iterations (see the spams docs above)

    Returns:
        D(numpy 2d-array): the learned dictionary
    """
    # Stack the training segments horizontally
    temp = numpy.hstack(segment_list)
    if lambda1 is None:
        lambda1 = 1.0 / math.sqrt(temp.shape[0])

    # Log learning information
    log_msg = "learning dictionary with lambda: %f" % (lambda1,)
    print(log_msg)
    logger.info(log_msg)

    # Learn dictionary
    print("About to train")
    D = spams.trainDL(numpy.asfortranarray(temp, dtype=numpy.float64), K=k,
                      lambda1=lambda1, batch=batch, iter=iter1, posAlpha=True)
    print("Trained")
    return D
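# Usage sketch (synthetic data, illustrative shapes): learning a small
# dictionary from random segments with the module-level learn_D above.
# Assumes the module's numpy/math/logger imports are in place.
import numpy

segments = [numpy.random.rand(40, 25) for _ in range(8)]  # 8 items, 25 windows of length 40
D = learn_D(segments, k=16)                               # lambda1 defaults to 1/sqrt(40)
print(D.shape)                                            # (40, 16)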
def fit(self, X, y=None):
    '''Fit an NMF model using the spams package.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Data matrix to be fitted by the model
    y : ignored

    Returns
    -------
    self
    '''
    # Set the seed for numpy.random
    np.random.seed(self.random_state)

    # Create bootstrapped X
    if self.bootstrap:
        n_samples = X.shape[0]
        bootstrap_X = X[np.random.choice(n_samples, n_samples, replace=True)]
    else:
        bootstrap_X = X

    # Compute the initialization dictionary
    initialization = initialguess(bootstrap_X.T, self.n_components)

    # Use spams to compute the PPs. X is flipped because spams requires
    # features as rows.
    Dsolution = spams.trainDL(
        np.asfortranarray(bootstrap_X.T),
        D=initialization,
        **self.arguments)

    self.components_ = Dsolution.T
    return self
def extract_components(mov_tot, n_components=6, normalize_std=True,
                       max_iter_DL=-30, method_factorization='nmf', **kwargs):
    """Extract spatial and temporal components from optical flow images.

    Parameters:
    ----------
    mov_tot: ndarray (can be 3 or 4D)
        contains the optical flow values, either in cartesian or polar,
        either one (3D) or both (4D) coordinates; the input is generated by
        the compute_optical_flow function

    n_components: int
        number of components to look for

    normalize_std: bool
        whether to normalize each of the optical flow components

    normalize_output_traces: boolean
        whether to normalize the behavioral traces so that they match the
        units in the movie

    Returns:
    -------
    spatial_filter: ndarray
        set of inferred spatial filters

    time_trace: ndarray
        set of time components

    norm_fact: ndarray
        normalization factors used
    """
    if mov_tot.ndim == 4:
        if normalize_std:
            norm_fact = np.nanstd(mov_tot, axis=(1, 2, 3))
            mov_tot = old_div(mov_tot, norm_fact[:, np.newaxis, np.newaxis, np.newaxis])
        else:
            norm_fact = np.array([1., 1.])
        c, T, d1, d2 = np.shape(mov_tot)
    else:
        norm_fact = 1
        T, d1, d2 = np.shape(mov_tot)
        c = 1

    tt = time.time()
    newm = np.reshape(mov_tot, (c * T, d1 * d2))

    if method_factorization == 'nmf':
        nmf = NMF(n_components=n_components, **kwargs)
        time_trace = nmf.fit_transform(newm)
        spatial_filter = nmf.components_
        spatial_filter = np.concatenate(
            [np.reshape(sp, (d1, d2))[np.newaxis, :, :] for sp in spatial_filter],
            axis=0)
    elif method_factorization == 'dict_learn':
        import spams
        newm = np.asfortranarray(newm, dtype=np.float32)
        time_trace = spams.trainDL(newm, K=n_components, mode=0, lambda1=1,
                                   posAlpha=True, iter=max_iter_DL)
        spatial_filter = spams.lasso(newm, D=time_trace, return_reg_path=False,
                                     lambda1=0.01, mode=spams.spams_wrap.PENALTY,
                                     pos=True)
        spatial_filter = np.concatenate(
            [np.reshape(sp, (d1, d2))[np.newaxis, :, :]
             for sp in spatial_filter.toarray()],
            axis=0)
        time_trace = [np.reshape(ttr, (c, T)).T for ttr in time_trace.T]

    el_t = time.time() - tt
    print(el_t)
    return spatial_filter, time_trace, norm_fact
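# Usage sketch (synthetic input, illustrative shapes): running the dict_learn
# branch of extract_components above on a small random movie. Assumes the
# function's numpy/time/spams dependencies are importable.
import numpy as np

mov = np.random.rand(100, 16, 16).astype(np.float32)   # T x d1 x d2
spatial, traces, norm = extract_components(mov, n_components=4,
                                           max_iter_DL=100,  # 100 iterations
                                           method_factorization='dict_learn')
print(spatial.shape)                                   # (4, 16, 16)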
def test_trainDL():
    img_file = 'boat.png'
    try:
        img = Image.open(img_file)
    except Exception:
        print("Cannot load image %s : skipping test" % img_file)
        return None
    I = np.array(img) / 255.
    if I.ndim == 3:
        A = np.asfortranarray(I.reshape((I.shape[0], I.shape[1] * I.shape[2])))
        rgb = True
    else:
        A = np.asfortranarray(I)
        rgb = False

    m = 8
    n = 8
    X = spams.im2col_sliding(A, m, n, rgb)
    X = X - np.tile(np.mean(X, 0), (X.shape[0], 1))
    X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)), (X.shape[0], 1)),
                          dtype=myfloat)
    param = {'K': 100,  # learns a dictionary with 100 elements
             'lambda1': 0.15,
             'numThreads': 4,
             'batchsize': 400,
             'iter': 1000}

    ########## FIRST EXPERIMENT ###########
    tic = time.time()
    D = spams.trainDL(X, **param)
    tac = time.time()
    t = tac - tic
    print('time of computation for Dictionary Learning: %f' % t)
    # save dictionary as dict.png
    _objective(X, D, param, 'dict')

    #### SECOND EXPERIMENT ####
    print("*********** SECOND EXPERIMENT ***********")
    X1 = X[:, 0:X.shape[1] // 2]
    X2 = X[:, X.shape[1] // 2 - 1:]
    param['iter'] = 500
    tic = time.time()
    (D, model) = spams.trainDL(X1, return_model=True, **param)
    tac = time.time()
    t = tac - tic
    print('time of computation for Dictionary Learning: %f\n' % t)
    _objective(X, D, param, 'dict1')

    # Then reuse the learned model to retrain a few iterations more.
    param2 = param.copy()
    param2['D'] = D
    tic = time.time()
    (D, model) = spams.trainDL(X2, return_model=True, model=model, **param2)
    tac = time.time()
    t = tac - tic
    print('time of computation for Dictionary Learning: %f' % t)
    _objective(X, D, param, 'dict2')

    #################### THIRD & FOURTH EXPERIMENT ######################
    # let us add sparsity to the dictionary itself
    print('*********** THIRD EXPERIMENT ***********')
    param['modeParam'] = 0
    param['iter'] = 1000
    param['gamma1'] = 0.3
    param['modeD'] = 1
    tic = time.time()
    D = spams.trainDL(X, **param)
    tac = time.time()
    t = tac - tic
    print('time of computation for Dictionary Learning: %f' % t)
    _objective(X, D, param)

    print('*********** FOURTH EXPERIMENT ***********')
    param['modeParam'] = 0
    param['iter'] = 1000
    param['gamma1'] = 0.3
    param['modeD'] = 3
    tic = time.time()
    D = spams.trainDL(X, **param)
    tac = time.time()
    t = tac - tic
    print('time of computation for Dictionary Learning: %f' % t)
    _objective(X, D, param)
    return None
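# The warm-start pattern from the second experiment above, distilled into a
# sketch (X1, X2 and param as defined in the test): pass the learned D as the
# initial dictionary and the returned model to resume online learning.
D, model = spams.trainDL(X1, return_model=True, **param)       # first pass
param2 = dict(param, D=D)                                      # seed with learned D
D, model = spams.trainDL(X2, return_model=True, model=model, **param2)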
for m, n in itertools.product(range(20, 51, 5), range(100, 1001, 100)):
    counter = 0
    result = 0
    # Iteratively process drivers' data
    for driver in sorted(folder):
        print('---------------|' + str(driver) + '|-------------------')
        # Load 200 trajectories for each driver
        trajectory = list()
        for j in range(1, 201):
            temp = numpy.genfromtxt(os.path.join(path, str(driver), str(j) + '.csv'),
                                    delimiter=',', skip_header=True)
            trajectory.append(temp)
        trajectory = bow_inter.diff(trajectory)
        trajectory_seg = bow_inter.slice(trajectory, w_len=m)
        D = spams.trainDL(numpy.asfortranarray(trajectory_seg), K=n,
                          lambda1=a, posAlpha=True, iter=-3)
        driverCode = bow_inter.bow(trajectory, D, a=a, w_len=m)
        # No need to normalize: trajectories always have different lengths.

        # ======================== sample negative data ==========================
        sample = [int(i) for i in folder]
        sample.remove(int(driver))
        # Load 200 trajectories for random drivers
        rand_driver = [random.choice(sample) for i in range(0, 200)]
        trajectory = list()
submatrix = np.asfortranarray(np.zeros((n, submatrix_size), dtype=np.float32))
D = None
for k in range(submatrix_iterations):
    print("iteration on submatrices " + str(k))
    submatrix_nonzero_indices = global_nonzero_indices[(submatrix_size * k):(submatrix_size * (k + 1))]
    print("read data")
    for i in range(len(Kmer_Hash_Count_Files)):
        submatrix[i, :] = np.memmap(Kmer_Hash_Count_Files[i], dtype='float32',
                                    mode='r')[submatrix_nonzero_indices]
    print("matrix normalization")
    submatrix = submatrix / np.sqrt(np.sum(submatrix * submatrix, 0))
    # Warm-start each round from the previous dictionary via D=D.
    D = spams.trainDL(submatrix, D=D, K=args.cluster_number,
                      lambda1=args.lambda1, lambda2=args.lambda2,
                      posAlpha=True, posD=True, rho=1.0, iter=args.iter_nb,
                      batchsize=args.batchsize, numThreads=cpu)

np.save(args.outputdir + "cluster_index.npy", D.T)
print(D)

print("cluster kmers")
cluster_cols = np.zeros(2**hash_size, dtype='uint16')
submatrix_size = int(memory_limit / (4 * (args.cluster_number + n)))
submatrix_iterations = int(nzi / submatrix_size) + 1
submatrix_size = min(nzi, submatrix_size)
def denoise(data, block_size, overlap, param_alpha, param_D, variance,
            n_iter=10, mask=None, dtype=np.float64):
    # No overlapping blocks for training
    no_over = (0, 0, 0, 0)
    X = im2col_nd(data, block_size, no_over)

    # Solving for D
    param_alpha['pos'] = True
    param_alpha['mode'] = 2
    param_alpha['lambda1'] = 1.2 / np.sqrt(np.prod(block_size))

    param_D['verbose'] = False
    param_D['posAlpha'] = True
    param_D['posD'] = True
    param_D['mode'] = 2
    param_D['lambda1'] = 1.2 / np.sqrt(np.prod(block_size))
    param_D['K'] = int(2 * np.prod(block_size))
    param_D['iter'] = 150
    param_D['batchsize'] = 500

    if 'D' in param_alpha:
        param_D['D'] = param_alpha['D']

    mask_col = im2col_nd(np.broadcast_to(mask[..., None], data.shape),
                         block_size, no_over)
    train_idx = np.sum(mask_col, axis=0) > mask_col.shape[0] / 2

    train_data = X[:, train_idx]
    train_data = np.asfortranarray(train_data[:, np.any(train_data != 0, axis=0)],
                                   dtype=dtype)
    train_data /= np.sqrt(np.sum(train_data**2, axis=0, keepdims=True), dtype=dtype)

    param_alpha['D'] = spams.trainDL(train_data, **param_D)
    param_alpha['D'] /= np.sqrt(np.sum(param_alpha['D']**2, axis=0,
                                       keepdims=True, dtype=dtype))
    param_D['D'] = param_alpha['D']
    del train_data

    n_cores = param_alpha['numThreads']
    param_alpha['numThreads'] = 1
    param_D['numThreads'] = 1

    time_multi = time()
    pool = Pool(processes=n_cores)
    arglist = [(data[:, :, k:k + block_size[2]],
                mask[:, :, k:k + block_size[2]],
                variance[:, :, k:k + block_size[2]],
                block_size_subset, overlap_subset, param_alpha_subset,
                param_D_subset, dtype_subset, n_iter_subset)
               for k, block_size_subset, overlap_subset, param_alpha_subset,
                   param_D_subset, dtype_subset, n_iter_subset
               in zip(range(data.shape[2] - block_size[2] + 1),
                      repeat(block_size), repeat(overlap), repeat(param_alpha),
                      repeat(param_D), repeat(dtype), repeat(n_iter))]
    data_denoised = pool.map(processer, arglist)
    pool.close()
    pool.join()

    param_alpha['numThreads'] = n_cores
    param_D['numThreads'] = n_cores
    print('Multiprocessing done in {0:.2f} mins.'.format((time() - time_multi) / 60.))

    # Put together the multiprocessed results
    data_subset = np.zeros_like(data)
    divider = np.zeros_like(data, dtype=np.int16)
    ones = np.ones_like(data_denoised[0], dtype=np.int16)

    for k in range(len(data_denoised)):
        data_subset[:, :, k:k + block_size[2]] += data_denoised[k]
        divider[:, :, k:k + block_size[2]] += ones

    data_subset /= divider
    return data_subset
param = {
    'lambda1': Lambda,
    'numThreads': -1,                # number of threads
    'batchsize': min(1024, n),
    'posD': True,                    # positive dictionary
    'iter': 500,                     # number of iterations
    'modeD': 0,
    'verbose': 0,                    # print out update information?
    'posAlpha': 1,                   # positive alpha
    'gamma1': gamma1,                # penalty on the dictionary patterns
    'D': np.asfortranarray(D0)       # set initial values
}
X = np.asfortranarray(X, dtype=float)
Dtemplate = spams.trainDL(X, **param)

# For each fixed dictionary size K, repeat dictionary learning several
# times, each with a different initial value.
test_cases = 10
R = np.zeros((test_cases,))
for i in range(0, test_cases):
    if randomStart == 1:
        D0 = util.dictLearnInit(X, K, 'random', 0)
        param['D'] = np.asfortranarray(D0)
    lparam = {'lambda1': Lambda, 'pos': True, 'mode': 2, 'numThreads': -1}
    D = spams.trainDL(X, **param)
rgb = False
m = 8
n = 8
X = spams.im2col_sliding(A, m, n, rgb)
X = X - np.tile(np.mean(X, 0), (X.shape[0], 1))
X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)), (X.shape[0], 1)),
                      dtype=myfloat)
param = {'K': 100,  # learns a dictionary with 100 elements
         'lambda1': 0.15,
         'numThreads': 4,
         'batchsize': 400,
         'iter': 10}
paramL = {'lambda1': 0.15, 'numThreads': 4}

########## FIRST EXPERIMENT ###########
tic = time.time()
D = spams.trainDL(X, **param)
tac = time.time()
t = tac - tic
print('time of computation for Dictionary Learning: %f' % t)
print("DTYPE %s" % str(D.dtype))

print('Evaluating cost function...')
alpha = spams.lasso(X, D, **paramL)
print("XX X %s, D %s, alpha %s" % (str(X.shape), str(D.shape), str(alpha.shape)))
y = X
if alpha.shape[1] > 1000:
    alpha = alpha[:, 0:1000]
    y = X[:, 0:1000]
a = alpha.todense()
print("XXa %s" % str(a.shape))
def run_multiprocessing_array_spams_trainDL(result_array_type, result_array,
                                            X_array_type, X_array,
                                            D_is_arg=False,
                                            D_array_type=None, D_array=None,
                                            *args, **kwargs):
    """ Designed to start spams.trainDL in a separate process and handle the
    result in an unnoticeably different way.

    It is necessary to run SPAMS in a separate process as segmentation
    faults have been discovered in later parts of the Python code dependent
    on whether SPAMS has run or not. It is suspected that spams may
    interfere with the interpreter. Thus, it should be sandboxed (run in a
    different Python interpreter) so that it doesn't damage what happens in
    this one.

    This particular version uses a multiprocessing.Array to share memory to
    return the resulting dictionary.

    Args:
        result_array_type(numpy.ctypeslib.ndpointer): unused, will be
            dropped. A pointer type with properties needed by result_array.
        result_array(multiprocessing.RawArray): shared memory array to store
            results in.
        X_array_type(numpy.ctypeslib.ndpointer): unused, will be dropped.
            A pointer type with properties needed by X_array.
        X_array(numpy.ndarray): currently uses a numpy ndarray as input.
        D_is_arg(bool): whether D either is an arg and/or should be an arg.
        D_array_type(numpy.ctypeslib.ndpointer): unused, will be dropped.
            A pointer type with properties needed by D_array.
        D_array(numpy.ndarray): currently uses a numpy ndarray as the
            initial dictionary.
        *args(list): a list of positional arguments to pass to spams.trainDL.
        **kwargs(dict): a dictionary of keyword arguments to pass to
            spams.trainDL.

    Note:
        This is somewhat faster than using multiprocessing.Queue.
    """
    # Just to make sure these exist in the new process; shouldn't be
    # necessary. spams is not needed outside of calling this function.
    import numpy
    import spams

    with npctypes.shared.as_ndarray(X_array) as X:
        with npctypes.shared.as_ndarray(result_array) as result:
            if D_array is not None:
                with npctypes.shared.as_ndarray(D_array) as D:
                    if D_is_arg:
                        args = list(args)
                        args[3] = D
                    else:
                        kwargs["D"] = D
                    result[:] = spams.trainDL(X, *args, **kwargs)
            else:
                result[:] = spams.trainDL(X, *args, **kwargs)
X = X[0]

# extract 2d patches from the image
X_dl = extract_patches_2d(X, atom_support, max_patches=max_patches)
X_dl = X_dl.reshape(X_dl.shape[0], -1)
norm = np.linalg.norm(X_dl, axis=1)
mask = norm != 0
X_dl = X_dl[mask]
X_dl /= norm[mask][:, None]

meta = dict(lambda1=reg, iter=10_000, mode=2, posAlpha=True, posD=False)

# Learn the dictionary with spams
D_dl = spams.trainDL(np.asfortranarray(X_dl.T, dtype=np.float64),
                     numThreads=n_jobs, batchsize=512, K=n_atoms,
                     **meta, verbose=False).T

return D_dl.reshape(n_atoms, 1, *atom_support), meta


@memory.cache
def compute_cdl(X, n_atoms, atom_support, D_init, reg=.2, window=False,
                n_jobs=10):
    """Compute dictionary using Dicodile.
import numpy as np
import spams

param = {'K': 100,
         'lambda1': 0.15,
         'numThreads': 4,
         'batchsize': 400,
         'iter': 10}

X = np.zeros((5, 5), dtype=float)
X = np.asfortranarray(X)
D = spams.trainDL(X, **param)
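# The all-zero matrix above is a degenerate smoke test; a sketch with random,
# column-normalized data gives trainDL something meaningful to factorize
# (shapes and values are illustrative):
X = np.asfortranarray(np.random.rand(64, 2000))
X /= np.sqrt((X * X).sum(axis=0))
D = spams.trainDL(X, **param)
print(D.shape)   # (64, 100)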
import pickle
import spams

parser = argparse.ArgumentParser(description='dictionary learning')
parser.add_argument('--feature_file', dest='feature_file', help='feature file',
                    default='cache/train_deep_feat.pkl', type=str)
args = parser.parse_args()

if not os.path.exists(args.feature_file):
    print('feature file does not exist!')
    sys.exit(-1)

with open(args.feature_file, 'rb') as fd:
    t1 = time.time()
    feat = pickle.load(fd)
    t2 = time.time()

feat = feat[1].T
print('feature file loaded from ' + args.feature_file + ' in %f seconds' % (t2 - t1))

dl_params = {'K': 100,
             'lambda1': 0.15,
             'numThreads': 4,
             'batchsize': 400,
             'iter': 1000}
t1 = time.time()
D = spams.trainDL(feat, **dl_params)
t2 = time.time()
print('Dictionary Learning: finished %d iterations in %f seconds'
      % (dl_params['iter'], t2 - t1))
def run_multiprocessing_array_spams_trainDL(result_array_type, result_array,
                                            X_array_type, X_array,
                                            *args, **kwargs):
    """ Designed to start spams.trainDL in a separate process and handle the
    result in an unnoticeably different way.

    It is necessary to run SPAMS in a separate process as segmentation
    faults have been discovered in later parts of the Python code dependent
    on whether SPAMS has run or not. It is suspected that spams may
    interfere with the interpreter. Thus, it should be sandboxed (run in a
    different Python interpreter) so that it doesn't damage what happens in
    this one.

    This particular version uses a multiprocessing.Array to share memory to
    return the resulting dictionary.

    Args:
        result_array_type(numpy.ctypeslib.ndpointer): a pointer type with
            properties needed by result_array.
        result_array(multiprocessing.RawArray): shared memory array to store
            results in.
        X_array_type(numpy.ctypeslib.ndpointer): a pointer type with
            properties needed by X_array.
        X_array(numpy.ndarray): currently uses a numpy ndarray as input.
        *args(list): a list of positional arguments to pass to spams.trainDL.
        **kwargs(dict): a dictionary of keyword arguments to pass to
            spams.trainDL.

    Note:
        This is somewhat faster than using multiprocessing.Queue.
    """
    # Just to make sure these exist in the new process; shouldn't be
    # necessary. spams is not needed outside of calling this function.
    import numpy
    import spams

    as_ordered_array_dict = {
        "F_CONTIGUOUS": numpy.asfortranarray,
        "C_CONTIGUOUS": numpy.ascontiguousarray
    }

    # Construct X from the shared array.
    X_dtype = X_array_type._dtype_
    X_shape = X_array_type._shape_
    X_flags = numpy.core.multiarray.flagsobj(X_array_type._flags_)
    X = numpy.frombuffer(X_array, dtype=X_dtype).reshape(X_shape)
    X.setflags(X_flags)
    for order_name, as_ordered_array in as_ordered_array_dict.items():
        if order_name in X_array_type.__name__:
            X = as_ordered_array(X)

    # Construct the result to use the shared buffer.
    result_dtype = result_array_type._dtype_
    result_shape = result_array_type._shape_
    result_flags = numpy.core.multiarray.flagsobj(result_array_type._flags_)
    result = numpy.frombuffer(result_array, dtype=result_dtype).reshape(result_shape)
    result.setflags(result_flags)
    for order_name, as_ordered_array in as_ordered_array_dict.items():
        if order_name in result_array_type.__name__:
            result = as_ordered_array(result)

    result[:] = spams.trainDL(X, *args, **kwargs)
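# Usage sketch (assumptions throughout): building the shared buffers and
# ndpointer types the runner above expects, then training in a child process.
# The X type is declared F_CONTIGUOUS so the runner converts it for spams;
# the result type is left C-contiguous so the runner's write lands directly
# in the shared buffer. Shapes and parameters are illustrative only.
import ctypes
import multiprocessing
import numpy as np

m, n, K = 64, 10000, 100
X_local = np.random.rand(m, n)                       # C-ordered source data

X_type = np.ctypeslib.ndpointer(dtype=np.float64, shape=(m, n), flags='F_CONTIGUOUS')
D_type = np.ctypeslib.ndpointer(dtype=np.float64, shape=(m, K), flags='C_CONTIGUOUS')

X_shared = multiprocessing.RawArray(ctypes.c_double, m * n)
np.frombuffer(X_shared, dtype=np.float64)[:] = X_local.ravel(order='C')
D_shared = multiprocessing.RawArray(ctypes.c_double, m * K)

p = multiprocessing.Process(
    target=run_multiprocessing_array_spams_trainDL,
    args=(D_type, D_shared, X_type, X_shared),
    kwargs={'K': K, 'lambda1': 0.15, 'iter': 100, 'numThreads': 1},
)
p.start()
p.join()
D = np.frombuffer(D_shared, dtype=np.float64).reshape((m, K))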
import numpy as np
import scipy.io
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d as p3d
import spams

from datasets.datasetMat import CMUDataSet

# The coordinate system of poses3Ds:
#   origin: (x_0, y_0, z_0), the mean value of all joints' coordinates (x, y, z)
#   x axis: vector pointing from the left shoulder to the right shoulder
#   y axis: vector pointing from the midpoint of the shoulders to the waist
#   z axis: the cross product of the x axis and the y axis
poses3Ds, scale3 = CMUDataSet.getPose3DNormalized()
poses2Ds, scale2 = CMUDataSet.getPose2DNormalized()

# Not the same result compared to the author's original source code
param = {'K': 200, 'lambda1': 0.01, 'iter': 300}
B = spams.trainDL(np.asfortranarray(poses3Ds), **param)

init_pose = np.expand_dims(np.mean(poses3Ds, 1), axis=1)
print(init_pose.shape)

scipy.io.savemat('./datasets/BaseMatrix.mat',
                 mdict={'init_pose': init_pose, 'B': B})
def local_denoise(data, block_size, overlap, variance, n_iter=10, mask=None,
                  dtype=np.float64, n_cores=None, use_threading=False,
                  verbose=False, mp_method=None):
    if verbose:
        logger.setLevel(logging.INFO)

    if mask is None:
        mask = np.ones(data.shape[:-1], dtype=bool)

    # No overlapping blocks for training
    no_over = (0, 0, 0, 0)
    X = im2col_nd(data, block_size, no_over)

    # Solving for D
    param_alpha = {}
    param_alpha['pos'] = True
    param_alpha['mode'] = 1

    param_D = {}
    param_D['verbose'] = False
    param_D['posAlpha'] = True
    param_D['posD'] = True
    param_D['mode'] = 2
    param_D['lambda1'] = 1.2 / np.sqrt(np.prod(block_size))
    param_D['K'] = int(2 * np.prod(block_size))
    param_D['iter'] = 150
    param_D['batchsize'] = 500
    param_D['numThreads'] = n_cores

    if 'D' in param_alpha:
        param_D['D'] = param_alpha['D']

    mask_col = im2col_nd(np.broadcast_to(mask[..., None], data.shape),
                         block_size, no_over)
    train_idx = np.sum(mask_col, axis=0) > (mask_col.shape[0] / 2.)

    train_data = X[:, train_idx]
    train_data = np.asfortranarray(train_data[:, np.any(train_data != 0, axis=0)],
                                   dtype=dtype)
    train_data /= np.sqrt(np.sum(train_data**2, axis=0, keepdims=True), dtype=dtype)

    param_alpha['D'] = spams.trainDL(train_data, **param_D)
    param_alpha['D'] /= np.sqrt(
        np.sum(param_alpha['D']**2, axis=0, keepdims=True, dtype=dtype))
    param_D['D'] = param_alpha['D']
    del train_data, X, mask_col

    if use_threading or (n_cores == 1):
        param_alpha['numThreads'] = n_cores
        param_D['numThreads'] = n_cores
    else:
        param_alpha['numThreads'] = 1
        param_D['numThreads'] = 1

    arglist = ((data[:, :, k:k + block_size[2]],
                mask[:, :, k:k + block_size[2]],
                variance[:, :, k:k + block_size[2]],
                block_size, overlap, param_alpha, param_D, dtype, n_iter)
               for k in range(data.shape[2] - block_size[2] + 1))

    if use_threading:
        data_denoised = starmap(processer, arglist)
    else:
        time_multi = time()
        parallel_processer = multiprocesser(processer, n_cores=n_cores,
                                            mp_method=mp_method)
        data_denoised = parallel_processer(arglist)
        logger.info('Multiprocessing done in {0:.2f} mins.'.format(
            (time() - time_multi) / 60.))

    # Put together the multiprocessed results
    data_subset = np.zeros_like(data, dtype=np.float32)
    divider = np.zeros_like(data, dtype=np.int16)

    for k, content in enumerate(data_denoised):
        data_subset[:, :, k:k + block_size[2]] += content
        divider[:, :, k:k + block_size[2]] += 1

    data_subset /= divider
    return data_subset