def update_temporal_components(Y, A, b, Cin, fin, bl=None, c1=None, g=None, sn=None, nb=1, ITER=2, method_foopsi='constrained_foopsi', memory_efficient=False, debug=False, dview=None, **kwargs): """Update temporal components and background given spatial components using a block coordinate descent approach. Parameters ----------- Y: np.ndarray (2D) input data with time in the last axis (d x T) A: sparse matrix (crc format) matrix of temporal components (d x K) b: ndarray (dx1) current estimate of background component Cin: np.ndarray current estimate of temporal components (K x T) fin: np.ndarray current estimate of temporal background (vector of length T) g: np.ndarray Global time constant (not used) bl: np.ndarray baseline for fluorescence trace for each column in A c1: np.ndarray initial concentration for each column in A g: np.ndarray discrete time constant for each column in A sn: np.ndarray noise level for each column in A nb: [optional] int Number of background components ITER: positive integer Maximum number of block coordinate descent loops. method_foopsi: string Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment. n_processes: int number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster. backend: 'str' single_thread no parallelization ipyparallel, parallelization using the ipyparallel cluster. You should start the cluster (install ipyparallel and then type ipcluster -n 6, where 6 is the number of processes). SLURM: using SLURM scheduler memory_efficient: Bool whether or not to optimize for memory usage (longer running times). nevessary with very large datasets **kwargs: dict all parameters passed to constrained_foopsi except bl,c1,g,sn (see documentation). Some useful parameters are p: int order of the autoregression model method: [optional] string solution method for constrained foopsi. Choices are 'cvx': using cvxopt and picos (slow especially without the MOSEK solver) 'cvxpy': using cvxopt and cvxpy with the ECOS solver (faster, default) solvers: list string primary and secondary (if problem unfeasible for approx solution) solvers to be used with cvxpy, default is ['ECOS','SCS'] Note -------- The temporal components are updated in parallel by default by forming of sequence of vertex covers. Returns -------- C: np.ndarray matrix of temporal components (K x T) f: np.array vector of temporal background (length T) S: np.ndarray matrix of merged deconvolved activity (spikes) (K x T) bl: float same as input c1: float same as input g: float same as input sn: float same as input YrA: np.ndarray matrix of spatial component filtered raw data, after all contributions have been removed. YrA corresponds to the residual trace for each component and is used for faster plotting (K x T) """ if not kwargs.has_key('p') or kwargs['p'] is None: raise Exception("You have to provide a value for p") d, T = np.shape(Y) nr = np.shape(A)[-1] if b is not None: if b.shape[0] < b.shape[1]: b = b.T nb = b.shape[1] if bl is None: bl = np.repeat(None, nr) if c1 is None: c1 = np.repeat(None, nr) if g is None: g = np.repeat(None, nr) if sn is None: sn = np.repeat(None, nr) A = scipy.sparse.hstack((A, coo_matrix(b))) S = np.zeros(np.shape(Cin)) Cin = np.vstack((Cin, fin)) C = Cin nA = np.squeeze(np.array(np.sum(np.square(A.todense()), axis=0))) Cin = coo_matrix(Cin) #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A))) YA = (A.T.dot(Y).T) * spdiags(1. / nA, 0, nr + nb, nr + nb) AA = ((A.T.dot(A)) * spdiags(1. / nA, 0, nr + nb, nr + nb)).tocsr() YrA = YA - Cin.T.dot(AA) #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)))*spdiags(1./nA,0,nr+1,nr+1) Cin = np.array(Cin.todense()) for iter in range(ITER): O, lo = update_order(A.tocsc()[:, :nr]) P_ = [] for count, jo_ in enumerate(O): jo = np.array(list(jo_)) #Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T Ytemp = YrA[:, jo.flatten()] + Cin[jo, :].T Ctemp = np.zeros((np.size(jo), T)) Stemp = np.zeros((np.size(jo), T)) btemp = np.zeros((np.size(jo), 1)) sntemp = btemp.copy() c1temp = btemp.copy() gtemp = np.zeros((np.size(jo), kwargs['p'])) nT = nA[jo] # args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))] args_in = [(np.squeeze(np.array(Ytemp[:, jj])), nT[jj], jj, None, None, None, None, kwargs) for jj in range(len(jo))] # import pdb # pdb.set_trace() if dview is not None: # if debug: results = dview.map_async(constrained_foopsi_parallel, args_in) results.get() for outp in results.stdout: print outp[:-1] sys.stdout.flush() for outp in results.stderr: print outp[:-1] sys.stderr.flush() else: results = dview.map_sync(constrained_foopsi_parallel, args_in) else: results = map(constrained_foopsi_parallel, args_in) for chunk in results: pars = dict() C_, Sp_, Ytemp_, cb_, c1_, sn_, gn_, jj_ = chunk Ctemp[jj_, :] = C_[None, :] Stemp[jj_, :] = Sp_ Ytemp[:, jj_] = Ytemp_[:, None] btemp[jj_] = cb_ c1temp[jj_] = c1_ sntemp[jj_] = sn_ gtemp[jj_, :] = gn_.T bl[jo[jj_]] = cb_ c1[jo[jj_]] = c1_ sn[jo[jj_]] = sn_ g[jo[jj_]] = gn_.T if kwargs['p'] > 0 else [] #gtemp[jj,:] pars['b'] = cb_ pars['c1'] = c1_ pars['neuron_sn'] = sn_ pars['gn'] = gtemp[jj_, np.abs(gtemp[jj, :]) > 0] pars['neuron_id'] = jo[jj_] P_.append(pars) YrA -= (Ctemp - C[jo, :]).T * AA[jo, :] #YrA[:,jo] = Ytemp C[jo, :] = Ctemp.copy() S[jo, :] = Stemp # if (np.sum(lo[:jo])+1)%1 == 0: print str(np.sum(lo[:count + 1])) + ' out of total ' + str( nr) + ' temporal components updated' ii = nr #YrA[:,ii] = YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T #cc = np.maximum(YrA[:,ii],0) for ii in np.arange(nr, nr + nb): cc = np.maximum(YrA[:, ii] + np.atleast_2d(Cin[ii, :]).T, 0) YrA -= (cc - np.atleast_2d(Cin[ii, :]).T) * AA[ii, :] C[ii, :] = cc.T #YrA = YA - C.T.dot(AA) #YrA[:,ii] = YrA[:,ii] - np.atleast_2d(C[ii,:]).T if dview is not None: dview.results.clear() if scipy.linalg.norm(Cin - C, 'fro') / scipy.linalg.norm( C, 'fro') <= 1e-3: # stop if the overall temporal component does not change by much print "stopping: overall temporal component not changing significantly" break else: Cin = C f = C[nr:, :] C = C[:nr, :] YrA = np.array(YrA[:, :nr]).T P_ = sorted(P_, key=lambda k: k['neuron_id']) return C, f, S, bl, c1, sn, g, YrA #,P_
def update_temporal_components_parallel(Y, A, b, Cin, fin, bl = None, c1 = None, g = None, sn = None, ITER=2, method_foopsi='constrained_foopsi', n_processes=1, backend='single_thread',memory_efficient=False, **kwargs): """Update temporal components and background given spatial components using a block coordinate descent approach. Parameters ----------- Y: np.ndarray (2D) input data with time in the last axis (d x T) A: sparse matrix (crc format) matrix of temporal components (d x K) b: ndarray (dx1) current estimate of background component Cin: np.ndarray current estimate of temporal components (K x T) fin: np.ndarray current estimate of temporal background (vector of length T) g: np.ndarray Global time constant (not used) bl: np.ndarray baseline for fluorescence trace for each column in A c1: np.ndarray initial concentration for each column in A g: np.ndarray discrete time constant for each column in A sn: np.ndarray noise level for each column in A ITER: positive integer Maximum number of block coordinate descent loops. method_foopsi: string Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment. n_processes: int number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster. backend: 'str' single_thread no parallelization ipyparallel, parallelization using the ipyparallel cluster. You should start the cluster (install ipyparallel and then type ipcluster -n 6, where 6 is the number of processes). memory_efficient: Bool whether or not to optimize for memory usage (longer running times). nevessary with very large datasets **kwargs: dict all parameters passed to constrained_foopsi except bl,c1,g,sn (see documentation). Some useful parameters are p: int order of the autoregression model method: [optional] string solution method for basis projection pursuit cvx or spgl1 or debug for fast but possibly imprecise temporal components Returns -------- C: np.matrix matrix of temporal components (K x T) f: np.array vector of temporal background (length T) Y_res: np.ndarray matrix with current residual (d x T) S: np.ndarray matrix of merged deconvolved activity (spikes) (K x T) bl: float same as input c1: float same as input g: float same as input sn: float same as input """ if not kwargs.has_key('p') or kwargs['p'] is None: raise Exception("You have to provide a value for p") d,T = np.shape(Y); nr = np.shape(A)[-1] if bl is None: bl=np.repeat(None,nr) if c1 is None: c1=np.repeat(None,nr) if g is None: g=np.repeat(None,nr) if sn is None: sn=np.repeat(None,nr) A = scipy.sparse.hstack((A,coo_matrix(b))) S = np.zeros(np.shape(Cin)); Cin = np.vstack((Cin,fin)); C = Cin; #% nA = np.squeeze(np.array(np.sum(np.square(A.todense()),axis=0))) Sp = np.zeros((nr,T)) #YrA = Y.T*A - Cin.T*(A.T*A); # Y=np.matrix(Y) # C=np.matrix(C) # Cin=np.matrix(Cin) # YrA2 = Y.T*A - Cin.T*(A.T*A); Cin=coo_matrix(Cin) YrA = (A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)) if backend == 'ipyparallel': try: # if server is not running and raise exception if not installed or not started from ipyparallel import Client c = Client() except: print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes" raise if len(c) < n_processes: print len(c) raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value") dview=c[:n_processes] # use the number of processes Cin=np.array(Cin.todense()) for iter in range(ITER): O,lo = update_order(A.tocsc()[:,:nr]) P_=[]; for count,jo_ in enumerate(O): jo=np.array(list(jo_)) Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T Ctemp = np.zeros((np.size(jo),T)) Stemp = np.zeros((np.size(jo),T)) btemp = np.zeros((np.size(jo),1)) sntemp = btemp.copy() c1temp = btemp.copy() gtemp = np.zeros((np.size(jo),kwargs['p'])); nT = nA[jo] # args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))] args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, None, None, None, None, kwargs) for jj in range(len(jo))] if backend == 'ipyparallel': results = dview.map_sync(constrained_foopsi_parallel,args_in) elif backend == 'single_thread': results = map(constrained_foopsi_parallel,args_in) else: raise Exception('Backend not defined. Use either single_thread or ipyparallel') for chunk in results: #pars=dict(kwargs) C_,Sp_,Ytemp_,cb_,c1_,sn_,gn_,jj_=chunk Ctemp[jj_,:] = C_[None,:] Stemp[jj_,:] = Sp_ Ytemp[:,jj_] = Ytemp_[:,None] btemp[jj_] = cb_ c1temp[jj_] = c1_ sntemp[jj_] = sn_ gtemp[jj_,:] = gn_.T bl[jo[jj_]] = cb_ c1[jo[jj_]] = c1_ sn[jo[jj_]] = sn_ g[jo[jj_]] = gtemp[jj,:]#[jj_,np.abs(gtemp[jj,:])>0] #pars['b'] = cb_ # pars['c1'] = c1_ # pars['neuron_sn'] = sn_ # pars['gn'] = gtemp[jj_,np.abs(gtemp[jj,:])>0] # ## for jj = 1:length(O{jo}) ## P.gn(O{jo}(jj)) = {gtemp(jj,abs(gtemp(jj,:))>0)'}; ## end # pars['neuron_id'] = jo[jj_] # P_.append(pars) YrA[:,jo] = Ytemp C[jo,:] = Ctemp S[jo,:] = Stemp # if (np.sum(lo[:jo])+1)%1 == 0: print str(np.sum(lo[:count])) + ' out of total ' + str(nr) + ' temporal components updated \n' ii=nr YrA[:,ii] = YrA[:,ii] + nA[ii]*np.atleast_2d(Cin[ii,:]).T cc = np.maximum(YrA[:,ii]/nA[ii],0) C[ii,:] = cc[:].T YrA[:,ii] = YrA[:,ii] - nA[ii]*np.atleast_2d(C[ii,:]).T if backend == 'ipyparallel': dview.results.clear() c.purge_results('all') c.purge_everything() if scipy.linalg.norm(Cin - C,'fro')/scipy.linalg.norm(C,'fro') <= 1e-3: # stop if the overall temporal component does not change by much print "stopping: overall temporal component not changing significantly" break else: Cin = C Y_res = Y - A*C # this includes the baseline term f = C[nr:,:] C = C[:nr,:] P_ = sorted(P_, key=lambda k: k['neuron_id']) if backend == 'ipyparallel': c.close() return C,f,Y_res,S,bl,c1,sn,g
def update_temporal_components(Y, A, b, Cin, fin, bl = None, c1 = None, g = None, sn = None, ITER=2, method_foopsi='constrained_foopsi', n_processes=1, backend='single_thread',memory_efficient=False, debug=False, **kwargs): """Update temporal components and background given spatial components using a block coordinate descent approach. Parameters ----------- Y: np.ndarray (2D) input data with time in the last axis (d x T) A: sparse matrix (crc format) matrix of temporal components (d x K) b: ndarray (dx1) current estimate of background component Cin: np.ndarray current estimate of temporal components (K x T) fin: np.ndarray current estimate of temporal background (vector of length T) g: np.ndarray Global time constant (not used) bl: np.ndarray baseline for fluorescence trace for each column in A c1: np.ndarray initial concentration for each column in A g: np.ndarray discrete time constant for each column in A sn: np.ndarray noise level for each column in A ITER: positive integer Maximum number of block coordinate descent loops. method_foopsi: string Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment. n_processes: int number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster. backend: 'str' single_thread no parallelization ipyparallel, parallelization using the ipyparallel cluster. You should start the cluster (install ipyparallel and then type ipcluster -n 6, where 6 is the number of processes). memory_efficient: Bool whether or not to optimize for memory usage (longer running times). nevessary with very large datasets **kwargs: dict all parameters passed to constrained_foopsi except bl,c1,g,sn (see documentation). Some useful parameters are p: int order of the autoregression model method: [optional] string solution method for constrained foopsi. Choices are 'cvx': using cvxopt and picos (slow especially without the MOSEK solver) 'cvxpy': using cvxopt and cvxpy with the ECOS solver (faster, default) 'spgl1': using the spgl1 package 'debug': using spgl1 without spike non-negativity constraints (just for debugging purposes) solvers: list string primary and secondary (if problem unfeasible for approx solution) solvers to be used with cvxpy, default is ['ECOS','SCS'] Note -------- The temporal components are updated in parallel by default by forming of sequence of vertex covers. Returns -------- C: np.ndarray matrix of temporal components (K x T) f: np.array vector of temporal background (length T) S: np.ndarray matrix of merged deconvolved activity (spikes) (K x T) bl: float same as input c1: float same as input g: float same as input sn: float same as input YrA: np.ndarray matrix of spatial component filtered raw data, after all contributions have been removed. YrA corresponds to the residual trace for each component and is used for faster plotting (K x T) """ if not kwargs.has_key('p') or kwargs['p'] is None: raise Exception("You have to provide a value for p") d,T = np.shape(Y); nr = np.shape(A)[-1] if bl is None: bl=np.repeat(None,nr) if c1 is None: c1=np.repeat(None,nr) if g is None: g=np.repeat(None,nr) if sn is None: sn=np.repeat(None,nr) A = scipy.sparse.hstack((A,coo_matrix(b))) S = np.zeros(np.shape(Cin)); Cin = np.vstack((Cin,fin)); C = Cin; nA = np.squeeze(np.array(np.sum(np.square(A.todense()),axis=0))) #import pdb #pdb.set_trace() Cin=coo_matrix(Cin) #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A))) YA = (A.T.dot(Y).T)*spdiags(1./nA,0,nr+1,nr+1) AA = ((A.T.dot(A))*spdiags(1./nA,0,nr+1,nr+1)).tocsr() YrA = YA - Cin.T.dot(AA) #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)))*spdiags(1./nA,0,nr+1,nr+1) if backend == 'ipyparallel': try: # if server is not running and raise exception if not installed or not started from ipyparallel import Client c = Client() except: print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes" raise if len(c) < n_processes: print len(c) raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value") dview=c[:n_processes] # use the number of processes Cin=np.array(Cin.todense()) for iter in range(ITER): O,lo = update_order(A.tocsc()[:,:nr]) P_=[]; for count,jo_ in enumerate(O): jo=np.array(list(jo_)) #Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T Ytemp = YrA[:,jo.flatten()] + Cin[jo,:].T Ctemp = np.zeros((np.size(jo),T)) Stemp = np.zeros((np.size(jo),T)) btemp = np.zeros((np.size(jo),1)) sntemp = btemp.copy() c1temp = btemp.copy() gtemp = np.zeros((np.size(jo),kwargs['p'])); nT = nA[jo] # args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))] args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, None, None, None, None, kwargs) for jj in range(len(jo))] # import pdb # pdb.set_trace() if backend == 'ipyparallel': # if debug: results = dview.map_async(constrained_foopsi_parallel,args_in) results.get() for outp in results.stdout: print outp[:-1] sys.stdout.flush() for outp in results.stderr: print outp[:-1] sys.stderr.flush() else: results = dview.map_sync(constrained_foopsi_parallel,args_in) elif backend == 'single_thread': results = map(constrained_foopsi_parallel,args_in) else: raise Exception('Backend not defined. Use either single_thread or ipyparallel') for chunk in results: pars=dict() C_,Sp_,Ytemp_,cb_,c1_,sn_,gn_,jj_=chunk Ctemp[jj_,:] = C_[None,:] Stemp[jj_,:] = Sp_ Ytemp[:,jj_] = Ytemp_[:,None] btemp[jj_] = cb_ c1temp[jj_] = c1_ sntemp[jj_] = sn_ gtemp[jj_,:] = gn_.T bl[jo[jj_]] = cb_ c1[jo[jj_]] = c1_ sn[jo[jj_]] = sn_ g[jo[jj_]] = gn_.T if kwargs['p'] > 0 else [] #gtemp[jj,:] pars['b'] = cb_ pars['c1'] = c1_ pars['neuron_sn'] = sn_ pars['gn'] = gtemp[jj_,np.abs(gtemp[jj,:])>0] pars['neuron_id'] = jo[jj_] P_.append(pars) YrA -= (Ctemp-C[jo,:]).T*AA[jo,:] #YrA[:,jo] = Ytemp C[jo,:] = Ctemp.copy() S[jo,:] = Stemp # if (np.sum(lo[:jo])+1)%1 == 0: print str(np.sum(lo[:count+1])) + ' out of total ' + str(nr) + ' temporal components updated' ii=nr #YrA[:,ii] = YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T #cc = np.maximum(YrA[:,ii],0) cc = np.maximum(YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T,0) YrA -= (cc-np.atleast_2d(Cin[ii,:]).T)*AA[ii,:] C[ii,:] = cc.T #YrA = YA - C.T.dot(AA) #YrA[:,ii] = YrA[:,ii] - np.atleast_2d(C[ii,:]).T if backend == 'ipyparallel': dview.results.clear() c.purge_results('all') c.purge_everything() if scipy.linalg.norm(Cin - C,'fro')/scipy.linalg.norm(C,'fro') <= 1e-3: # stop if the overall temporal component does not change by much print "stopping: overall temporal component not changing significantly" break else: Cin = C f = C[nr:,:] C = C[:nr,:] YrA = np.array(YrA[:,:nr]).T P_ = sorted(P_, key=lambda k: k['neuron_id']) if backend == 'ipyparallel': c.close() return C,f,S,bl,c1,sn,g,YrA #,P_