# ---------------------------------------------------------------------------
# Experiment setup: wrap the data, configure the DataLogger, initialize the
# model parameters from data and define the annealing schedule.
# NOTE(review): relies on names defined elsewhere in this script
# (my_y, N, H, gamma, overlap, states, dlog, model, comm, output_path, ...).
# ---------------------------------------------------------------------------
my_data = {
    'y': my_y,
}

# Prepare model...
# Configure DataLogger: scalar diagnostics go to the console (TextPrinter),
# arrays and per-iteration results to the HDF5 result file.
print_list = ('T', 'Q', 'pi', 'sigma', 'N', 'N_use', 'L', 'W_noise',
              'sigma_noise', 'pi_noise', 'prior_mass')
h5_list = ('W', 'pi', 'sigma', 'y', 'N', 'N_use', 'prior_mass', 'states',
           'Hprime', 'H', 'gamma', 'channel')
# FIX: 'overlap' was listed twice in the tuple below, which would register
# the H5 handler for that key twice and duplicate its records; keep it once.
h5_list += ('infered_posterior', 'infered_states', 'series', 'rseries',
            'ry', 'rs', 'overlap', 'ty', 'ts')
dlog.set_handler(print_list, TextPrinter)
dlog.set_handler(h5_list, StoreToH5, output_path + '/result.h5')

# Record the experiment's static quantities once up front.
dlog.append('y', my_y)
dlog.append('N', N)
dlog.append('gamma', gamma)
dlog.append('H', H)
dlog.append('overlap', overlap)
dlog.append('states', states)

# FIX: removed dead "model_params = {}" — it was overwritten immediately
# by the standard_init() call.
model_params = model.standard_init(my_data)

# Initialize W with data points.
# NOTE(review): assumes at least H rows of my_y satisfy |y|.sum() > 0.5;
# otherwise the assignment fails with a shape mismatch — TODO confirm.
model_params['W'][:, :] = my_y[np.abs(my_y).sum(1) > 0.5][:H, :].T
# Broadcast the data-derived W so every MPI rank starts identically.
comm.Bcast([model_params['W'], MPI.DOUBLE])

# Choose annealing schedule
anneal = LinearAnnealing(200)
# ---------------------------------------------------------------------------
# Experiment setup: wrap the data, configure the DataLogger handlers,
# initialize the model parameters and define the annealing schedule.
# NOTE(review): relies on names defined elsewhere in this script
# (my_y, N, Hprime, H, gamma, states, dlog, model, output_path, pprint, ...).
# ---------------------------------------------------------------------------
my_data = { 'y': my_y, }

# Debug print: show the first 10 entries of row 8 of the data on startup.
pprint("{}".format(my_y[8, :10]))

# Configure DataLogger
# Scalar diagnostics printed each iteration (and mirrored to terminal.txt):
print_list = ('T', 'Q', 'pi', 'sigma', 'N', 'N_use', 'L', 'W_noise', 'sigma_noise', 'pi_noise', 'prior_mass')
# Arrays/values persisted to the HDF5 result file:
h5_list = ('W', 'pi', 'sigma', 'y', 'N', 'N_use', 'prior_mass', 'states', 'Hprime', 'H', 'gamma')
# dlog.set_handler(['L'], YTPlotter)
dlog.set_handler(print_list, TextPrinter)
dlog.set_handler(print_list, StoreToTxt, output_path + '/terminal.txt')
dlog.set_handler(h5_list, StoreToH5, output_path + '/result.h5')

# Record the experiment's static quantities once up front.
# dlog.append('y',my_data['y'])
dlog.append('N', N)
dlog.append('Hprime', Hprime)
dlog.append('H', H)
dlog.append('gamma', gamma)
dlog.append('states', states)

# Prepare model...
model_params = model.standard_init(my_data)
# init_sparseness = 1/float(H)
# model_params['pi'] = np.array([1- init_sparseness ,init_sparseness])
dlog.append_all(model_params)

# Choose annealing schedule: temperature holds at 2.0 for the first 5% of
# the 200 iterations, ramps down to 1.0 at 40%, then stays at 1.0.
anneal = LinearAnnealing(200)
# anneal['T'] = [(0.0, 1.1),(0.1,1.1), (0.5, 1.)]
anneal['T'] = [(0.0, 2.), (0.05, 2.), (0.4, 1.)]
# anneal['W_noise'] = [(0.0, 0.), (0.05,0.), (0.15,1.), (0.3,3.), (0.5, 0.)]
'y': my_y, 's': my_s, } # import ipdb;ipdb.set_trace() # Prepare model... # Configure DataLogger print_list = ('T', 'Q', 'pi', 'sigma', 'N', 'N_use', 'MAE', 'L') h5_list = ('W', 'pi', 'sigma', 'y', 'N', 'N_use', 'prior_mass', 'states', 'Hprime', 'H', 'gamma', 'mu', 'MAE', 'W_gt', 'pi_gt', 'sigma_gt') dlog.set_handler(print_list, TextPrinter) dlog.set_handler(print_list, StoreToTxt, output_path + '/result.h5') dlog.set_handler(h5_list, StoreToH5, output_path + '/result.h5') dlog.append('W_gt', params_gt['W']) dlog.append('sigma_gt', params_gt['sigma']) dlog.append('pi_gt', params_gt['pi']) dlog.append('N', N) dlog.append('Hprime', Hprime) dlog.append('H', H) dlog.append('gamma', gamma) dlog.append('states', states) model_params = model.standard_init(my_data) dlog.append_all(model_params) for param_name in model_params.keys(): if param_name not in model.to_learn: model_params[param_name] = params_gt[param_name]
def M_step(self, anneal, model_params, my_suff_stat, my_data):
    """ LinCA M_step.

    Accumulates per-datapoint sufficient statistics, reduces them across
    all MPI ranks and returns the updated model parameters.

    my_data variables used:

        my_data['y']           Datapoints
        my_data['candidates']  Candidate H's according to selection func.

    Annealing variables used:

        anneal['T']            Temperature for det. annealing
        anneal['N_cut_factor'] 0.: no truncation; 1. trunc. according to model

    Returns a dict with updated 'W', 'pi', 'sigma' and a placeholder 'Q'.
    """
    comm = self.comm
    W = model_params['W'].T
    pi = model_params['pi']
    sigma = model_params['sigma']

    # Read in data:
    my_y = my_data['y'].copy()
    candidates = my_data['candidates']
    logpj_all = my_suff_stat['logpj']
    all_denoms = np.exp(logpj_all).sum(axis=1)
    my_N, D = my_y.shape
    N = comm.allreduce(my_N)

    A_pi_gamma = self.get_scaling_factors(model_params['pi'])
    dlog.append("prior_mass", A_pi_gamma)

    # Truncate data (keep the datapoints with the highest posterior mass)
    N_use, my_y, candidates, logpj_all = self._get_sorted_data(
        N, anneal, A_pi_gamma, all_denoms, candidates, logpj_all, my_y)
    my_N, D = my_y.shape  # update my_N after truncation

    # Precompute responsibilities, shifted by the per-datapoint maximum
    # log-probability for numerical stability.
    corr_all = logpj_all.max(axis=1)                 # shape: (my_N,)
    pjb_all = np.exp(logpj_all - corr_all[:, None])  # shape: (my_N, no_states)

    # Log-Likelihood:
    L = self.get_likelihood(D, sigma, A_pi_gamma, logpj_all, N_use)
    dlog.append('L', L)

    # Allocate accumulators for the sufficient statistics
    my_Wp = np.zeros_like(W)            # shape (H, D): numerator of W update
    my_Wq = np.zeros((self.H, self.H))  # shape (H, H): denominator of W update
    my_pi = np.zeros_like(pi)           # shape (K)
    my_sigma = 0.0
    # FIX: SM is used in the gamma > 1 branch below, but this assignment had
    # been commented out, which would raise NameError there.
    SM = self.SM

    # Iterate over all datapoints
    for n in range(my_N):
        y = my_y[n, :]           # length D
        cand = candidates[n, :]  # length Hprime
        pjb = pjb_all[n, :]

        this_Wp = np.zeros_like(my_Wp)  # numerator for current datapoint (H, D)
        this_Wq = np.zeros_like(my_Wq)  # denominator for current datapoint (H, H)
        this_pi = np.zeros_like(pi)     # numerator for pi update (current datapoint)

        # Handle hidden states with 0 active causes
        this_pi[self.K_0] = self.H * pjb[0]
        this_sigma = pjb[0] * (y ** 2).sum()

        # Handle hidden states with exactly 1 active cause.
        # NOTE(review): original author's note kept verbatim — "I am sure I
        # need to multiply with pi somewhere here"; verify against the
        # model derivation before changing.
        c = 0
        for state in range(self.K):
            if state == self.K_0:
                continue
            sspjb = pjb[c * self.H + 1:(c + 1) * self.H + 1]
            this_pi[state] += sspjb.sum()
            recons = self.states[state] * W
            sqe = ((recons - y) ** 2).sum(1)
            this_sigma += (sspjb * sqe).sum()
            c += 1
        this_pi[self.K_0] += ((self.H - 1) * pjb[1:(self.K - 1) * self.H + 1]).sum()
        this_Wp += np.dot(np.outer(y, pjb[1:(self.K - 1) * self.H + 1]), self.SSM).T
        this_Wq += np.dot(pjb[1:(self.K - 1) * self.H + 1] * self.SSM.T, self.SSM)

        if self.gamma > 1:
            # Handle hidden states with more than 1 active cause
            this_Wp[cand] += np.dot(np.outer(y, pjb[(self.K - 1) * self.H + 1:]), SM).T
            this_Wq_tmp = np.zeros_like(my_Wq[cand])
            this_Wq_tmp[:, cand] = np.dot(pjb[(self.K - 1) * self.H + 1:] * SM.T, SM)
            this_Wq[cand] += this_Wq_tmp
            this_pi += np.inner(pjb[(self.K - 1) * self.H + 1:], self.state_abs)
            W_ = W[cand]  # is (Hprime x D)
            Wbar = np.dot(SM, W_)
            this_sigma += (pjb[(self.K - 1) * self.H + 1:] * ((Wbar - y) ** 2).sum(axis=1)).sum()

        # Scale down by the datapoint's total (shifted) posterior mass
        denom = pjb.sum()
        my_Wp += this_Wp / denom
        my_Wq += this_Wq / denom
        my_pi += this_pi / denom
        my_sigma += this_sigma / denom / D

    # Calculate updated W
    Wp = np.empty_like(my_Wp)
    Wq = np.empty_like(my_Wq)
    comm.Allreduce([my_Wp, MPI.DOUBLE], [Wp, MPI.DOUBLE])
    comm.Allreduce([my_Wq, MPI.DOUBLE], [Wq, MPI.DOUBLE])
    # rcond=None selects NumPy's machine-precision-based cutoff and silences
    # the FutureWarning raised by the legacy default.
    W_new = np.linalg.lstsq(Wq, Wp, rcond=None)[0]

    # Calculate updated pi
    pi_new = np.empty_like(pi)
    # FIX: the total was previously allreduced once per component inside the
    # loop; compute it once (every rank executes the same collective sequence,
    # so the MPI call pattern stays consistent across ranks).
    total_pi = comm.allreduce(my_pi.sum())
    for i in range(self.K):
        pi_new[i] = comm.allreduce(my_pi[i]) / total_pi

    # Keep every component at least eps away from zero while preserving the
    # overall normalization (mass is taken evenly from the larger components).
    eps = 1e-6
    if np.any(pi_new < eps):
        which_lo = pi_new < eps
        which_hi = pi_new >= eps
        pi_new[which_lo] += eps - pi_new[which_lo]
        pi_new[which_hi] -= (eps * np.sum(which_lo)) / np.sum(which_hi)

    # Optional sparsity penalty: raise the K_0 ('no active cause') component
    # to self.penalty and rescale the remaining mass, then renormalize.
    # (FIX: removed a stray no-op expression statement "self.penalty".)
    if 'penalty' in self.__dict__:
        if self.penalty > pi_new[self.K_0]:
            r = (1 - self.penalty) / (1 - pi_new[self.K_0])
            pi_new[pi_new != 0] = pi_new[pi_new != 0] * r
            pi_new[self.K_0] = self.penalty
            pi_new /= pi_new.sum()

    # Calculate updated sigma
    sigma_new = np.sqrt(comm.allreduce(my_sigma) / N_use)

    # Respect the to_learn list: keep the old value for frozen parameters.
    if 'W' not in self.to_learn:
        W_new = W
        pp("not learning W")
    if 'pi' not in self.to_learn:
        pi_new = pi
        pp("not learning pi")
    if 'sigma' not in self.to_learn:
        pp("not learning sigma")
        sigma_new = sigma

    # Feed the annealing-critical parameters back to the scheduler.
    # FIX: the original used exec('this_param = ' + param) and then read
    # this_param, which cannot create a function-local variable in Python 3
    # (NameError); eval() of the name performs the intended lookup.
    # NOTE: crit_params is internal config, not untrusted input.
    for param in anneal.crit_params:
        anneal.dyn_param(param, eval(param))

    dlog.append('N_use', N_use)

    return {'W': W_new.transpose(), 'pi': pi_new, 'sigma': sigma_new, 'Q': 0.}