def __call__(self, inp, mode):
    if mode == 'train' or mode == 'valid':
        # Normalize with the statistics of the current mini-batch.
        inp = inp - inp.mean(0).dimshuffle('x', 0, 1, 2)
        inp = inp / T.sqrt((inp**cast_x(2)).mean(0).dimshuffle('x', 0, 1, 2)
                           + cast_x(0.0001))
        return inp * self.gamma + self.beta
    else:
        # At test time, use the dataset statistics accumulated by learn().
        # The return expression is (inp - mean) * gamma / std + beta,
        # expanded so the constant terms can be folded together.
        mean = self.mean.dimshuffle('x', 0, 1, 2)
        std = T.sqrt(self.var + cast_x(0.0001)).dimshuffle('x', 0, 1, 2)
        beta = self.beta.dimshuffle('x', 0, 1, 2)
        gamma = self.gamma.dimshuffle('x', 0, 1, 2)
        return inp * gamma / std + beta - mean * gamma / std
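
# The inference branch above folds (inp - mean) * gamma / std + beta into
# inp * gamma / std + beta - mean * gamma / std so the constant terms can be
# precomputed. A minimal, standalone NumPy check of that identity (shapes are
# illustrative only, not taken from the model):
#
#     import numpy as np
#
#     rng = np.random.default_rng(0)
#     inp = rng.normal(size=(8, 3, 5, 5))        # (batch, channels, rows, cols)
#     mean = rng.normal(size=(3, 5, 5))
#     std = rng.uniform(0.5, 2.0, size=(3, 5, 5))
#     gamma = rng.normal(size=(3, 5, 5))
#     beta = rng.normal(size=(3, 5, 5))
#
#     standard = (inp - mean) * gamma / std + beta            # textbook form
#     folded = inp * gamma / std + beta - mean * gamma / std  # form used above
#     assert np.allclose(standard, folded)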
def learn(self, model_inp, layer_inp, data):
    print(' Learning {}'.format(self.name))
    # Accumulate sum(x) and sum(x**2) over the dataset in a single pass.
    count = shared_x(0., name='count')
    updates = [(count, count + cast_x(layer_inp.shape[0]))]
    updates += [(self.mean, self.mean + layer_inp.sum(0))]
    updates += [(self.var, self.var + (layer_inp**2).sum(0))]
    fn = th.function(inputs=[model_inp], updates=updates)
    for i, (example, label) in enumerate(data):
        fn(example)
    # E[x] = sum(x) / n; var(x) = E[x**2] - E[x]**2.
    # self.mean must be finalized first since the var expression uses it.
    self.mean.set_value((self.mean / count).eval())
    self.var.set_value((self.var / count - self.mean**2).eval())
    print(' - mean: mean(mean) = {:0.2f}; std(mean) = {:0.2f}'.format(
        float(self.mean.mean().eval()), float(self.mean.std().eval())))
    print(' - var:  mean(var) = {:0.2f}; std(var) = {:0.2f}'.format(
        float(self.var.mean().eval()), float(self.var.std().eval())))
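
# learn() relies on the one-pass identity var(x) = E[x**2] - E[x]**2. A small
# standalone NumPy sketch of the same accumulation scheme:
#
#     import numpy as np
#
#     x = np.random.default_rng(1).normal(loc=2.0, scale=3.0, size=(1000, 4))
#     count = x.shape[0]
#     sum_x = x.sum(0)                      # running sum, as in the updates
#     sum_x2 = (x**2).sum(0)                # running sum of squares
#     mean = sum_x / count
#     var = sum_x2 / count - mean**2        # E[x**2] - E[x]**2
#     assert np.allclose(mean, x.mean(0))
#     assert np.allclose(var, x.var(0))
#
# Caveat: this identity can lose precision in float32 when the variance is
# much smaller than the squared mean (catastrophic cancellation).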
def __call__(self, inp, mode=None):
    corruption_type = self.corruption_type
    corruption_level = self.corruption_level
    if mode != 'train':
        # Corruption is only applied during training.
        print('corrupt : mode (= {}) != "train"'.format(mode))
        return inp
    elif corruption_level == 0 or corruption_type is None:
        return inp
    elif corruption_type == 'zeromask':
        # Dropout-style masking, rescaled by 1/(1 - p) so the expected
        # activation is unchanged.
        return self.rng.binomial(
            size=inp.shape, n=1, p=1.0 - corruption_level,
            dtype=float_x) * inp / cast_x(1 - corruption_level)
    elif corruption_type == 'gaussian':
        # Additive Gaussian noise with standard deviation corruption_level.
        return self.rng.normal(
            size=inp.shape, avg=0.0, std=corruption_level,
            dtype=float_x) + inp
    else:
        raise ValueError(
            'Unknown corruption type: {}'.format(corruption_type))
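
# The zeromask branch preserves the input in expectation because the Bernoulli
# mask has mean (1 - level), cancelled by the 1/(1 - level) rescaling. A
# standalone NumPy check (sample sizes are illustrative only):
#
#     import numpy as np
#
#     rng = np.random.default_rng(2)
#     x = rng.uniform(1.0, 2.0, size=100)
#     level = 0.3                           # corruption_level
#     trials = np.stack([
#         rng.binomial(1, 1.0 - level, size=x.shape) * x / (1.0 - level)
#         for _ in range(20000)])
#     assert np.allclose(trials.mean(0), x, atol=0.05)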
def learn(self, model_inp, layer_inp, data):
    print(' Learning {}'.format(self.__class__.__name__))
    # Accumulate per-channel sums of the spatial means and squared means.
    count = shared_x(0., name='count')
    updates = [(count, count + cast_x(layer_inp.shape[0]))]
    updates += [(self.dc, self.dc + layer_inp.mean(3).mean(2).sum(0))]
    updates += [(self.std, self.std + (layer_inp**2).mean(3).mean(2).sum(0))]
    fn = th.function(inputs=[model_inp], updates=updates)
    for i, (example, label) in enumerate(data):
        if i >= self.nb_pretrain_iterations:
            break
        fn(example)
    # dc = E[x]; std = sqrt(E[x**2] - E[x]**2), same one-pass trick as above.
    self.dc.set_value((self.dc / count).eval())
    self.std.set_value(T.sqrt(self.std / count - self.dc**2).eval())
    print(' - dc centering: mean(dc) = {:0.2f}; std(dc) = {:0.2f}'.format(
        float(self.dc.mean().eval()), float(self.dc.std().eval())))
    print(' - contrast nrm: mean(std) = {:0.2f}; std(std) = {:0.2f}'.format(
        float(self.std.mean().eval()), float(self.std.std().eval())))

def __graph_output(self, epoch):
    self.update_learning_stats_fn()

    ######## Learning statistics associated with optimized parameters
    for param in self.params:
        last_update = self.last_batch_update[param]
        this_update = self.this_batch_update[param]
        init = self.init[param]
        sp = self.subplots[param]
        name = str(param)

        if param.ndim == 1:
            # Parameter values
            data = param.get_value()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 0].hist(remove=True, x=data, bins=35)
            sp[0, 0].set_title('{} at epoch {}'.format(param, epoch),
                               fontsize=10)
            sp[1, 0].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

            # Deviation from the initial values
            data = (param - init).eval()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 1].hist(remove=True, x=data, bins=35)
            sp[0, 1].set_title('{}-initial'.format(param), fontsize=10)
            sp[1, 1].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

            # Current gradient update
            data = this_update.get_value()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 2].hist(remove=True, x=data, bins=35)
            sp[0, 2].set_title('{} gradient update'.format(param),
                               fontsize=10)
            sp[1, 2].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

        elif param.ndim > 1:
            # Flatten to one row per filter / hidden unit.
            if param.ndim == 2:
                param = param.T
                last_update = last_update.T
                this_update = this_update.T
                init = init.T
            param = param.flatten(2)
            last_update = last_update.flatten(2)
            this_update = this_update.flatten(2)
            init = init.flatten(2)

            # Norms
            nrm = T.sqrt((param**2).sum(1))
            data = nrm.eval()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 0].hist(remove=True, x=data, bins=35)
            sp[0, 0].set_title(
                r'$\Vert w_i \Vert \/ i \in [1,{}]$ at epoch {}'.format(
                    len(data), epoch), fontsize=10)
            sp[1, 0].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

            # Orthonormality (Gram matrix of the row-normalized weights)
            param_nrm = param / nrm[:, None]
            data = T.dot(param_nrm, param_nrm.T).flatten().eval()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 1].hist(remove=True, x=data, bins=60)
            sp[0, 1].set_yscale('log', nonposy='clip')
            sp[0, 1].set_title(
                r'$ {{ \frac{{ {{w_i}}^\intercal w_j }}{{ \Vert w_i \Vert \Vert w_j \Vert }} }} {{\vert}}_{{(t={})}} \/ i,j \in [1,{}] $'
                .format(epoch, int(sqrt(len(data)))), fontsize=10)
            sp[1, 1].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

            # Rotations with respect to the initial state
            cos = (param * init).sum(1)
            nrm = T.sqrt((param**2).sum(1))
            nrm_init = T.sqrt((init**2).sum(1))
            fac = cast_x(180. / np.pi)
            data = (T.arccos(cos / (nrm * nrm_init)) * fac).flatten().eval()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 2].hist(remove=True, x=data, bins=35)
            sp[0, 2].set_title(
                r'$ \measuredangle ( w^{{(t={})}}_i, w^{{(t=0)}}_i ) \/ i \in [1,{}] $'
                .format(epoch, len(data)), fontsize=10)
            sp[1, 2].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

            # Update norm
            data = T.sqrt((this_update**2).sum(1)).eval()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 3].hist(remove=True, x=data, bins=35)
            sp[0, 3].set_title(
                r'$\Vert u_i \Vert \/ i \in [1,{}]$ at epoch {}'.format(
                    len(data), epoch), fontsize=10)
            sp[1, 3].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

            # Update rotation with respect to the weight vectors
            cos = (param * this_update).sum(1)
            nrm = T.sqrt((param**2).sum(1))
            nrm_upd = T.sqrt((this_update**2).sum(1))
            fac = cast_x(180. / np.pi)
            data = (T.arccos(cos / (nrm * nrm_upd)) * fac).flatten().eval()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            try:
                sp[0, 4].hist(remove=True, x=data, bins=35)
            except Exception:
                print(param)
                print(data.shape)
                raise
            sp[0, 4].set_title(
                r'$ \measuredangle ( w^{{(t={})}}_i, u^{{(t={})}}_i ) \/ i \in [1,{}] $'
                .format(epoch, epoch, len(data)), fontsize=10)
            sp[1, 4].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

            # Rotation of this update with respect to the last one
            # (the norm variable names were swapped in the original; the
            # product is commutative, so the result is unchanged)
            cos = (this_update * last_update).sum(1)
            nrm_this = T.sqrt((this_update**2).sum(1))
            nrm_last = T.sqrt((last_update**2).sum(1))
            fac = cast_x(180. / np.pi)
            data = (T.arccos(cos / (nrm_this * nrm_last)) * fac).flatten().eval()
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 5].hist(remove=True, x=data, bins=35)
            sp[0, 5].set_title(
                r'$ \measuredangle ( u^{{(t={})}}_i, u^{{(t={})}}_i ) \/ i \in [1,{}] $'
                .format(epoch, epoch - 1, len(data)), fontsize=10)
            sp[1, 5].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()))

        else:
            continue

        sp.savefig(join(
            self.output_path,
            '{}_{}_learning_stats_{}.png'.format(
                {0: 'unsupervised', 1: 'supervised'}[self.supervised],
                self.model_id, name)), dpi=100)

    ######## Learning statistics associated with debug nodes
    if self.debug_nodes:
        outputs = self.debug_fn()
        for (name, node), data in zip(list(self.debug_nodes.items()), outputs):
            sp = self.subplots[node]
            data = data.flatten()
            nonzeros = float((data != 0).mean())
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, 0].hist(remove=True, x=data, bins=60)
            sp[0, 0].set_yscale('log', nonposy='clip')
            sp[0, 0].set_title('{} at t={}'.format(name, epoch), fontsize=6)
            sp[1, 0].add_point(p005=(epoch, p005), median=(epoch, median),
                               p995=(epoch, p995), std=(epoch, data.std()),
                               nonzero=(epoch, nonzeros))
            sp[1, 0].set_title('Non-zero = {:0.4f}%'.format(nonzeros * 100),
                               fontsize=8)
            sp.savefig(join(self.output_path, name + '.png'), dpi=100)
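
# All of the rotation diagnostics above reduce to arccos of a cosine
# similarity, converted to degrees. A standalone NumPy sketch of that
# computation (the clipping is an extra safeguard, not in the original:
# round-off can push |cos| slightly above 1 and yield NaN):
#
#     import numpy as np
#
#     def angles_deg(a, b):
#         # Per-row angle, in degrees, between the rows of a and b.
#         cos = (a * b).sum(1) / (np.linalg.norm(a, axis=1)
#                                 * np.linalg.norm(b, axis=1))
#         return np.degrees(np.arccos(np.clip(cos, -1.0, 1.0)))
#
#     w = np.array([[1.0, 0.0], [1.0, 1.0]])
#     w0 = np.array([[0.0, 1.0], [1.0, 0.0]])
#     print(angles_deg(w, w0))   # -> [90. 45.]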
def learn(self, inp, trainer,
          inp_corruption_type=None, inp_corruption_level=0,
          hid_corruption_type=None, hid_corruption_level=0,
          cost_weight=cast_x(1), learn_scale_first=False,
          debug_path=None, nb_frames=None):
    if trainer:
        # Build noisy autoencoder for training
        train_enc = self(inp, inp_corruption_type, inp_corruption_level,
                         'full')
        train_dec = self.dec(train_enc, hid_corruption_type,
                             hid_corruption_level)
        train_cost = self.cost(inp, train_dec, cost_weight)

        # Build noiseless autoencoder for validation
        valid_enc = self(inp, border_mode='full')
        valid_dec = self.dec(valid_enc)
        valid_cost = self.cost(inp, valid_dec, cost_weight)

        # Quick training pass to calibrate the weight scaling: shorten the
        # early-stopping lookback, disable momentum, optimize only
        # self.scale, then restore the trainer's settings.
        if learn_scale_first:
            lookback = trainer.lookback
            momentum = trainer.momentum
            trainer.lookback = int(ceil(trainer.lookback / 20.))
            trainer.momentum = 0
            trainer([self.scale], train_cost, valid_cost,
                    model_id=self.model_id + '_scaling').learn()
            trainer.lookback = lookback
            trainer.momentum = momentum

        debug_args = dd()
        debug_args.debug_path = debug_path
        debug_args.nb_frames = nb_frames
        debug_args.prefix = 'unsupervised'

        self.trainer = trainer(
            self.params.values(), train_cost, valid_cost,
            model_id=self.model_id,
            additionnal_updates=self.additionnal_update(),
            debug_calls=(self.debug_call, debug_args),
            debug_nodes=dd({'unsupervised_' + self.model_id +
                            '_encoder_act_trainset': train_enc}))

        # Learn model
        self.trainer.learn()
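
# A minimal, hypothetical stand-in for the learn_scale_first state handling
# above: save the trainer settings, shrink the lookback, zero the momentum,
# run a short optimization, then restore. TrainerStub is an invented stub,
# not part of this codebase:
#
#     from math import ceil
#
#     class TrainerStub:
#         lookback = 100
#         momentum = 0.9
#
#     trainer = TrainerStub()
#     saved = (trainer.lookback, trainer.momentum)
#     trainer.lookback = int(ceil(trainer.lookback / 20.))   # 100 -> 5
#     trainer.momentum = 0
#     # ... short scaling-only optimization would run here ...
#     trainer.lookback, trainer.momentum = saved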
def cost(self, inp, dec, weights=cast_x(1)):
    # Per-example mean squared reconstruction error, optionally weighted
    # per example, then averaged over the mini-batch.
    return ((cast_x(0.5) * (dec - inp)**2).mean(3).mean(2).mean(1)
            * weights).mean()
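
# A NumPy restatement of the cost above, with a hypothetical per-example
# weight vector; the chained .mean(3).mean(2).mean(1) is the same as one
# mean over axes (1, 2, 3):
#
#     import numpy as np
#
#     rng = np.random.default_rng(3)
#     inp = rng.normal(size=(4, 2, 3, 3))
#     dec = rng.normal(size=(4, 2, 3, 3))
#     weights = np.array([1.0, 0.5, 2.0, 1.0])   # one weight per example
#
#     per_example = (0.5 * (dec - inp)**2).mean(axis=(1, 2, 3))
#     chained = (0.5 * (dec - inp)**2).mean(3).mean(2).mean(1)
#     assert np.allclose(per_example, chained)
#     cost = (per_example * weights).mean()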
def conv_normalize(inp):
    # Normalize each example to approximately unit L2 norm. The epsilon
    # belongs inside the denominator to guard against division by zero;
    # the original added it to the quotient instead, due to operator
    # precedence.
    return inp / (T.sqrt((inp**cast_x(2)).sum(3).sum(2).sum(1)).dimshuffle(
        0, 'x', 'x', 'x') + cast_x(0.00001))
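
# A standalone NumPy check of why the parenthesization matters: with an
# all-zero input, the epsilon-outside form still divides 0 by 0, while the
# epsilon-inside form stays finite:
#
#     import numpy as np
#
#     eps = 1e-5
#     x = np.zeros((1, 1, 2, 2))                    # an all-zero example
#     nrm = np.sqrt((x**2).sum(axis=(1, 2, 3)))[:, None, None, None]
#
#     with np.errstate(invalid='ignore'):
#         print(x / nrm + eps)    # nan: 0/0 occurs before epsilon is added
#     print(x / (nrm + eps))      # 0.0: the epsilon guards the division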