def orthogonality_cost(self, orth_lambda):
    # rescale the filters by their norms before measuring how
    # orthogonal the pooled filters are to one another
    Wv = self.wv_norms * self.Wv
    cost = orthogonality.orthogonal_pools(Wv, self.bw_s)
    params = [self.Wv]
    if self.flags.get('split_norm', False):
        # norms are learned as a separate parameter
        params += [self.wv_norms]
    return utils_cost.Cost(orth_lambda * cost, params)
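# A minimal sketch of the kind of penalty orthogonality.orthogonal_pools is
# presumed to compute: summed squared cosine similarity between filters
# within each pool of size pool_size. This is an assumption for illustration
# only; the actual definition lives in the orthogonality module.
import numpy as np

def orthogonal_pools_sketch(W, pool_size):
    # W: (n_v, n_filters) filter matrix, filters grouped into pools of pool_size
    n_filters = W.shape[1]
    Wn = W / np.linalg.norm(W, axis=0, keepdims=True)   # unit-norm columns
    cost = 0.0
    for start in range(0, n_filters, pool_size):
        pool = Wn[:, start:start + pool_size]
        G = pool.T.dot(pool)                            # pairwise cosines
        cost += np.sum((G - np.eye(G.shape[0])) ** 2)   # off-diagonal mass
    return cost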
def get_sparsity_cost(self):
    # mean activation of the hidden units over the positive-phase samples
    hack_h = self.h_given_v(self.sp_pos_v)

    # define the loss based on the value of sp_type
    if self.sp_type == 'kl':
        # cross-entropy between target and mean activation, with eps
        # guarding the logs against activations of exactly 0 or 1
        eps = npy_floatX(1. / self.batch_size)
        loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                 - npy_floatX(1 - targ) * T.log(1 - val + eps)
    else:
        raise NotImplementedError('Sparsity type %s is not implemented' % self.sp_type)

    cost = T.zeros((), dtype=floatX)
    params = []
    if self.sp_weight['h']:
        cost += self.sp_weight['h'] * T.sum(
                loss(self.sp_targ['h'], hack_h.mean(axis=0)))
        params += [self.hbias]
    if self.sp_type in ['kl'] and self.sp_weight['h']:
        params += [self.Wv, self.alpha, self.mu]
    if self.flags.get('split_norm', False):
        params += [self.wv_norms]
    return utils_cost.Cost(cost, params)
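# Hedged NumPy illustration of the 'kl' sparsity loss above: a cross-entropy
# between the target activation and the observed mean activation, with eps
# keeping the logs finite. The names target, mean_act and batch_size are
# illustrative, not part of the model.
import numpy as np

def kl_sparsity_loss(target, mean_act, batch_size):
    eps = 1.0 / batch_size
    return -target * np.log(eps + mean_act) \
           - (1.0 - target) * np.log(1.0 - mean_act + eps)

# e.g. kl_sparsity_loss(0.1, h_mean, 32) grows as the units' mean
# activation h_mean drifts away from the 10% target.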
def ml_cost(self, pos_v, pos_x, neg_v, neg_x):
    # maximum-likelihood gradient: free energy of the data (positive phase)
    # minus free energy of the model samples (negative phase)
    pos_cost = T.sum(self.free_energy(pos_v, pos_x))
    neg_cost = T.sum(self.free_energy(neg_v, neg_x))
    batch_cost = pos_cost - neg_cost
    cost = batch_cost / self.batch_size
    # the samples are treated as constants when differentiating the cost
    return utils_cost.Cost(cost, self.params(), [pos_v, pos_x, neg_v, neg_x])
def ml_cost_energy(self, pos_h, pos_s, pos_v, neg_h, neg_s, neg_v):
    # same positive-minus-negative phase structure as ml_cost, but written
    # in terms of the full energy over (h, s, v) instead of the free energy
    pos_cost = T.sum(self.energy(pos_h, pos_s, pos_v))
    neg_cost = T.sum(self.energy(neg_h, neg_s, neg_v))
    batch_cost = pos_cost - neg_cost
    cost = batch_cost / self.batch_size
    return utils_cost.Cost(cost, self.params(),
                           [pos_h, pos_s, pos_v, neg_h, neg_s, neg_v])
def ml_cost(self, pos_v, neg_v):
    pos_cost = T.sum(self.free_energy(pos_v))
    neg_cost = T.sum(self.free_energy(neg_v))
    batch_cost = pos_cost - neg_cost
    cost = batch_cost / self.batch_size
    # build gradient of cost with respect to model parameters
    return costmod.Cost(cost, self.params(), [pos_v, neg_v])
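# Hedged sketch of how a Cost built by the ml_cost variants above might be
# consumed. The attribute names (cost.cost, cost.params, cost.constants) are
# assumptions about the Cost container, but the key idea is from the code
# above: the positive/negative samples are passed as constants so that T.grad
# differentiates the (free) energy only with respect to the parameters.
import theano.tensor as T

def cost_gradients(cost):
    return T.grad(cost.cost, cost.params,
                  consider_constant=cost.constants)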
def get_sparsity_cost(self):
    # mean activation of the hidden units over the positive-phase samples
    hack_h = self.h_given_v(self.sp_pos_v)

    # define the loss based on the value of sp_type
    if self.sp_type == 'KL':
        eps = 1. / self.batch_size
        loss = lambda targ, val: - targ * T.log(eps + val) \
                                 - (1. - targ) * T.log(1. - val + eps)
    elif self.sp_type.startswith('Lee07'):
        # absolute deviation of mean activation from the target ('Lee07')
        loss = lambda targ, val: abs(targ - val)
    else:
        raise NotImplementedError('Sparsity type %s is not implemented' % self.sp_type)

    cost = T.zeros((), dtype=floatX)
    params = []
    if self.sp_weight['h']:
        cost += self.sp_weight['h'] * T.sum(
                loss(self.sp_targ['h'], hack_h.mean(axis=0)))
        params += [self.hbias]
    if self.sp_type in ['KL', 'Lee07'] and self.sp_weight['h']:
        params += [self.Wv, self.alpha, self.mu]
    return utils_cost.Cost(cost, params)
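# Hedged note on the 'Lee07' branch above: unlike the KL loss, the penalty is
# simply the absolute deviation of the mean activation from the target, so its
# gradient has constant magnitude no matter how far the activation has
# drifted. Plain-NumPy form for illustration (names are not from the model):
import numpy as np
lee07_penalty = lambda target, mean_act: np.abs(target - mean_act)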
def get_sparsity_cost(self):
    hack_h = self.h_given_v(self.input)

    # KL-style sparsity loss, with eps guarding the logs
    eps = npy_floatX(1. / self.batch_size)
    loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                             - npy_floatX(1 - targ) * T.log(1 - val + eps)

    params = []
    cost = T.zeros((), dtype=floatX)
    if self.sp_weight['h']:
        params += [self.Wv, self.hbias]
        cost += self.sp_weight['h'] * T.sum(
                loss(self.sp_targ['h'], hack_h.mean(axis=0)))
    return costmod.Cost(cost, params, [hack_h])
def ml_cost(self):
    """
    Variational approximation to the maximum likelihood cost: variational
    estimates of (h, s) given the training examples in self.input (a
    T.matrix of shape (batch_size, n_v)) form the positive phase, while
    the stored samples (self.neg_h, self.neg_s, self.neg_v) form the
    negative phase.

    :return: utils_cost.Cost holding the cost expression, the model
             parameters, and the variables to treat as constants.
    """
    pos_h = self.h_given_v(self.input)
    pos_s = self.s_given_hv(pos_h, self.input)

    pos_cost = T.sum(self.energy(pos_h, pos_s, self.input))
    neg_cost = T.sum(self.energy(self.neg_h, self.neg_s, self.neg_v))
    batch_cost = pos_cost - neg_cost
    cost = batch_cost / self.batch_size

    # build gradient of cost with respect to model parameters,
    # holding the inferred and sampled states constant
    cte = [pos_h, pos_s, self.neg_h, self.neg_s, self.neg_v]
    return utils_cost.Cost(cost, self.params, cte)
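# Hedged usage sketch: turning ml_cost into an SGD update. This assumes
# self.input is a shared variable (so the compiled function takes no inputs),
# that the Cost container exposes .cost/.params/.constants as in the earlier
# sketch, and a hypothetical learning rate lr; the repo's real training loop
# may differ.
import theano
import theano.tensor as T

def build_train_fn(model, lr=1e-3):
    ml = model.ml_cost()
    grads = T.grad(ml.cost, ml.params, consider_constant=ml.constants)
    updates = [(p, p - lr * g) for p, g in zip(ml.params, grads)]
    return theano.function([], ml.cost, updates=updates)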
def get_reg_cost(self, l2=None, l1=None):
    """
    Builds the symbolic expression for the L1/L2 regularization cost, with
    the amount of regularization per parameter defined by l1 and l2.

    :param l2: dict containing the amount of L2 regularization for Wg, Wh and Wv
    :param l1: dict containing the amount of L1 regularization for Wg, Wh and Wv
    """
    # guard against the None defaults before calling .get
    l1 = l1 if l1 is not None else {}
    l2 = l2 if l2 is not None else {}

    cost = T.zeros((), dtype=floatX)
    params = []
    for p in self.params:
        if l1.get(p.name, 0):
            cost += l1[p.name] * T.sum(abs(p))
            params += [p]
        if l2.get(p.name, 0):
            cost += l2[p.name] * T.sum(p**2)
            params += [p]
    return utils_cost.Cost(cost, params)
def get_reg_cost(self, l2=None, l1=None):
    """
    Builds the symbolic expression for the L1/L2 regularization cost, with
    the amount of regularization per parameter defined by l1 and l2.

    :param l2: dict whose values give the amount of L2 regularization to
               apply to the parameter named by the key.
    :param l1: idem for L1.
    """
    # guard against the None defaults before calling .get
    l1 = l1 if l1 is not None else {}
    l2 = l2 if l2 is not None else {}

    cost = T.zeros((), dtype=floatX)
    params = []
    for p in self.params():
        if l1.get(p.name, 0):
            cost += l1[p.name] * T.sum(abs(p))
            params += [p]
        if l2.get(p.name, 0):
            cost += l2[p.name] * T.sum(p**2)
            params += [p]
    return utils_cost.Cost(cost, params)
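# Hedged usage sketch for get_reg_cost, given a model instance (assumed) as
# in the earlier sketches: the dicts are keyed by parameter name (the
# docstrings above mention Wg, Wh and Wv); the coefficients are illustrative.
reg = model.get_reg_cost(
    l2={'Wg': 1e-4, 'Wh': 1e-4, 'Wv': 1e-4},
    l1={'Wv': 1e-5},
)
# reg.cost can then be summed with the ml and sparsity costs before
# differentiating (attribute name assumed, as above).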