def spectral_radius(xent, regularizable, projvec_beta=.55):
    """Build ops estimating the principal Hessian eigenvalue by power iteration.

    A persistent, non-trainable projection vector (one variable per entry of
    ``regularizable``) is created under the ``projvec`` variable scope and
    initialised to a random direction with unit global norm. A Hessian-vector
    product of ``xent`` is formed by differentiating twice, with both the
    input vector and the resulting product scaled element-wise by
    ``utils.filtnorm(regularizable)``.

    Args:
        xent: loss tensor whose Hessian is probed.
        regularizable: list of variables the Hessian is taken with respect
            to. Their static shapes must be fully defined, since they are
            used to draw the initial random vector.
        projvec_beta: weight applied to the current projection vector when
            it is mixed into the next one.

    Returns:
        Tuple ``(xHx, projvec_op, projvec_corr, projvec_mul_normvalues)``:
        ``xHx`` is ``utils.list2dotprod`` of the projection vector and the
        scaled Hessian-vector product (the eigenvalue estimate);
        ``projvec_op`` is the list of assign ops that store the next
        projection vector; ``projvec_corr`` is ``utils.list2dotprod`` of the
        next and current projection vectors (a diagnostic); and
        ``projvec_mul_normvalues`` is the projection vector multiplied by
        the filter norms.
    """
    # create initial projection vector (random, normalized to unit global norm)
    projvec_init = [
        np.random.randn(*r.get_shape().as_list()) for r in regularizable
    ]
    magnitude = np.sqrt(np.sum([np.sum(p**2) for p in projvec_init]))
    # Scale every array on its own. Dividing the Python list itself makes
    # numpy pack the differently shaped arrays into a single array first,
    # which fails for ragged shapes.
    projvec_init = [p / magnitude for p in projvec_init]

    # projection vector tensor variable
    with tf.variable_scope('projvec'):
        projvec = [
            tf.get_variable(name=r.op.name,
                            dtype=tf.float32,
                            shape=r.get_shape(),
                            trainable=False,
                            initializer=tf.constant_initializer(p))
            for r, p in zip(regularizable, projvec_init)
        ]

    # filter-norm scaling of the projection vector
    # (alternative kept from the original: utils.layernormdev(regularizable))
    norm_values = utils.filtnorm(regularizable)
    projvec_mul_normvalues = [
        tf.multiply(f, p) for f, p in zip(norm_values, projvec)
    ]

    # get the hessian-vector product: differentiate the gradient again,
    # weighting it with the scaled projection vector (grad_ys argument)
    gradLoss = tf.gradients(xent, regularizable)
    hessVecProd = tf.gradients(gradLoss, regularizable,
                               projvec_mul_normvalues)
    hessVecProd = [h * n for h, n in zip(hessVecProd, norm_values)]

    # principal eigenvalue: project hessian-vector product with that same vector
    xHx = utils.list2dotprod(projvec, hessVecProd)

    # compute next projvec
    # NOTE: `args` is not a parameter of this function; it is read from the
    # enclosing (module) scope and must be defined there.
    if args.curv:
        # fresh random direction on every run (not normalized)
        nextProjvec = [tf.random_normal(shape=p.get_shape()) for p in projvec]
    else:
        # power-iteration step: normalized Hv mixed with the current vector
        # (weighted by projvec_beta), then renormalized
        normHv = utils.list2norm(hessVecProd)
        unitHv = [tf.divide(h, normHv) for h in hessVecProd]
        nextProjvec = [
            tf.add(h, tf.multiply(p, projvec_beta))
            for h, p in zip(unitHv, projvec)
        ]
        normNextPv = utils.list2norm(nextProjvec)
        nextProjvec = [tf.divide(p, normNextPv) for p in nextProjvec]

    # diagnostics: dot product of the new projection vector with the previous one
    projvec_corr = utils.list2dotprod(nextProjvec, projvec)

    # op to assign the new projection vector for next iteration; the control
    # dependency forces projvec_corr to be evaluated before the overwrite
    with tf.control_dependencies([projvec_corr]):
        with tf.variable_scope('projvec_op'):
            projvec_op = [
                tf.assign(p, n) for p, n in zip(projvec, nextProjvec)
            ]

    return xHx, projvec_op, projvec_corr, projvec_mul_normvalues
def build_graph(self):
    """Assemble the dense classifier graph with its loss, training and metric ops.

    Reads ``self.args`` (``ndim``, ``nclass``, ``nhidden``, ``projvec_beta``,
    ``wdeccoef``, ``max_grad_norm``) and stores every placeholder and op it
    creates as an attribute on ``self``. Returns nothing.
    """
    # ---- placeholders ----------------------------------------------------
    self.inputs = tf.placeholder(
        dtype=tf.float32, shape=(None, self.args.ndim), name='inputs')
    self.labels = tf.placeholder(
        dtype=tf.float32, shape=(None, self.args.nclass), name='labels')
    # training mode flag; not consumed by any op built in this method
    self.is_training = tf.placeholder(dtype=tf.bool)
    self.lr = tf.placeholder(tf.float32)
    # only referenced by the disabled spectral term of the loss below
    self.speccoef = tf.placeholder(tf.float32)

    # ---- forward pass: stack of relu dense layers, then a linear readout --
    hidden = self.inputs
    for width in self.args.nhidden:
        hidden = tf.layers.dense(
            hidden, width, use_bias=True, activation='relu')
    logits = tf.layers.dense(hidden, self.args.nclass)
    elementwise_xent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=self.labels, logits=logits)
    self.xent = tf.reduce_mean(elementwise_xent)

    # ---- weight decay and hessian regularizer ----------------------------
    # weight decay covers every trainable variable without 'bias' in its name
    regularizable = [
        var for var in tf.trainable_variables()
        if 'bias' not in var.op.name
    ]
    wdec = tf.global_norm(regularizable)**2
    # NOTE(review): spectral_radius is handed *all* trainable variables, not
    # the bias-filtered `regularizable` list — confirm this is intended.
    spectral_outputs = spectral_radius(
        self.xent, tf.trainable_variables(), self.args.projvec_beta)
    (self.spec, self.projvec_op,
     self.projvec_corr, self.eigvec) = spectral_outputs
    # the spectral term (self.speccoef * self.spec) is currently left out
    decay_penalty = self.args.wdeccoef * wdec
    self.loss = self.xent + decay_penalty

    # ---- gradients, clipped by global norm -------------------------------
    optimizer = tf.train.AdamOptimizer(self.lr)
    raw_grads = tf.gradients(self.loss, tf.trainable_variables())
    clipped_grads, self.grad_norm = tf.clip_by_global_norm(
        raw_grads, clip_norm=self.args.max_grad_norm)
    self.weight_norm = tf.global_norm(tf.trainable_variables())

    # ---- training and weight-assignment ops ------------------------------
    grads_and_vars = zip(clipped_grads, tf.trainable_variables())
    self.train_op = optimizer.apply_gradients(grads_and_vars)
    # one zero tensor per trainable variable, used as the assign source
    self.inputweights = [
        tf.zeros_like(var) for var in tf.trainable_variables()
    ]
    self.assign_op = [
        tf.assign(var, source)
        for var, source in zip(tf.trainable_variables(), self.inputweights)
    ]

    # ---- accuracy: fraction of label entries matched after rounding ------
    self.predictions = tf.sigmoid(logits)
    rounded = tf.round(self.predictions)
    matches = tf.equal(self.labels, rounded)
    self.acc = tf.reduce_mean(tf.to_float(matches))

    # ---- miscellaneous ---------------------------------------------------
    self.filtnorms = utils.filtnorm(tf.trainable_variables())
def get_filtnorm(self, weights):
    """Run ``utils.filtnorm(weights)`` in ``self.sess`` and return the result."""
    # NOTE(review): utils.filtnorm is invoked on every call; if it builds
    # graph ops, repeated calls will keep adding nodes — confirm.
    filtnorm_fetch = utils.filtnorm(weights)
    return self.sess.run(filtnorm_fetch)