Example #1
import numpy as np
import tensorflow as tf  # TF 1.x API (tf.get_variable, tf.gradients, tf.assign)
import utils  # project-local helpers: filtnorm, list2dotprod, list2norm


def spectral_radius(xent, regularizable, projvec_beta=.55):
    """returns principal eig of the hessian"""

    # create initial projection vector (randomly and normalized)
    projvec_init = [
        np.random.randn(*r.get_shape().as_list()) for r in regularizable
    ]
    magnitude = np.sqrt(np.sum([np.sum(p**2) for p in projvec_init]))
    projvec_init = [p / magnitude for p in projvec_init]  # normalize the list of arrays to unit overall norm

    # projection vector tensor variable
    with tf.variable_scope('projvec'):
        projvec = [
            tf.get_variable(name=r.op.name,
                            dtype=tf.float32,
                            shape=r.get_shape(),
                            trainable=False,
                            initializer=tf.constant_initializer(p))
            for r, p in zip(regularizable, projvec_init)
        ]

    # per-filter normalization of the weights (a layer-norm variant is left commented out)
    # norm_values = utils.layernormdev(regularizable)
    norm_values = utils.filtnorm(regularizable)
    projvec_mul_normvalues = [
        tf.multiply(f, p) for f, p in zip(norm_values, projvec)
    ]

    # get the hessian-vector product
    gradLoss = tf.gradients(xent, regularizable)
    hessVecProd = tf.gradients(gradLoss, regularizable, projvec_mul_normvalues)
    hessVecProd = [h * n for h, n in zip(hessVecProd, norm_values)]

    # principal eigenvalue: project hessian-vector product with that same vector
    xHx = utils.list2dotprod(projvec, hessVecProd)

    # compute the next projvec (args is a module-level namespace read here)
    if args.curv:
        nextProjvec = [tf.random_normal(shape=p.get_shape()) for p in projvec]
    else:
        normHv = utils.list2norm(hessVecProd)
        unitHv = [tf.divide(h, normHv) for h in hessVecProd]
        nextProjvec = [
            tf.add(h, tf.multiply(p, projvec_beta))
            for h, p in zip(unitHv, projvec)
        ]
    normNextPv = utils.list2norm(nextProjvec)
    nextProjvec = [tf.divide(p, normNextPv) for p in nextProjvec]

    # diagnostic: dot product (correlation) of the new projection vector with the previous one
    projvec_corr = utils.list2dotprod(nextProjvec, projvec)

    # op to assign the new projection vector for next iteration
    with tf.control_dependencies([projvec_corr]):
        with tf.variable_scope('projvec_op'):
            projvec_op = [
                tf.assign(p, n) for p, n in zip(projvec, nextProjvec)
            ]

    return xHx, projvec_op, projvec_corr, projvec_mul_normvalues
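
A minimal usage sketch of the function above, assuming the graph containing xent and regularizable (and the module-level args read inside the function) is already built; num_iters and feed are illustrative names for a power-iteration count and a placeholder-to-batch feed dict, not part of the original code. Each run evaluates the Rayleigh quotient xHx for the current projection vector and then assigns the normalized Hessian-vector product as the next one.

xHx, projvec_op, projvec_corr, _ = spectral_radius(xent, regularizable, projvec_beta=.55)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(num_iters):
        # one power-iteration step: read the current eigenvalue estimate,
        # then update the stored projection vector via projvec_op
        eig_estimate, corr, _ = sess.run([xHx, projvec_corr, projvec_op],
                                         feed_dict=feed)
        print('iter %d: xHx = %.4f, projvec_corr = %.4f' % (i, eig_estimate, corr))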
Example #2
    def build_graph(self):
        '''build the simple neural network computation graph'''

        # inputs to network
        self.inputs = tf.placeholder(dtype=tf.float32,
                                     shape=(None, self.args.ndim),
                                     name='inputs')
        self.labels = tf.placeholder(dtype=tf.float32,
                                     shape=(None, self.args.nclass),
                                     name='labels')
        self.is_training = tf.placeholder(dtype=tf.bool)  # training mode flag
        self.lr = tf.placeholder(tf.float32)
        self.speccoef = tf.placeholder(tf.float32)

        # forward prop
        a = self.inputs
        for nunit in self.args.nhidden:
            a = tf.layers.dense(a, nunit, use_bias=True, activation='relu')
        logits = tf.layers.dense(a, self.args.nclass)
        xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels,
                                                       logits=logits)
        self.xent = tf.reduce_mean(xent)

        # weight decay and hessian reg
        regularizable = [
            t for t in tf.trainable_variables() if 'bias' not in t.op.name
        ]
        wdec = tf.global_norm(regularizable)**2
        self.spec, self.projvec_op, self.projvec_corr, self.eigvec = spectral_radius(
            self.xent, tf.trainable_variables(), self.args.projvec_beta)
        self.loss = self.xent + self.args.wdeccoef * wdec  # + self.speccoef*self.spec

        # gradient operations
        optim = tf.train.AdamOptimizer(self.lr)
        grads = tf.gradients(self.loss, tf.trainable_variables())
        grads, self.grad_norm = tf.clip_by_global_norm(
            grads, clip_norm=self.args.max_grad_norm)
        self.weight_norm = tf.global_norm(tf.trainable_variables())

        # training and assignment operations
        self.train_op = optim.apply_gradients(
            zip(grads, tf.trainable_variables()))
        self.inputweights = [
            tf.zeros_like(t) for t in tf.trainable_variables()
        ]
        self.assign_op = [
            tf.assign(t, w)
            for t, w in zip(tf.trainable_variables(), self.inputweights)
        ]

        # accuracy
        self.predictions = tf.sigmoid(logits)
        equal = tf.equal(self.labels, tf.round(self.predictions))
        self.acc = tf.reduce_mean(tf.cast(equal, tf.float32))

        # miscellaneous
        self.filtnorms = utils.filtnorm(tf.trainable_variables())
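
A hedged training-loop sketch, assuming an instance model of the surrounding class, an active tf.Session named sess, and numpy batches xbatch/ybatch; the step count, logging interval, and learning rate are illustrative values rather than settings from the original code. The projection-vector update op is run alongside the train op so the spectral-radius estimate is refreshed each step.

for step in range(1000):
    _, _, loss, acc, spec = sess.run(
        [model.train_op, model.projvec_op, model.loss, model.acc, model.spec],
        feed_dict={model.inputs: xbatch,
                   model.labels: ybatch,
                   model.lr: 1e-3,           # illustrative learning rate
                   model.speccoef: 0.,       # spectral term is commented out of the loss above
                   model.is_training: True})
    if step % 100 == 0:
        print('step %d: loss %.4f  acc %.4f  spectral radius %.4f' % (step, loss, acc, spec))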
Example #3
    def get_filtnorm(self, weights):
        """evaluate the per-filter norms of the given weight tensors"""
        return self.sess.run(utils.filtnorm(weights))
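
A one-line usage example, assuming a constructed instance model of the surrounding class with an active session and built graph: the call evaluates the same utils.filtnorm used inside spectral_radius on the given weights and returns the per-filter norms as numpy arrays.

filtnorms = model.get_filtnorm(tf.trainable_variables())  # list of numpy arrays, one per weight tensor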