def __init__(self, var_list, *, beta1=0.9, beta2=0.999, epsilon=1e-08, scale_grad_by_procs=True, comm=None):
    """
    Set up Adam-style optimizer state over the flattened contents of ``var_list``.

    :param var_list: (list) the variables to optimize; passed to ``U.numel``,
        ``U.SetFromFlat`` and ``U.GetFlat``
    :param beta1: (float) stored as ``self.beta1`` (presumably the first-moment decay rate)
    :param beta2: (float) stored as ``self.beta2`` (presumably the second-moment decay rate)
    :param epsilon: (float) stored as ``self.epsilon`` (presumably a numerical-stability term)
    :param scale_grad_by_procs: (bool) stored as-is; its use is outside this method
    :param comm: communicator object stored as ``self.comm``; ``None`` is stored
        unchanged (no default communicator is substituted here)
    """
    self.var_list = var_list
    self.beta1 = beta1
    self.beta2 = beta2
    self.epsilon = epsilon
    self.scale_grad_by_procs = scale_grad_by_procs

    # Total length of the flat parameter vector across all variables.
    size = sum(U.numel(v) for v in var_list)

    # Two zero-initialised float32 accumulators, one slot per flat parameter.
    self.m = np.zeros(size, 'float32')
    self.v = np.zeros(size, 'float32')
    # Step counter, starts at zero.
    self.t = 0

    self.setfromflat = U.SetFromFlat(var_list)
    self.getflat = U.GetFlat(var_list)

    # Previously the `comm` argument was accepted but dropped, leaving the
    # instance without a `comm` attribute; keep what the caller passed.
    self.comm = comm
def __init__(self, var_list, *, beta1=0.9, beta2=0.999, epsilon=1e-08, scale_grad_by_procs=True, comm=None):
    """
    Build the optimizer state for ``var_list`` and select the communicator.

    :param var_list: (list) the variables to optimize; passed to ``U.numel``,
        ``U.SetFromFlat`` and ``U.GetFlat``
    :param beta1: (float) stored as ``self.beta1``
    :param beta2: (float) stored as ``self.beta2``
    :param epsilon: (float) stored as ``self.epsilon``
    :param scale_grad_by_procs: (bool) stored as-is; its use is outside this method
    :param comm: communicator to use; when ``None`` and the module-level ``MPI``
        name is not ``None``, ``MPI.COMM_WORLD`` is used instead
    """
    self.var_list = var_list
    self.beta1, self.beta2, self.epsilon = beta1, beta2, epsilon
    self.scale_grad_by_procs = scale_grad_by_procs

    # Add up the element counts of every variable to size the flat buffers.
    n_flat = 0
    for var in var_list:
        n_flat += U.numel(var)

    # Zero-initialised float32 accumulators plus a step counter.
    self.m = np.zeros(n_flat, 'float32')
    self.v = np.zeros(n_flat, 'float32')
    self.t = 0

    self.setfromflat = U.SetFromFlat(var_list)
    self.getflat = U.GetFlat(var_list)

    # Fall back to the world communicator only when none was supplied and
    # MPI is available; otherwise keep the caller's value (possibly None).
    if comm is None and MPI is not None:
        self.comm = MPI.COMM_WORLD
    else:
        self.comm = comm
def __init__(self, var_list, *, beta1=0.9, beta2=0.999, epsilon=1e-08, scale_grad_by_procs=True, comm=None,
             sess=None):
    """
    A parallel MPI implementation of the Adam optimizer for TensorFlow
    https://arxiv.org/abs/1412.6980

    :param var_list: ([TensorFlow Tensor]) the variables
    :param beta1: (float) Adam beta1 parameter
    :param beta2: (float) Adam beta2 parameter
    :param epsilon: (float) to help with preventing arithmetic issues
    :param scale_grad_by_procs: (bool) if the scaling should be done by processes
    :param comm: (MPI Communicators) if None, mpi4py.MPI.COMM_WORLD
    :param sess: (TensorFlow Session) forwarded to ``tf_utils.SetFromFlat`` and
        ``tf_utils.GetFlat``; presumably those fall back to
        tf.get_default_session() when it is None
    """
    # Default to the world communicator when the caller did not supply one.
    if comm is None:
        comm = mpi4py.MPI.COMM_WORLD

    self.var_list = var_list
    self.beta1 = beta1
    self.beta2 = beta2
    self.epsilon = epsilon
    self.scale_grad_by_procs = scale_grad_by_procs

    # Length of the flat parameter vector spanning every variable.
    n_flat = sum(map(tf_utils.numel, var_list))

    # "First moment estimate" m in the paper:
    # exponential moving average of gradient values.
    self.exp_avg = np.zeros(n_flat, 'float32')
    # "Second raw moment estimate" v in the paper:
    # exponential moving average of squared gradient values.
    self.exp_avg_sq = np.zeros(n_flat, 'float32')
    self.step = 0

    self.setfromflat = tf_utils.SetFromFlat(var_list, sess=sess)
    self.getflat = tf_utils.GetFlat(var_list, sess=sess)
    self.comm = comm
def flatten_grads(var_list, grads):
    """Reshape each gradient to rank 1 and concatenate them along axis 0.

    Variables and gradients are paired positionally with ``zip``; each gradient
    is reshaped to ``[U.numel(v)]`` for its paired variable ``v``.
    """
    flat_pieces = []
    for var, grad in zip(var_list, grads):
        flat_pieces.append(tf.reshape(grad, [U.numel(var)]))
    return tf.concat(flat_pieces, 0)