def __init__(self, input_size, num_units, weight_factor=1.0):
    # Input-to-hidden and recurrent weight matrices, both Xavier-initialized
    # with `weight_factor` as an extra scaling constant.
    self.W = tf.get_variable(
        "W", [input_size, num_units],
        initializer=lambda shape, dtype, partition_info: xavier_init(
            shape[0], shape[1], const=weight_factor))
    self.Wr = tf.get_variable(
        "Wr", [num_units, num_units],
        initializer=lambda shape, dtype, partition_info: xavier_init(
            shape[0], shape[1], const=weight_factor))
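# These snippets rely on xavier_init / xavier_vec_init from a local `util`
# module that is not included here. A minimal sketch of what they presumably
# compute (standard Glorot uniform scaling, with `const` as an extra
# multiplier); the exact bodies are an assumption:
import numpy as np

def xavier_init(fan_in, fan_out, const=1.0):
    # Uniform Glorot bound scaled by `const` (assumed behavior).
    k = const * np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-k, k, size=(fan_in, fan_out)).astype(np.float32)

def xavier_vec_init(fan_in, const=1.0):
    # Vector variant used for per-unit gains and biases (assumed behavior).
    k = const * np.sqrt(3.0 / fan_in)
    return np.random.uniform(-k, k, size=(fan_in,)).astype(np.float32)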
def _initialize_weights(self):
    # Autoencoder parameters: Xavier for the encoder weights, zeros for the
    # biases and for the decoder weights (as in the original snippet).
    all_weights = dict()
    all_weights['w1'] = tf.Variable(
        xavier_init(self.num_input, self.num_hidden))
    all_weights['b1'] = tf.Variable(
        tf.zeros([self.num_hidden], dtype=tf.float32))
    all_weights['w2'] = tf.Variable(
        tf.zeros([self.num_hidden, self.num_input], dtype=tf.float32))
    all_weights['b2'] = tf.Variable(
        tf.zeros([self.num_input], dtype=tf.float32))
    return all_weights
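# A sketch of how a weight dict like this is typically consumed by a
# one-hidden-layer autoencoder; `self.x`, the softplus transfer and the
# squared-error cost are assumptions, not part of the original snippet:
def _build(self):
    w = self._initialize_weights()
    self.hidden = tf.nn.softplus(tf.matmul(self.x, w['w1']) + w['b1'])
    self.reconstruction = tf.matmul(self.hidden, w['w2']) + w['b2']
    self.cost = 0.5 * tf.reduce_sum(tf.square(self.reconstruction - self.x))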
import numpy as np
import tensorflow as tf

# xavier_init, xavier_vec_init and is_sequence are assumed to come from the
# local `util` module used elsewhere in these snippets.
from util import xavier_init, xavier_vec_init, is_sequence


def function(*args, **kwargs):
    assert 'size' in kwargs, "Need size information"
    assert 'name' in kwargs, "Need name for output"
    assert len(args) > 0, "Empty arguments in function {}".format(kwargs["name"])

    size = kwargs["size"]
    name = kwargs["name"]
    config = kwargs.get("config", {})
    # Config values act as defaults; explicit kwargs win.
    for k, v in config.items():
        if k not in kwargs:
            kwargs[k] = v

    user_act = kwargs.get("act")
    use_bias = kwargs.get("use_bias", True)
    weight_factor = kwargs.get("weight_factor", 1.0)
    use_weight_norm = kwargs.get("use_weight_norm", False)
    layers_num = kwargs.get("layers_num")
    reuse = kwargs.get("reuse", False)
    use_batch_norm = kwargs.get("use_batch_norm", False)
    scope_name = kwargs.get("scope_name", "")
    if scope_name:
        name = "{}/{}".format(scope_name, name)

    if use_weight_norm:
        use_bias = False

    epsilon = 1e-03

    if not is_sequence(size):
        size = (size,)
    # A scalar size plus layers_num expands to a uniform stack of layers.
    if layers_num is not None:
        size = size * layers_num
    if layers_num is None:
        layers_num = len(size)
    else:
        assert layers_num == len(size), \
            "Got layers num not matched with size information. " \
            "layers_num: {}, size: {}".format(layers_num, size)

    act = user_act

    # Note: the original message said "batch normalization", but the condition
    # actually guards weight normalization.
    assert act is not None or not use_weight_norm, \
        "Can't use weight normalization with a linear activation function"

    with tf.variable_scope(name, reuse=reuse):
        inputs = args
        for l_id in range(layers_num):
            nout = size[l_id]
            layer_out = None

            for idx, a in enumerate(inputs):
                a_shape = a.get_shape().as_list()
                nin = a_shape[-1]

                init = lambda shape, dtype, partition_info: xavier_init(
                    nin, nout, const=weight_factor)
                vec_init = lambda shape, dtype, partition_info: xavier_vec_init(
                    nout, const=weight_factor)
                zeros_init = lambda shape, dtype, partition_info: np.zeros((nout,))
                ones_init = lambda shape, dtype, partition_info: np.ones((nout,))

                if not use_weight_norm:
                    w = tf.get_variable("W{}-{}".format(l_id, idx), [nin, nout],
                                        dtype=tf.float32, initializer=init)
                    a_w = tf.matmul(a, w)
                else:
                    # Weight normalization: direction matrix V, per-unit gain g.
                    V = tf.get_variable("V{}-{}".format(l_id, idx), [nin, nout],
                                        dtype=tf.float32, initializer=init)
                    g = tf.get_variable("g{}-{}".format(l_id, idx), [nout],
                                        dtype=tf.float32, initializer=vec_init)
                    a_w = tf.matmul(a, V)
                    a_w = a_w * g / tf.sqrt(tf.reduce_sum(tf.square(V), [0]))

                if use_bias:
                    b = tf.get_variable("b{}-{}".format(l_id, idx), [nout],
                                        tf.float32, initializer=zeros_init)
                    a_w = a_w + b

                # Multiple input tensors are summed into one pre-activation.
                layer_out = a_w if layer_out is None else layer_out + a_w

            if use_batch_norm:
                # Plain batch normalization with learned scale and shift.
                batch_mean, batch_var = tf.nn.moments(layer_out, [0])
                layer_out = (layer_out - batch_mean) / tf.sqrt(batch_var + epsilon)
                gamma = tf.get_variable("gamma{}".format(l_id), [nout],
                                        dtype=tf.float32, initializer=ones_init)
                beta = tf.get_variable("beta{}".format(l_id), [nout],
                                       dtype=tf.float32, initializer=zeros_init)
                layer_out = gamma * layer_out + beta

            inputs = (act(layer_out) if act else layer_out,)

        return inputs[0]
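# Usage sketch (assumed, not from the original source). Because `function`
# sums all positional input tensors into a single pre-activation, merging
# two streams into one layer is a single call:
a = tf.placeholder(tf.float32, [None, 32])
b = tf.placeholder(tf.float32, [None, 64])
merged = function(a, b, size=128, name="merge", act=tf.nn.tanh)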
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import os
import sys
import numpy as np

sys.path.insert(0, "/home/alexeyche/prog/alexeyche-junk/cns/tf_dnn")

from util import xavier_init, xavier_vec_init, shl, shm
from function import function

weight_factor = 0.1

init = lambda shape, dtype, partition_info: xavier_init(
    shape[0], shape[1], const=weight_factor)
bias_init = lambda shape, dtype, partition_info: np.zeros((shape[0],))
small_init = lambda shape, dtype, partition_info: xavier_init(
    shape[0], shape[1], const=0.001)
small_vec_init = lambda shape, dtype, partition_info: xavier_vec_init(
    shape[0], const=0.001)
vec_init = lambda shape, dtype, partition_info: xavier_vec_init(
    shape[0], const=weight_factor)
zero_init = lambda shape, dtype, partition_info: np.zeros((shape[0], shape[1]))

mnist = input_data.read_data_sets(os.path.join(os.environ["HOME"], "mnist"),
                                  one_hot=True)

input_size = 784
h0_size = 500
h1_size = 500
output_size = 10
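# A plausible continuation (a sketch, not the original script): MNIST
# placeholders and a 784-500-500-10 network wired with the `function`
# helper shown above.
x = tf.placeholder(tf.float32, [None, input_size], name="x")
y = tf.placeholder(tf.float32, [None, output_size], name="y")

h0 = function(x, size=h0_size, name="h0", act=tf.nn.relu,
              weight_factor=weight_factor)
h1 = function(h0, size=h1_size, name="h1", act=tf.nn.relu,
              weight_factor=weight_factor)
logits = function(h1, size=output_size, name="output")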
import os
import sys

import numpy as np
import tensorflow as tf

from functools import partial
from os.path import join as pj

sys.path.insert(0, "/home/alexeyche/prog/alexeyche-junk/cns/tf_dnn")

from util import xavier_init, xavier_vec_init, shl, shm
from function import function
from hopfield_util import (symmetric_feedforward_weights, batch_outer,
                           toy_setup, binary_setup, linear_setup)

np.random.seed(5)
tf.set_random_seed(5)

weight_factor = 0.1

init = lambda shape, dtype, partition_info: xavier_init(
    shape[0], shape[1], const=weight_factor)
bias_init = lambda shape, dtype, partition_info: np.zeros((shape[0],))

input_size = 10
hidden_size = 10
output_size = 4
batch_size = 10
epsilon = 0.5
beta = 1.0
epochs = 30

tmp_dir = pj(os.environ["HOME"], "hop")
# Clear stale plots from previous runs.
for f in os.listdir(tmp_dir):
    if f.endswith(".png"):
        os.remove(pj(tmp_dir, f))

steps_num = 20
net_size = input_size + hidden_size + output_size
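# The hopfield_util helpers are not shown in these snippets. Below is a
# hypothetical sketch of what symmetric_feedforward_weights might build,
# assuming it produces a net_size x net_size connectivity mask that permits
# only adjacent-layer connections and is symmetric (as a Hopfield energy
# requires); the *_sketch suffix marks it as an assumption, not the original.
def symmetric_feedforward_weights_sketch(layer_sizes):
    n = sum(layer_sizes)
    mask = np.zeros((n, n), dtype=np.float32)
    offsets = np.cumsum([0] + list(layer_sizes))
    for l in range(len(layer_sizes) - 1):
        r0, r1 = offsets[l], offsets[l + 1]
        c0, c1 = offsets[l + 1], offsets[l + 2]
        mask[r0:r1, c0:c1] = 1.0
        mask[c0:c1, r0:r1] = 1.0  # mirror block to keep the matrix symmetric
    return mask

# e.g. a 24x24 mask for the 10-10-4 network configured above:
w_mask = symmetric_feedforward_weights_sketch(
    (input_size, hidden_size, output_size))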