def regression_approach(
        log_dir='/home/vionlabs/Documents/vionlabs_weilun/machine_learning/tensorflow_testing/graph'):
    """Train a softmax-regression classifier and log TensorBoard summaries.

    Builds ``y = softmax(x W + b)`` over flattened 784-value inputs and 10
    classes, minimises the summed cross entropy with gradient descent
    (learning rate 0.01) for 1000 iterations and finally prints the accuracy
    on ``mnist.test``.  Every tenth iteration evaluates the merged summaries
    and the accuracy on the test set *instead of* running a training step.

    Reads the module-level ``mnist`` dataset object.

    Args:
        log_dir: Directory the summary writer stores its event files in.
            Defaults to the path that used to be hard-coded here.
    """
    # None leaves the batch size open; 784 is the flattened image size and
    # 10 is the number of digit classes.
    x = tf.placeholder("float", shape=[None, 784], name="x_input")
    y_ = tf.placeholder("float", shape=[None, 10], name="y_input")

    # Weights and bias both start at zero.
    W = tf.Variable(tf.zeros([784, 10]), name="weights")
    b = tf.Variable(tf.zeros([10]), name="bias")

    with tf.name_scope("prediction"):
        y = tf.nn.softmax(tf.matmul(x, W) + b)

    # The summary ops are picked up later by merge_all_summaries(), so their
    # return values do not need to be kept.
    tf.histogram_summary("weights", W)
    tf.histogram_summary("biases", b)
    tf.histogram_summary("y", y)

    with tf.name_scope("cost_func"):
        # NOTE(review): tf.log(y) is unbounded when a predicted probability
        # reaches 0; consider clipping y if the loss ever turns into NaN.
        cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
        tf.scalar_summary("cross_entropy", cross_entropy)

    with tf.name_scope("train"):
        train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

    with tf.name_scope("test"):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        tf.scalar_summary("accuracy", accuracy)

    merged = tf.merge_all_summaries()

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        writer = tf.train.SummaryWriter(log_dir, graph_def=sess.graph_def)
        try:
            for i in range(1000):
                if i % 10 == 0:
                    # Record summary data and the accuracy on the test set.
                    feed = {x: mnist.test.images, y_: mnist.test.labels}
                    summary_str, acc = sess.run([merged, accuracy], feed_dict=feed)
                    writer.add_summary(summary_str, i)
                    print("Accuracy at step %s: %s" % (i, acc))
                else:
                    batch_xs, batch_ys = mnist.train.next_batch(100)
                    feed = {x: batch_xs, y_: batch_ys}
                    sess.run(train_step, feed_dict=feed)
            # Tensor.eval() needs a default session, so this has to stay
            # inside the `with tf.Session()` block.  The parenthesised form
            # prints the same text on Python 2 and Python 3.
            print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
        finally:
            # Flush pending events and release the event file.
            writer.close()
def _activation_summary(x):
    """Create summaries for visualization.

    Adds a histogram of the values of `x` and a scalar holding the fraction
    of zeros in `x`.  The summary tags are based on the name of the op that
    produces `x`, with any ``<TOWER_NAME>_<digits>/`` part removed.

    Args:
        x: Tensor to summarise.
    """
    tower_prefix_pattern = '%s_[0-9]*/' % TOWER_NAME
    tensor_name = re.sub(tower_prefix_pattern, '', x.op.name)

    tf.histogram_summary(tensor_name + '/activations', x)

    zero_fraction = tf.nn.zero_fraction(x)
    tf.scalar_summary(tensor_name + '/sparsity', zero_fraction)
def training(loss, learning_rate, loss_key=None):
    """Set up the training op for `loss`.

    Adds a scalar summary for the loss, builds a gradient-descent optimizer
    and creates the op that applies one optimisation step while incrementing
    a freshly created global-step variable.

    Args:
        loss: Loss tensor to minimise.
        learning_rate: Learning rate handed to the gradient-descent optimizer.
        loss_key: Optional key.  When given, the loss summary op is stored in
            the module-level ``loss_summaries`` under this key.

    Returns:
        A ``(train_op, global_step)`` tuple: the op to pass to ``sess.run()``
        to perform a training step, and the step-counter variable.
    """
    # Scalar summary for the snapshot loss.
    loss_summary = tf.scalar_summary(loss.op.name, loss)
    if loss_key is not None:
        loss_summaries[loss_key] = loss_summary
    else:
        # NOTE(review): variable histograms are only added on the un-keyed
        # path - confirm this matches the intended layout.
        for variable in tf.trainable_variables():
            tf.histogram_summary(variable.op.name, variable)

    # Non-trainable counter of optimisation steps.
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # minimize() applies the gradients and increments global_step as a
    # single training step.
    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = sgd.minimize(loss, global_step=global_step)
    return train_op, global_step
def produce_embeddings(source):
    """Produce the embeddings from the one-hot vectors.

    Applies a 1x1 convolution with a learned ``T_ENGLISH`` ->
    ``EMBEDDINGS_DIMENSION`` filter, adds a bias and squashes with tanh.
    Histogram summaries are recorded for the filter and the bias.

    Args:
        source: 4D tensor, shape=(BATCH_SIZE, 1, S_ENGLISH, T_ENGLISH)

    Returns:
        4D tensor, shape=(BATCH_SIZE, 1, S_ENGLISH, EMBEDDINGS_DIMENSION)
    """
    weight_stddev = 1.0 / math.sqrt(float(T_ENGLISH))

    with tf.variable_scope('Embeddings'):
        weights = tf.get_variable(
            name='weights',
            shape=[1, 1, T_ENGLISH, EMBEDDINGS_DIMENSION],
            initializer=tf.random_normal_initializer(stddev=weight_stddev))
        tf.histogram_summary("weights-encode", weights)

        biases = tf.get_variable(
            name='biases',
            shape=[EMBEDDINGS_DIMENSION],
            initializer=tf.constant_initializer(0.0))
        tf.histogram_summary("biases-encode", biases)

        projected = tf.nn.conv2d(source, filter=weights,
                                 strides=[1, 1, 1, 1], padding='VALID')
        embeddings = tf.nn.tanh(biases + projected)

    return embeddings
def nn_conv_layer(input_tensor, patch_size, num_channels, output_depth,
                  layer_name, biases=False, act=None, pool=None):
    """Reusable code for making a simple convolutional layer.

    Creates a ``patch_size x patch_size`` filter bank, convolves
    `input_tensor` with stride 1 and SAME padding and records summaries.
    Optionally builds pooling and activation ops on top of the convolution.

    Args:
        input_tensor: Input to ``tf.nn.conv2d``.
        patch_size: Height and width of the convolution filters.
        num_channels: Number of input channels.
        output_depth: Number of filters.
        layer_name: Name scope and summary-tag prefix of the layer.
        biases: When ``True`` a bias variable of shape ``[output_depth]`` is
            created and added before the activation.  ``False`` (the
            default) or ``None`` adds no bias.
        act: Optional activation, called as ``act(tensor, 'activation')``.
        pool: Optional pooling function, called with ``ksize``, ``strides``,
            ``padding`` and ``name`` keyword arguments.

    Returns:
        The convolution output *before* pooling and activation.
    """
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
        # This Variable will hold the state of the weights for the layer.
        with tf.name_scope('weights'):
            weights = weight_variable(
                [patch_size, patch_size, num_channels, output_depth])
            variable_summaries(weights, layer_name + '/weights')

        # Keep the flag and the created variable apart so that the flag value
        # is never mistaken for a tensor further down.
        bias_term = biases
        if biases == True:  # noqa: E712 - same acceptance test as before
            with tf.name_scope('biases'):
                bias_term = bias_variable([output_depth])
                variable_summaries(bias_term, layer_name + '/biases')

        with tf.name_scope('conv2d'):
            preactivate = tf.nn.conv2d(input_tensor, weights, [1, 1, 1, 1],
                                       padding='SAME')
            tf.histogram_summary(layer_name + '/pre_activations', preactivate)
            print("preactivate:%s" % (preactivate.get_shape()))

        # Without a pooling function the activation works on the raw
        # convolution output (this used to hit an undefined `max_pool`).
        pooled = preactivate
        if pool is not None:
            pooled = pool(preactivate, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME',
                          name='max_pool')

        if act is not None:
            # Only add a bias when there is one (previously `False` itself
            # was added to the tensor).
            if bias_term is False or bias_term is None:
                act_input = pooled
            else:
                act_input = pooled + bias_term
            act(act_input, 'activation')

    # NOTE(review): the pooled/activated tensor is built but not returned;
    # callers receive the raw convolution output.  Kept as-is because
    # returning the pooled tensor would change the output shape - confirm
    # which one callers are meant to get.
    return preactivate
def train(self, total_loss):
    """Build the training op with Adam, loss averaging and summaries.

    Args:
        total_loss: Loss tensor to minimise.

    Returns:
        A no-op named 'train' that depends on both the gradient update and
        the update of the moving averages of all trainable variables.
    """
    # Moving average (decay 0.9) over the collected losses and the total.
    tracked_losses = tf.get_collection('losses') + [total_loss]
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    loss_averages_op = loss_averages.apply(tracked_losses)

    for loss_tensor in tracked_losses:
        tf.scalar_summary(loss_tensor.op.name + ' (raw)', loss_tensor)

    # Gradients are computed only after the loss averages were updated.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.AdamOptimizer()
        grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads)

    # Histograms for variables and for the gradients that exist.
    for variable in tf.trainable_variables():
        tf.histogram_summary(variable.op.name, variable)
    for gradient, variable in grads:
        if gradient is None:
            continue
        tf.histogram_summary(variable.op.name + '/gradients', gradient)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        Recognizer.MOVING_AVERAGE_DECAY)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
def train(total_loss, global_step):
    """Build the training op: gradient descent with a staircase-decayed rate.

    Args:
        total_loss: Loss tensor to minimise.
        global_step: Step counter; it drives the learning-rate decay and is
            incremented by the gradient update.

    Returns:
        A no-op named "train" that runs only after the gradient update has
        been applied.
    """
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary("learning_rate", lr)

    loss_averages_op = _add_loss_summaries(total_loss)

    # Gradients are computed only after the loss summaries were updated.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    for grad, var in grads:
        # compute_gradients() yields None for variables without a gradient.
        # Test for that explicitly: taking the truth value of a Tensor is
        # not a valid "is there a gradient" check.
        if grad is not None:
            tf.histogram_summary(var.op.name + "/gradients", grad)

    with tf.control_dependencies([apply_gradient_op]):
        train_op = tf.no_op(name="train")
    return train_op
def _add_gradients_summaries(grads_and_vars):
    """Add histogram summaries to gradients.

    For every pair with a gradient, two histogram summaries are created: one
    of the gradient values and one of their global norm.  Pairs without a
    gradient are only logged.

    Note: The summaries are also added to the SUMMARIES collection.

    Args:
        grads_and_vars: A list of gradient to variable pairs (tuples).

    Returns:
        The _list_ of the added summaries for grads_and_vars.
    """
    summaries = []
    for grad, var in grads_and_vars:
        if grad is None:
            tf.logging.info('Var %s has no gradient', var.op.name)
            continue

        # Sparse gradients carry their numbers in `.values`.
        grad_values = grad.values if isinstance(grad, tf.IndexedSlices) else grad

        value_summary = tf.histogram_summary(var.op.name + ':gradient',
                                             grad_values)
        summaries.append(value_summary)

        norm_summary = tf.histogram_summary(var.op.name + ':gradient_norm',
                                            tf.global_norm([grad_values]))
        summaries.append(norm_summary)
    return summaries
def pool_layer(self, input_, ksize, stride, name):
    """Max-pool `input_` inside its own variable scope.

    Args:
        input_: Tensor handed to ``self.max_pool``.
        ksize: Pooling window, forwarded to ``self.max_pool``.
        stride: Pooling stride, forwarded to ``self.max_pool``.
        name: Name of the variable scope, of the pooling op and prefix of
            the histogram-summary tag.

    Returns:
        The pooled tensor.
    """
    with tf.variable_scope(name):
        # Forward the caller's name; the literal string "name" used to be
        # passed here, so every pooling op ended up with the same name.
        pooled = self.max_pool(input_, ksize, stride, name=name)
        tf.histogram_summary(name + "/pooled", pooled)
    return pooled
def __init__(self, config):
    """Build the graph of a GRU sequence classifier.

    Token ids are embedded, fed step by step through a GRU, and the final
    state is projected onto ``config.output_dim`` logits.  Loss, accuracy
    and the merged TensorBoard summaries are exposed as attributes.

    Args:
        config: Object providing ``batch_size``, ``max_seq_len``,
            ``output_dim``, ``hidden_state_dim``, ``vocab_size`` and
            ``embedding_dim``.
    """
    self.config = config
    batch_size = self.config.batch_size

    # Inputs: token ids per example and one class id per example.
    self.input = tf.placeholder('int32', [batch_size, config.max_seq_len],
                                name='input')
    self.labels = tf.placeholder('int64', [batch_size], name='labels')
    self.labels_one_hot = tf.one_hot(indices=self.labels,
                                     depth=config.output_dim,
                                     on_value=1.0,
                                     off_value=0.0,
                                     axis=-1)

    self.gru = GRUCell(config.hidden_state_dim)

    # Embedding table, uniformly initialised in [-1, 1).
    embedding_init = tf.random_uniform(
        [config.vocab_size, config.embedding_dim], -1.0, 1.0)
    embeddings_we = tf.get_variable('word_embeddings',
                                    initializer=embedding_init)
    embed_input = tf.nn.embedding_lookup(embeddings_we, self.input)
    self.emb = embed_input

    # One tensor per time step, with the split axis squeezed away.
    time_slices = tf.split(1, config.max_seq_len, embed_input)
    inputs = [tf.squeeze(time_slice, squeeze_dims=[1])
              for time_slice in time_slices]
    _, last_slu_state = tf.nn.rnn(cell=self.gru, inputs=inputs,
                                  dtype=tf.float32)

    # Projection of the last state onto the label space.
    projection_init = tf.random_uniform(
        [config.hidden_state_dim, config.output_dim], -1.0, 1.0)
    w_project = tf.get_variable('project2labels', initializer=projection_init)
    logits_bo = tf.matmul(last_slu_state, w_project)
    self.logits = logits_bo
    tf.histogram_summary('logits', logits_bo)

    self.probabilities = tf.nn.softmax(logits_bo)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits_bo, self.labels_one_hot)
    self.loss = tf.reduce_mean(cross_entropy)
    self.predict = tf.nn.softmax(logits_bo)

    # TensorBoard
    predicted_labels = tf.argmax(self.predict, 1)
    hits = tf.cast(tf.equal(predicted_labels, self.labels), 'float32')
    self.accuracy = tf.reduce_mean(hits, name='accuracy')
    tf.scalar_summary('CCE loss', self.loss)
    tf.scalar_summary('Accuracy', self.accuracy)
    self.tb_info = tf.merge_all_summaries()
def inference(images):
    """Build the MNIST model.

    One ReLU hidden layer of ``LAYER_SIZE`` units followed by an output
    layer of ``NUM_CLASSES`` units whose linear result is passed through
    ``logSoftMax``.  Histogram summaries are recorded for the hidden layer's
    weights and biases.

    Args:
        images: Input tensor; it is multiplied with an
            ``IMAGE_PIXELS x LAYER_SIZE`` weight matrix.

    Returns:
        The output of ``logSoftMax`` for the output layer.
    """
    hidden_stddev = 1.0 / math.sqrt(float(IMAGE_PIXELS))
    output_stddev = 1.0 / math.sqrt(float(LAYER_SIZE))

    # Hidden 1
    with tf.name_scope('hidden1'):
        hidden_init = tf.truncated_normal([IMAGE_PIXELS, LAYER_SIZE],
                                          stddev=hidden_stddev)
        hidden_weights = tf.Variable(hidden_init, name='weights')
        hidden_biases = tf.Variable(tf.zeros([LAYER_SIZE]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, hidden_weights) + hidden_biases)

        # Add summary ops to collect data
        tf.histogram_summary('weights', hidden_weights)
        tf.histogram_summary('biases', hidden_biases)

    # Output layer
    with tf.name_scope('softmax_linear'):
        output_init = tf.truncated_normal([LAYER_SIZE, NUM_CLASSES],
                                          stddev=output_stddev)
        output_weights = tf.Variable(output_init, name='weights')
        output_biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = logSoftMax(tf.matmul(hidden1, output_weights) + output_biases)

    return logits
def train(total_loss, global_step, learning_rate=INITIAL_LEARNING_RATE):
    """Build the training op: momentum SGD with a staircase-decayed rate.

    Args:
        total_loss: Loss tensor to minimise.
        global_step: Step counter; it drives the learning-rate decay and is
            incremented by the gradient update.
        learning_rate: Initial learning rate before decay.

    Returns:
        A no-op named "train" that runs only after the gradient update has
        been applied.
    """
    # DECAY_STEPS is the number of steps required for one decay.
    lr = tf.train.exponential_decay(learning_rate,
                                    global_step,
                                    DECAY_STEPS,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    tf.scalar_summary('learning_rate', lr)

    # Compute the gradient step once the loss has been evaluated.
    with tf.control_dependencies([total_loss]):
        opt = tf.train.MomentumOptimizer(lr, momentum=0.95)
        grads = opt.compute_gradients(total_loss)

    # Gradient clipping, if wanted, would go here, before the update.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    for gradient, variable in grads:
        if gradient is None:
            continue
        print("Found gradients for: ", variable.op.name)
        tf.histogram_summary(variable.op.name + "/gradients", gradient)

    with tf.control_dependencies([apply_gradient_op]):
        train_op = tf.no_op(name="train")
    return train_op
def inference(self, images, z):
    """Wire up the generator and discriminator graphs and their summaries.

    Creates a ``Generative`` and a ``Discriminative`` model, runs the
    discriminator on `images` and, with ``reuse=True``, on the generator
    output, and stores all intermediate results on ``self``.

    Args:
        images: Image tensor handed to the discriminator.
        z: Noise tensor handed to the generator and its sampler.

    Returns:
        The tuple ``(images, D_logits, D_logits_, G_sum, z_sum, d_sum,
        d__sum)``: the input images, the discriminator logits for `images`
        and for the generated images, and the summary ops for the generated
        images, `z` and the two discriminator outputs.
    """
    # Parenthesised single-argument prints produce the same output on
    # Python 2 and also parse on Python 3.
    banner = "=" * 100
    print(banner)
    print("images DCGAN inference:")
    print(images.get_shape())
    print(banner)

    self.z_sum = tf.histogram_summary("z", z)

    # Generative
    print("generative")
    self.generator = Generative()
    self.G = self.generator.inference(z)

    # Discriminative
    print("discriminative from images")
    self.discriminator = Discriminative()
    self.D, self.D_logits = self.discriminator.inference(images)

    print("discriminative for sample from noize")
    self.sampler = self.generator.sampler(z)
    self.D_, self.D_logits_ = self.discriminator.inference(self.G, reuse=True)

    self.d_sum = tf.histogram_summary("d", self.D)
    self.d__sum = tf.histogram_summary("d_", self.D_)
    self.G_sum = tf.image_summary("G", self.G)

    return (images, self.D_logits, self.D_logits_, self.G_sum, self.z_sum,
            self.d_sum, self.d__sum)
def conv_layer(input, filter_shape, strides=(1, 1, 1, 1), keep_prob=1):
    """Add a convolutional layer to the graph.

    Creates filters and biases, computes the convolutions, passes the output
    through a leaky ReLU activation function and applies dropout. Equivalent
    to calling conv_op()->leaky_relu()->dropout().

    Args:
        input: A tensor of floats with shape [batch_size, input_height,
            input_width, input_depth]. The input volume.
        filter_shape: A list of 4 integers with shape [filter_height,
            filter_width, input_depth, output_depth]. This determines the
            size and number of filters of the convolution.
        strides: A sequence of 4 integers. The amount of stride in the four
            dimensions of the input.
        keep_prob: A float, forwarded to dropout().

    Returns:
        A tensor of floats with shape [batch_size, output_height,
        output_width, output_depth]. The product of the convolutional layer.
    """
    # The default is an immutable tuple so it cannot be shared and modified
    # across calls; conv_op() still receives a fresh list, as before.
    conv = conv_op(input, filter_shape, list(strides))
    relu = leaky_relu(conv)
    output = dropout(relu, keep_prob)

    # Summarize activations under the current name scope.  The graph's
    # private name stack is read because that is how the scope is obtained
    # here ("no easier way").
    scope = tf.get_default_graph()._name_stack
    tf.histogram_summary(scope + '/activations', output)

    return output
def train(loss, learning_rate):
    """Set up an ADAM optimizer, compute gradients and update variables.

    Also records a histogram summary for every gradient that exists.

    Args:
        loss: The loss to minimize.
        learning_rate: A float. The learning rate for ADAM.

    Returns:
        train_op: The operation to run for training.
        global_step: The variable counting the training steps made by the
            optimizer.
    """
    # Step counter and optimizer settings.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                  beta1=0.9,
                                  beta2=0.995,
                                  epsilon=1e-06)

    # Gradient computation and update as two explicit steps so that the
    # gradients can be summarized below.
    grads_and_vars = adam.compute_gradients(loss)
    train_op = adam.apply_gradients(grads_and_vars, global_step=global_step)

    for grad, var in grads_and_vars:
        if grad is None:
            continue
        tf.histogram_summary(var.op.name + '/gradients', grad)

    return train_op, global_step
def nn_layer(input_tensor, input_dim, output_dim, layer_name, act = tf.tanh):
    """Create and return a fully connected NN layer.

    Args:
        input_tensor: TF tensor at the layer input.
        input_dim: Size of the layer input.
        output_dim: Size of the layer output.
        layer_name: Name of the layer, used for the name scope, the variable
            names and the summary tags.
        act: Nonlinear activation function, called as
            ``act(tensor, 'activation')``; ``None`` leaves the layer linear.

    Returns:
        The layer output (activated unless `act` is ``None``).
    """
    weights_tag = layer_name + '/weights'
    biases_tag = layer_name + '/biases'

    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            weights = weight_variable(weights_tag, [input_dim, output_dim])
            variable_summaries(weights, weights_tag)

        with tf.name_scope('biases'):
            biases = bias_variable(biases_tag, [output_dim])
            variable_summaries(biases, biases_tag)

        with tf.name_scope('WX_plus_b'):
            preactivate = tf.matmul(input_tensor, weights) + biases
            tf.histogram_summary(layer_name + '/pre_activations', preactivate)

        # Without an activation function the linear output is passed on.
        activations = preactivate
        if act is not None:
            activations = act(preactivate, 'activation')
        tf.histogram_summary(layer_name + '/activations', activations)

    return activations
def expectation_maximization_step(self, x):
    """Build the ops for one expectation-maximization iteration.

    Runs forward-backward on the emission probabilities selected by `x`,
    re-estimates the initial, transition and emission parameters, checks
    convergence and assigns the new parameters.  ``self.T0``, ``self.E`` and
    ``self.T`` are rebound to the results of their assign ops.

    Args:
        x: Indices used to gather rows of ``self.E``.

    Returns:
        The result of ``self.check_convergence`` for the new parameters.
    """
    # Probability of the emission sequence.
    emission_seq_prob = tf.gather(self.E, x)

    with tf.name_scope('Forward_Backward'):
        self.forward_backward(emission_seq_prob)

    with tf.name_scope('Re_estimate_transition'):
        updated_T0, updated_transition = self.re_estimate_transition(x)

    with tf.name_scope('Re_estimate_emission'):
        updated_emission = self.re_estimate_emission(x)

    with tf.name_scope('Check_Convergence'):
        converged = self.check_convergence(updated_T0, updated_transition,
                                           updated_emission)

    with tf.name_scope('Update_parameters'):
        self.T0 = tf.assign(self.T0, updated_T0)
        self.E = tf.assign(self.E, updated_emission)
        self.T = tf.assign(self.T, updated_transition)

    # Histograms are taken of the freshly assigned parameters.
    with tf.name_scope('histogram_summary'):
        for parameter in (self.T0, self.T, self.E):
            tf.histogram_summary(parameter.name, parameter)

    return converged
def __init__(self):
    """Build placeholders, inference/loss/train graphs and summaries.

    The three placeholders (state batch, targets, actions) are stored both
    individually and as the tuple ``self.placeholders``.  The last element of
    the activations returned by ``build_inference_graph`` is exposed as
    ``self.q_values``.
    """
    # Inputs.
    states = tf.placeholder(tf.float32, shape=(None, NUM_TILES))
    targets = tf.placeholder(tf.float32, shape=(None,))
    actions = tf.placeholder(tf.int32, shape=(None,))
    self.state_batch_placeholder = states
    self.targets_placeholder = targets
    self.actions_placeholder = actions
    self.placeholders = (states, targets, actions)

    # Network, loss and training op.
    inference_graph = build_inference_graph(states, HIDDEN_SIZES)
    self.weights, self.biases, self.activations = inference_graph
    self.q_values = self.activations[-1]
    self.loss = build_loss(self.q_values, targets, actions)
    train_graph = build_train_op(self.loss)
    self.train_op, self.global_step, self.learning_rate = train_graph

    # Summaries.
    tf.scalar_summary("Average Target", tf.reduce_mean(targets))
    tf.scalar_summary("Learning Rate", self.learning_rate)
    tf.scalar_summary("Loss", self.loss)
    tf.histogram_summary("States", states)
    tf.histogram_summary("Targets", targets)

    self.init = tf.initialize_all_variables()
    self.summary_op = tf.merge_all_summaries()
def add_latent(self, name, init_mean=None, init_stddev=1e-6, transform=None, shape=None, point_estimate=False):
    """Register a latent quantity and the variables of its approximation.

    Creates a ``q_mean`` variable and, unless `point_estimate` is set, a
    ``q_stddev`` variable with the matching Gaussian entropy.  The resulting
    record is stored in ``self.latents[name]``.

    Args:
        name: Key of the latent; also used in scope and summary names.
        init_mean: Initial value of ``q_mean``; a standard-normal draw from
            NumPy is used when omitted.
        init_stddev: Initial value of ``q_stddev``.
        transform: Stored unchanged under the ``"transform"`` key.
        shape: Stored unchanged under the ``"shape"`` key.
        point_estimate: When true, no ``q_stddev`` variable is created and
            the entropy entry is ``0.0``.
    """
    if init_mean is None:
        init_mean = np.random.randn()

    with tf.name_scope("latent_" + name):
        q_mean = tf.Variable(init_mean, name="q_mean")
        if point_estimate:
            q_stddev = None
            q_entropy = 0.0
        else:
            q_stddev = tf.Variable(init_stddev, name="q_stddev")
            q_entropy = dists.gaussian_entropy(stddev=q_stddev)

    # TODO: infer shape, and make sure that the shapes of q_mean and
    # q_stddev match.
    latent = {
        "q_mean": q_mean,
        "q_stddev": q_stddev,
        "q_entropy": q_entropy,
        "transform": transform,
        "shape": shape,
    }

    tf.histogram_summary("latent_%s/q_mean" % name, q_mean)
    if not point_estimate:
        tf.histogram_summary("latent_%s/q_stddev" % name, q_stddev)

    self.latents[name] = latent
def deconv_layer(input_tensor, mode_tensor, weight_init, filter_size, filter_stride, num_filters, in_channels, output_size, nonlinear_func, use_batchnorm, name):
    """Build a transposed-convolution layer with optional batch norm.

    Args:
        input_tensor: Input to ``tf.nn.conv2d_transpose``.
        mode_tensor: Tensor compared against ``'train'`` to tell batch norm
            whether it is in training mode.
        weight_init: Standard deviation of the normal weight initializer.
        filter_size: Height and width of the filters.
        filter_stride: Stride along both spatial dimensions.
        num_filters: Number of output channels.
        in_channels: Number of input channels.
        output_size: Height and width of the output.
        nonlinear_func: Activation, called as ``nonlinear_func(x, name=...)``.
        use_batchnorm: Whether to apply ``batch_norm`` before the activation.
        name: Prefix for variable, op and summary names.

    Returns:
        The activated output tensor.
    """
    # Variables.
    kernel_shape = [filter_size, filter_size, num_filters, in_channels]
    kernel_init = tf.random_normal_initializer(stddev=weight_init)
    deconv_weights = tf.get_variable(name + '/weights', shape=kernel_shape,
                                     initializer=kernel_init)
    bias = tf.get_variable(name + '/bias', shape=[num_filters],
                           initializer=tf.constant_initializer())

    # Transposed convolution plus bias.
    target_shape = [FLAGS.batch_size, output_size, output_size, num_filters]
    strides = [1, filter_stride, filter_stride, 1]
    deconv = tf.nn.conv2d_transpose(input_tensor, deconv_weights,
                                    target_shape, strides, padding='SAME',
                                    name=name + '/deconv')
    deconv = tf.nn.bias_add(deconv, bias, name=name + '/deconv_bias')

    if use_batchnorm:
        is_training = tf.equal(mode_tensor, 'train')
        deconv = batch_norm(deconv, num_filters, is_training, name + '/bn')

    # NOTE(review): the same bias is added a second time here; kept as-is to
    # preserve the numerics - confirm whether one of the additions is
    # redundant.
    biased = tf.nn.bias_add(deconv, bias)
    activation = nonlinear_func(biased, name=name + '/activation')

    # Summaries are only created when the variable scope is not reusing.
    if not tf.get_variable_scope().reuse:
        tf.histogram_summary('summary/weights/' + name, deconv_weights)
        tf.histogram_summary('summary/activations/' + name, activation)

    return activation
def dcnn_layer(input_tensor, convolution_shape, output_shape, padding,
               layer_name, config, act=tf.nn.relu, strides=(1, 1, 1, 1)):
    """Build a deconvolution (transposed convolution) layer with summaries.

    Args:
        input_tensor: Input to ``conv2d_transpose``.
        convolution_shape: Shape of the weights; its last entry is also the
            size of the bias vector.
        output_shape: Output shape passed to ``conv2d_transpose``.  When its
            last entry is 1 the pre-activations are averaged over axis 3
            before the activation.
        padding: Padding mode passed to ``conv2d_transpose``.
        layer_name: Name scope and summary-tag prefix of the layer.
        config: Passed through to ``weight_variable`` and ``bias_variable``.
        act: Activation, called as ``act(tensor, 'activation')``.
        strides: Sequence of four stride values.

    Returns:
        A ``(activations, weights)`` tuple.
    """
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
        # This Variable will hold the state of the weights for the layer.
        with tf.name_scope('weights'):
            name = layer_name + '/weights'
            weights = weight_variable(convolution_shape, name, config)
            variable_summaries(weights, name)

        with tf.name_scope('biases'):
            biases = bias_variable([convolution_shape[-1]], config)
            variable_summaries(biases, layer_name + '/biases')

        with tf.name_scope('convolution'):
            # The default is an immutable tuple; the helper still receives a
            # fresh list, as before.
            preactivate = conv2d_transpose(input_tensor, weights, output_shape,
                                           padding=padding,
                                           strides=list(strides)) + biases
            tf.histogram_summary(layer_name + '/pre_activations', preactivate)

        # Combine the feature maps if this is the last deconvolution.
        if output_shape[-1] == 1:
            activations = act(tf.reduce_mean(preactivate, 3, keep_dims=True),
                              'activation')
        else:
            activations = act(preactivate, 'activation')
        tf.histogram_summary(layer_name + '/activations', activations)

    # Joining with single spaces reproduces what the former Python 2
    # `print a, b, c` statements wrote, and also parses on Python 3.
    print(' '.join(str(part) for part in (
        layer_name + ' Shape: ', weights.get_shape(),
        ' with bias ', biases.get_shape(), ' padding', padding)))
    shape = activations.get_shape()
    print(' '.join(str(part) for part in (' output : ', shape)))

    return activations, weights
def conv_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu, fully_connected=False):
    """Make a simple convolutional (or fully connected) layer.

    Args:
        input_tensor: A tensor of the input data from the previous layer.
        input_dim: Number of input channels (or input features when
            `fully_connected` is set).
        output_dim: Number of output channels / features.
        layer_name: Name scope and summary-tag prefix of the layer.
        act: Activation applied to the biased linear output.
        fully_connected: When true, a matrix multiply replaces the 5x5
            convolution and no pooling is applied.

    Returns:
        The pooled tensor after CONV -> ACT -> POOL, or the activated
        matrix-multiply output when `fully_connected` is set.
    """
    with tf.name_scope(layer_name):
        with tf.name_scope("weights"):
            if fully_connected:
                weights = weight_variable([input_dim, output_dim])
            else:
                weights = weight_variable([5, 5, input_dim, output_dim])
            variable_summaries(weights, layer_name + '/weights')

        with tf.name_scope("biases"):
            bias = bias_variable([output_dim])
            variable_summaries(bias, layer_name + '/bias')

        if not fully_connected:
            with tf.name_scope("convolution"):
                convolution = act(conv2d(input_tensor, weights) + bias)
                tf.histogram_summary(layer_name + '/convolution', convolution)
                pooled = max_pool_2x2(convolution)
                return pooled

        with tf.name_scope("fully_connected"):
            final = act(tf.matmul(input_tensor, weights) + bias)
            tf.histogram_summary(layer_name + '/fully_connected', final)
            return final
def _deconv(inpOp, kH, kW, nOut, dH=1, dW=1, relu=True, name=None):
    """Build a stride-1 deconvolution layer with bias and optional ReLU.

    Args:
        inpOp: Input tensor; its static shape supplies the output's batch,
            height and width and the number of input channels.
        kH: Kernel height.
        kW: Kernel width.
        nOut: Number of output channels.
        dH: Not used by this function.
        dW: Not used by this function.
        relu: Whether to apply a ReLU to the biased output.
        name: Variable-scope name; ``'deconv<counter>'`` when omitted.

    Returns:
        The biased (and optionally rectified) output tensor.
    """
    global deconv_counter
    global parameters
    if not name:
        name = 'deconv' + str(deconv_counter)
    # NOTE(review): the counter is advanced for every layer, named or not -
    # confirm this matches the intended layout.
    deconv_counter += 1

    with tf.variable_scope(name):
        in_shape = inpOp.get_shape()
        nIn = int(in_shape[-1])
        stddev = 1e-3
        kernel_init = tf.random_normal_initializer(
            stddev=(kH*kW*nIn)**0.5*stddev)
        kernel = tf.get_variable('weights', [kH, kW, nOut, nIn],
                                 initializer=kernel_init)

        # Output keeps the input's batch/height/width and has nOut channels.
        out_shape = [int(in_shape[0]), int(in_shape[1]), int(in_shape[2]), nOut]
        conv = tf.nn.deconv2d(inpOp, kernel, out_shape, [1, 1, 1, 1],
                              padding="SAME")

        biases = tf.get_variable(
            'biases', [nOut], initializer=tf.constant_initializer(value=0.0))
        biased = tf.nn.bias_add(conv, biases)
        bias = tf.reshape(biased, conv.get_shape())
        if relu:
            bias = tf.nn.relu(bias, name='relu')

        # Histogram of the output and an image of its first three channels.
        output_tag = bias.name+"/output"
        tf.histogram_summary(output_tag, bias)
        tf.image_summary(bias.name+"/output", bias[:,:,:,0:3])
        return bias
def dense(self, width=100, act=tf.nn.relu):
    """Append a fully connected layer.

    The activations of the last layer are flattened to two dimensions,
    multiplied with a weight matrix, a bias is added and `act` is applied.
    The new layer is appended to ``self.layers`` with type ``"dense"``.

    Args:
        width: Number of output units.
        act: Activation, called as ``act(tensor, 'activation')``.

    Returns:
        ``self``, so that calls can be chained.
    """
    previous = self.layers[-1]["activations"]
    dense_count = len([l for l in self.layers if l["type"]=="dense"])
    layer_name = "dense" + str(dense_count)

    # Flatten everything but the first dimension.
    trailing_dims = previous.get_shape()[1:].as_list()
    input_dim = functools.reduce(operator.mul, trailing_dims, 1)
    input_tensor = tf.reshape(previous, (-1, input_dim))

    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
        # This Variable will hold the state of the weights for the layer.
        with tf.name_scope('weights'):
            weights = weight_variable([input_dim, width])
            variable_summaries(weights, layer_name + '/weights')

        with tf.name_scope('biases'):
            biases = bias_variable([width])
            variable_summaries(biases, layer_name + '/biases')

        with tf.name_scope('Wx_plus_b'):
            preactivate = tf.matmul(input_tensor, weights) + biases

        activations = act(preactivate, 'activation')
        tf.histogram_summary(layer_name + '/activations', activations)

    layer_record = {
        "activations": activations,
        "weights": weights,
        "biases": biases,
        "type": "dense",
    }
    self.layers.append(layer_record)
    return self
def _conv(inpOp, kH, kW, nOut, dH=1, dW=1, relu=True):
    """Build a stride-1 convolution layer with bias and optional ReLU.

    The layer is named ``'conv<counter>'`` using the module-level
    ``conv_counter``, which is advanced on every call.

    Args:
        inpOp: Input tensor; its last static dimension is the number of
            input channels.
        kH: Kernel height.
        kW: Kernel width.
        nOut: Number of output channels.
        dH: Not used by this function.
        dW: Not used by this function.
        relu: Whether to apply a ReLU to the biased output.

    Returns:
        The biased (and optionally rectified) output tensor.
    """
    global conv_counter
    global parameters
    name = 'conv' + str(conv_counter)
    conv_counter += 1

    with tf.name_scope(name) as scope:
        nIn = int(inpOp.get_shape()[-1])
        stddev = 5e-3
        kernel_init = tf.truncated_normal([kH, kW, nIn, nOut],
                                          dtype=tf.float32,
                                          stddev=(kH*kW*nIn)**0.5*stddev)
        kernel = tf.Variable(kernel_init, name='weights')
        conv = tf.nn.conv2d(inpOp, kernel, [1, 1, 1, 1], padding="SAME")

        bias_init = tf.constant(0.0, shape=[nOut], dtype=tf.float32)
        biases = tf.Variable(bias_init, trainable=True, name='biases')
        biased = tf.nn.bias_add(conv, biases)
        bias = tf.reshape(biased, conv.get_shape())
        if relu:
            bias = tf.nn.relu(bias, name=scope)

        # Histogram of the output, an image of its first three channels and
        # an image of the first filter's first three input channels.
        output_tag = scope+"/output"
        tf.histogram_summary(output_tag, bias)
        tf.image_summary(output_tag, bias[:,:,:,0:3])
        kernel_image = tf.expand_dims(kernel[:,:,0:3,0], 0)
        tf.image_summary(scope+"/kernel_weight", kernel_image)
        return bias
def train(lr, total_loss, global_step):
    """Build the training op: SGD with per-gradient norm clipping.

    Each existing gradient is summarised as a histogram *before* it is
    clipped to a norm of 5.  Moving averages of all trainable variables are
    maintained alongside the update.

    Args:
        lr: Learning rate for the gradient-descent optimizer.
        total_loss: Loss tensor to minimise.
        global_step: Step counter, incremented by the update and passed to
            the moving-average tracker.

    Returns:
        A no-op named 'train' that depends on the gradient update and the
        moving-average update.
    """
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

    # Histogram of the raw gradient, then replace it by its clipped version.
    # Pairs without a gradient are passed on untouched.
    for index in range(len(grads)):
        gradient, variable = grads[index]
        if gradient is None:
            continue
        tf.histogram_summary(variable.op.name + '/gradients', gradient)
        grads[index] = (tf.clip_by_norm(gradient, 5), variable)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for variable in tf.trainable_variables():
        tf.histogram_summary(variable.op.name, variable)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
def bn(self, act=tf.nn.relu):
    """Append a batch-normalization layer.

    Normalises the activations of the last layer with the mean and variance
    computed over axis 0 of the current batch, applies a learned scale and
    offset and then `act`.  The new layer is appended to ``self.layers``
    with type ``"bn"``.

    See: http://arxiv.org/pdf/1502.03167v3.pdf
    Based on implementation found at:
    http://www.r2rt.com/posts/implementations/2016-03-29-implementing-batch-normalization-tensorflow/

    Args:
        act: Activation, called as ``act(tensor, 'activation')``.

    Returns:
        ``self``, so that calls can be chained.
    """
    bn_count = len([l for l in self.layers if l["type"]=="bn"])
    layer_name = "bn" + str(bn_count)
    input_tensor = self.layers[-1]["activations"]

    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
        # Learned offset and scale cover every dimension but the first.
        dim = input_tensor.get_shape()[1:]
        beta = tf.Variable(tf.zeros(dim))
        scale = tf.Variable(tf.ones(dim))
        variable_summaries(beta, layer_name + "/beta")
        variable_summaries(scale, layer_name + "/scale")

        batch_mean, batch_var = tf.nn.moments(input_tensor, [0])
        epsilon = 1e-3  # keeps the square root away from zero
        normalized = (input_tensor - batch_mean) / tf.sqrt(batch_var + epsilon)
        bn_z = scale * normalized + beta

        activations = act(bn_z, 'activation')
        tf.histogram_summary(layer_name + '/activations', activations)

    layer_record = {"activations": activations, "type": "bn"}
    self.layers.append(layer_record)
    return self
def run_training(cost_threshold=FLAGS.cost_threshold, max_steps=FLAGS.max_steps):
    """Build the training graph and train until the cost is low enough.

    Relies on module-level state: ``sess``, ``i_ss``, ``f_dict``,
    ``activations``, ``trains`` and ``valids``, plus the helpers
    ``batch_logits``, ``batch_labels``, ``calc_loss``, ``accuracy`` and
    ``zero_activations``.  Sets the module-level ``setup_done`` to ``True``.

    Each step trains on a random sample of ``FLAGS.batch_size`` training
    indices, then evaluates loss and accuracy with the validation indices
    fed in and writes the merged summaries.

    Args:
        cost_threshold: Training stops as soon as the training cost of a
            step falls below this value.
        max_steps: Maximum number of training steps.

    Returns:
        A ``(step, cost_value, valid_accuracy_value)`` tuple.  ``step`` is
        the index of the step that reached the threshold, or `max_steps`
        when it was never reached.
    """
    global setup_done
    cost_value = 1e9
    # Defined up front so the final return also works when max_steps is 0.
    valid_accuracy_value = 0.0
    setup_done = True

    opt = tf.train.AdamOptimizer()
    i_trains = [s.idx for s in trains]
    i_valids = [s.idx for s in valids]

    logits = batch_logits(i_ss, activations.ref())
    labs = batch_labels(i_ss)
    loss = calc_loss(logits, labs)
    i_ss_accuracy = accuracy(logits, labs)
    train_op = opt.minimize(loss)

    tf.histogram_summary('samples', i_ss)
    tf.scalar_summary('loss', loss)
    tf.scalar_summary('validation accuracy', i_ss_accuracy)
    merged = tf.merge_all_summaries()

    # Built once here instead of inside the loop, where a new op was added
    # to the graph on every step.
    packed_samples = tf.pack([i_ss])

    sess.run(tf.initialize_all_variables())
    writer = tf.train.SummaryWriter(
        '/Users/rgobbel/src/pymisc/rntn_tf/tf_logs', sess.graph)

    # One warm-up step, followed by a reset of the activations.
    f_dict[i_ss] = random.sample(i_trains, FLAGS.batch_size)
    _, cost_value = sess.run([train_op, loss], feed_dict=f_dict)
    sess.run(zero_activations(activations.ref()), feed_dict=f_dict)
    print('starting')
    sess.run([i_ss_accuracy], feed_dict=f_dict)

    for step in range(max_steps):
        f_dict[i_ss] = random.sample(i_trains, FLAGS.batch_size)
        # Three fetches give three results; unpacking into four names, as
        # was done before, raises ValueError.
        _, _, cost_value = sess.run([packed_samples, train_op, loss],
                                    feed_dict=f_dict)

        f_dict[i_ss] = i_valids
        _, valid_accuracy_value = sess.run([loss, i_ss_accuracy],
                                           feed_dict=f_dict)
        (summ,) = sess.run([merged], feed_dict=f_dict)
        writer.add_summary(summ, step)
        writer.flush()
        print('.', end='', flush=True)

        if cost_value < cost_threshold:
            return step, cost_value, valid_accuracy_value

    return max_steps, cost_value, valid_accuracy_value
def conv_nn_layer(input_tensor, window_width, window_height, input_dim, output_dim, layer_name, act=tf.nn.relu):
    """Define a convolutional neural network layer.

    Args:
        input_tensor: Input handed to ``conv2d``.
        window_width: First dimension of the filter shape.
        window_height: Second dimension of the filter shape.
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        layer_name: Name scope and summary-tag prefix of the layer.
        act: Activation, called as ``act(tensor, 'activation')``.

    Returns:
        The activated layer output.
    """
    filter_shape = [window_width, window_height, input_dim, output_dim]

    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
        # Layer weights.
        with tf.name_scope('weights'):
            weights = weight_variable(filter_shape)
            variable_summaries(weights, layer_name + '/weights')

        # Biases.
        with tf.name_scope('biases'):
            biases = bias_variable([output_dim])
            variable_summaries(biases, layer_name + '/biases')

        # Convolve the weights over the input.
        with tf.name_scope('preactivation'):
            convolved = conv2d(input_tensor, weights)
            preactivate = convolved + biases
            tf.histogram_summary(layer_name + '/pre_activations', preactivate)

        # Layer activation.
        activations = act(preactivate, 'activation')
        tf.histogram_summary(layer_name + '/activations', activations)

    return activations
def _process(self, grads):
    """Summarise every gradient and pass the pairs through unchanged.

    For each pair with a gradient, a histogram summary is created and the
    root-mean-square of the gradient is added to the
    ``MOVING_SUMMARY_VARS_KEY`` collection.

    Args:
        grads: Iterable of ``(gradient, variable)`` pairs.

    Returns:
        `grads`, unmodified.
    """
    for grad, var in grads:
        if grad is None:
            # A variable without a gradient has nothing to summarise.
            continue
        tf.histogram_summary(var.op.name + '/grad', grad)
        grad_rms = tf.sqrt(tf.reduce_mean(tf.square(grad)),
                           name=var.op.name + '/gradRMS')
        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, grad_rms)
    return grads
for i in range(n_samples): x_i_encode = make_conv_net(x_i[:, i, :, :, :], scope, tie or i > 0, not x_i_learn) x_i_inv_mag = tf.rsqrt( tf.clip_by_value( tf.reduce_sum(tf.square(x_i_encode), 1, keep_dims=True), eps, float("inf"))) dotted = tf.squeeze( tf.batch_matmul(tf.expand_dims(x_hat_encode, 1), tf.expand_dims(x_i_encode, 2)), [ 1, ]) cos_sim_list.append(dotted * x_i_inv_mag) #*x_hat_inv_mag cos_sim = tf.concat(1, cos_sim_list) tf.histogram_summary('cos sim', cos_sim) weighting = tf.nn.softmax(cos_sim) label_prob = tf.squeeze(tf.batch_matmul(tf.expand_dims(weighting, 1), y_i)) tf.histogram_summary('label prob', label_prob) top_k = tf.nn.in_top_k(label_prob, y_hat_ind, 1) acc = tf.reduce_mean(tf.to_float(top_k)) tf.scalar_summary('train avg accuracy', acc) correct_prob = tf.reduce_sum( tf.log(tf.clip_by_value(label_prob, eps, 1.0)) * y_hat, 1) loss = tf.reduce_mean(-correct_prob, 0) tf.scalar_summary('loss', loss) optim = tf.train.GradientDescentOptimizer(learning_rate) #optim = tf.train.AdamOptimizer(learning_rate) grads = optim.compute_gradients(loss) grad_summaries = [
def main(_):
    """Build the training graph and run slim's training loop.

    Reads all configuration from FLAGS. Steps, in order: deployment config,
    global step, dataset, network function, input queue, model clones and
    losses, summaries, optional moving averages, optimizer, train op, and
    finally ``slim.learning.train``.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        # The stock dataset_factory call is kept for reference; the dataset
        # now comes from get_train_data instead.
        #dataset = dataset_factory.get_dataset(
        #FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
        dataset = get_train_data.get_dataset(FLAGS.dataset_name,
                                             FLAGS.dataset_split_name,
                                             FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        # NOTE(review): image_preprocessing_fn is looked up here, but its only
        # call site below is commented out, so it is currently unused.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            # Model-specific preprocessing is disabled; the image is only
            # converted to float32 before batching.
            #train_image_size = FLAGS.train_image_size or network_fn.default_image_size
            #image = image_preprocessing_fn(image, train_image_size,
            #                               train_image_size)
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            # The auxiliary head, when the network exposes one, is added with
            # weight 0.4; the main logits use weight 1.0.
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'], labels,
                    label_smoothing=FLAGS.label_smoothing, weight=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits, labels,
                label_smoothing=FLAGS.label_smoothing, weight=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.histogram_summary('activations/' + end_point, x))
            summaries.add(
                tf.scalar_summary('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.histogram_summary(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(
                tf.scalar_summary('learning_rate', learning_rate,
                                  name='learning_rate'))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # optimize_clones returns the total loss and the gradients that are
        # applied below.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(
            tf.scalar_summary('total_loss', total_loss, name='total_loss'))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # train_tensor yields total_loss with a control dependency on the
        # grouped update ops.
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.merge_summary(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
# Adam with a fixed learning rate of 0.001. Gradients are computed and applied
# in two steps so the (gradient, variable) pairs can be summarized in between.
optimizer = tf.train.AdamOptimizer(0.001)
grads_and_vars = optimizer.compute_gradients(model.loss)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

# Keep track of gradient values and sparsity (optional)
grad_summaries = []
for g, v in grads_and_vars:
    # Pairs whose gradient is None get no summary.
    if g is not None:
        grad_hist_summary = tf.histogram_summary(
            "{}/grad/hist".format(v.name), g)
        sparsity_summary = tf.scalar_summary(
            "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
        grad_summaries.append(grad_hist_summary)
        grad_summaries.append(sparsity_summary)
grad_summaries_merged = tf.merge_summary(grad_summaries)

# Output directory: ./runs/<unix timestamp>, resolved to an absolute path.
timestamp = str(int(time.time()))
out_dir = os.path.abspath(
    os.path.join(os.path.curdir, "runs", timestamp))
def buildModel(self, inMatFilename = None):
    """Build the network graph, loss, optimizers, summaries and saver.

    The graph has six convolutional layers followed by four fully connected
    layers ending in a single estimate ``self.est``. The loss is half the mean
    squared difference between ``self.gt`` and ``self.est``.

    Args:
        inMatFilename: Optional path passed to ``loadWeights``. When given,
            conv1-conv5 weights and biases are initialized from the arrays
            ``convN_w`` / ``convN_b`` it returns; otherwise they start as
            zero arrays. conv6 and the fully connected layers always use
            their own initializers.

    Side effects:
        Sets the placeholders, variables, intermediate tensors, ``self.loss``,
        ``self.optimizerAll``, ``self.optimizerFC`` and ``self.saver`` on self,
        and registers scalar/histogram summaries in the default graph.
    """
    if(inMatFilename):
        npWeights = loadWeights(inMatFilename)

    def conv_params(layer, shape):
        # Weight/bias variables for one of conv1..conv5: pretrained arrays
        # when a weight file was supplied, zero arrays of the given kernel
        # shape otherwise.
        if(inMatFilename):
            w_init = npWeights[layer + "_w"]
            b_init = npWeights[layer + "_b"]
        else:
            w_init = np.zeros(shape, dtype=np.float32)
            b_init = np.zeros((shape[-1]), dtype=np.float32)
        weights = weight_variable_fromnp(w_init, "w_" + layer)
        biases = weight_variable_fromnp(b_init, "b_" + layer)
        return weights, biases

    #Put all conv layers on gpu
    with tf.device('gpu:0'):
        with tf.name_scope("inputOps"):
            inputShape = self.dataObj.inputShape
            #Input image and ground-truth nodes
            self.inputImage = node_variable([None, inputShape[0], inputShape[1], inputShape[2]], "inputImage")
            self.gt = node_variable([None, 1], "gt")

        #First conv layer is 11x11, 3 input channels into 64 output channels
        #relu -> LRN -> pool
        with tf.name_scope("Conv1Ops"):
            self.W_conv1, self.B_conv1 = conv_params("conv1", (11, 11, 3, 64))
            self.h_conv1 = tf.nn.relu(conv2d(self.inputImage, self.W_conv1, "conv1") + self.B_conv1)
            self.h_norm1 = tf.nn.local_response_normalization(self.h_conv1, name="LRN1")
            self.h_pool1 = maxpool_2x2(self.h_norm1, "pool1")

        #Second conv layer is 5x5 conv, into 256 output channels
        with tf.name_scope("Conv2Ops"):
            self.W_conv2, self.B_conv2 = conv_params("conv2", (5, 5, 64, 256))
            self.h_conv2 = tf.nn.relu(conv2d(self.h_pool1, self.W_conv2, "conv2") + self.B_conv2)
            self.h_norm2 = tf.nn.local_response_normalization(self.h_conv2, name="LRN2")
            self.h_pool2 = maxpool_2x2(self.h_norm2, "pool2")

        #Third layer is 3x3 conv into 256 output channels
        #No pooling
        with tf.name_scope("Conv3Ops"):
            self.W_conv3, self.B_conv3 = conv_params("conv3", (3, 3, 256, 256))
            self.h_conv3 = tf.nn.relu(conv2d(self.h_pool2, self.W_conv3, "conv3") + self.B_conv3, name="relu3")

        #Fourth layer is 3x3 conv into 256 output channels
        #No pooling
        with tf.name_scope("Conv4Ops"):
            self.W_conv4, self.B_conv4 = conv_params("conv4", (3, 3, 256, 256))
            self.h_conv4 = tf.nn.relu(conv2d(self.h_conv3, self.W_conv4, "conv4") + self.B_conv4, name="relu4")

        #Fifth layer is 3x3 conv into 256 output channels
        #with LRN and pooling
        with tf.name_scope("Conv5Ops"):
            self.W_conv5, self.B_conv5 = conv_params("conv5", (3, 3, 256, 256))
            self.h_conv5 = tf.nn.relu(conv2d(self.h_conv4, self.W_conv5, "conv5") + self.B_conv5)
            self.h_norm5 = tf.nn.local_response_normalization(self.h_conv5, name="LRN5")
            self.h_pool5 = maxpool_2x2(self.h_norm5, "pool5")

        #6th layer (not in paper) is 3x3 conv into 256 output channels
        #with pooling; here relu is applied after the pool
        with tf.name_scope("Conv6Ops"):
            self.W_conv6 = weight_variable_xavier([3, 3, 256, 256], "w_conv6", conv=True)
            self.B_conv6 = bias_variable([256], "b_conv6")
            self.h_conv6 = conv2d(self.h_pool5, self.W_conv6, "conv6") + self.B_conv6
            self.h_pool6 = tf.nn.relu(maxpool_2x2(self.h_conv6, "pool6"), name="relu6")

        #Keep probability fed to both dropout ops below
        self.keep_prob = tf.placeholder(tf.float32)

        #Next are the fully connected layers
        #pool1, pool2, pool5 and pool6 each go through maxpool_2x2, so the
        #flattened size assumes a total downsampling of 16 per spatial
        #dimension (presumably a factor of 2 per pool -- confirm in maxpool_2x2).
        #Floor division keeps numInputs an int on Python 3; a float here
        #would be an invalid shape dimension.
        numInputs = (inputShape[0] // 16) * (inputShape[1] // 16) * 256

        #fc1 has 2048 units
        with tf.name_scope("FC1"):
            self.W_fc1 = weight_variable([numInputs, 2048], "w_fc1", 1e-6)
            self.B_fc1 = bias_variable([2048], "b_fc1")
            h_pool6_flat = tf.reshape(self.h_pool6, [-1, numInputs], name="pool6_flat")
            self.h_fc1 = tf.nn.relu(tf.matmul(h_pool6_flat, self.W_fc1, name="fc1") + self.B_fc1, "fc1_relu")
            self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.keep_prob)

    #Put all opt layers on cpu
    with tf.device('/cpu:0'):
        #fc2 has 128 units
        with tf.name_scope("FC2"):
            self.W_fc2 = weight_variable_xavier([2048, 128], "w_fc2", conv=False)
            self.B_fc2 = bias_variable([128], "b_fc2")
            self.h_fc2 = tf.nn.relu(tf.matmul(self.h_fc1_drop, self.W_fc2, name="fc2") + self.B_fc2, "fc2_relu")
            self.h_fc2_drop = tf.nn.dropout(self.h_fc2, self.keep_prob)

        #fc3 has 16 units and uses tanh (the op name is still "fc3_relu")
        with tf.name_scope("FC3"):
            self.W_fc3 = weight_variable_xavier([128, 16], "w_fc3", conv=False)
            self.B_fc3 = bias_variable([16], "b_fc3")
            #NOTE(review): this consumes self.h_fc2, not self.h_fc2_drop, so the
            #dropout computed in FC2 is never used. Left unchanged because
            #switching it alters training behavior -- confirm the intent.
            self.h_fc3 = tf.tanh(tf.matmul(self.h_fc2, self.W_fc3, name="fc3") + self.B_fc3, "fc3_relu")

        #Finally, fc4 condenses into 1 output value
        with tf.name_scope("FC4"):
            self.W_fc4 = weight_variable_xavier([16, 1], "w_fc4", conv=False)
            self.B_fc4 = bias_variable([1], "b_fc4")
            self.est = tf.matmul(self.h_fc3, self.W_fc4, name="est") + self.B_fc4

        with tf.name_scope("Loss"):
            #Half the mean squared error between ground truth and estimate
            self.loss = tf.reduce_mean(tf.square(self.gt - self.est))/2

        with tf.name_scope("Opt"):
            #optimizerAll is given no var_list; optimizerFC only updates
            #conv6 and the fully connected layers
            self.optimizerAll = tf.train.AdamOptimizer(self.learningRate).minimize(self.loss)
            self.optimizerFC = tf.train.AdamOptimizer(self.learningRate).minimize(self.loss,
                    var_list=[self.W_conv6,
                        self.B_conv6,
                        self.W_fc1,
                        self.B_fc1,
                        self.W_fc2,
                        self.B_fc2,
                        self.W_fc3,
                        self.B_fc3,
                        self.W_fc4,
                        self.B_fc4]
                    )

    #Summaries
    tf.scalar_summary('l2 loss', self.loss)
    histogram_targets = [
        ('input', self.inputImage),
        ('gt', self.gt),
        ('conv1', self.h_pool1),
        ('conv2', self.h_pool2),
        ('conv3', self.h_conv3),
        ('conv4', self.h_conv4),
        ('conv5', self.h_pool5),
        ('conv6', self.h_pool6),
        ('fc1', self.h_fc1),
        ('fc2', self.h_fc2),
        ('fc3', self.h_fc3),
        ('est', self.est),
        ('w_conv1', self.W_conv1),
        ('b_conv1', self.B_conv1),
        ('w_conv2', self.W_conv2),
        ('b_conv2', self.B_conv2),
        ('w_conv3', self.W_conv3),
        ('b_conv3', self.B_conv3),
        ('w_conv4', self.W_conv4),
        ('b_conv4', self.B_conv4),
        ('w_conv5', self.W_conv5),
        ('b_conv5', self.B_conv5),
        ('w_conv6', self.W_conv6),
        ('b_conv6', self.B_conv6),
        ('w_fc1', self.W_fc1),
        ('b_fc1', self.B_fc1),
        ('w_fc2', self.W_fc2),
        ('b_fc2', self.B_fc2),
        ('w_fc3', self.W_fc3),
        ('b_fc3', self.B_fc3),
        ('w_fc4', self.W_fc4),
        ('b_fc4', self.B_fc4),
    ]
    for tag, tensor in histogram_targets:
        tf.histogram_summary(tag, tensor)

    #Define saver
    self.saver = tf.train.Saver()
#train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# Accuracy: fraction of rows where the arg-max of the prediction matches the
# arg-max of the label.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess = tf.Session()
#sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)

# Summaries use the tf.summary.* API so they match tf.summary.merge_all() and
# tf.summary.FileWriter below; the older tf.scalar_summary /
# tf.histogram_summary functions are gone in TensorFlow 1.0.
# NOTE(review): tf.summary.* sanitizes names, so the "Accuracy:" tag is
# expected to show up with the colon replaced -- confirm in TensorBoard.
tf.summary.scalar("Accuracy:", accuracy)
tf.summary.histogram('weights', W)
tf.summary.histogram('bias', b)
tf.summary.histogram('softmax', tf_softmax)
tf.summary.histogram('accuracy', accuracy)

merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter('./graphs', sess.graph)

#number of iterations
# Keep the raw logits separate from the softmax output:
# softmax_cross_entropy_with_logits applies softmax itself, so feeding it
# y_conv (already a softmax) would apply softmax twice and distort the loss.
fc2_logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(fc2_logits)

with tf.name_scope("cross_entropy") as scope:
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(fc2_logits, Y))
    ce_summ = tf.scalar_summary("cross_entropy", cross_entropy)

with tf.name_scope("train") as scope:
    train = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

with tf.name_scope("accuracy") as scope:
    # arg-max is unaffected by softmax, so y_conv is fine here.
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(y_conv, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    accuracy_summ = tf.scalar_summary("accuracy", accuracy)

# Add histogram
W_conv1_hist = tf.histogram_summary("wegiht1", W_conv1)
W_conv2_hist = tf.histogram_summary("weight2", W_conv2)
W_fc1_hist = tf.histogram_summary("weigth3", W_fc1)
W_fc2_hist = tf.histogram_summary("weight4", W_fc2)

b_conv1_hist = tf.histogram_summary("bias1", b_conv1)
b_conv2_hist = tf.histogram_summary("bias2", b_conv2)
b_fc1_hist = tf.histogram_summary("bias3", b_fc1)
b_fc2_hist = tf.histogram_summary("bias4", b_fc2)

with tf.Session() as sess:
    tf.initialize_all_variables().run()
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("/tmp/CNN_logs", sess.graph)

    # 1001 training steps with keep_prob fed as 0.5.
    for i in range(1001):
        batch_xs, batch_ys = rotor.train.next_batch(batch_size)
        sess.run(train, feed_dict={X: batch_xs, Y: batch_ys, keep_prob: 0.5})
def _create_dilation_layer(self, input_batch, layer_index, dilation,
                           global_condition_batch, output_width):
    '''Build one gated, causal, dilated convolution layer.

    Args:
         input_batch: Input to the dilation layer.
         layer_index: Integer indicating which layer this is.
         dilation: Integer specifying the dilation size.
         global_condition_batch: Tensor containing the global data upon
             which the output is to be conditioned, or None to skip
             conditioning. Shape: [batch size, 1, channels]. The 1 is for
             the axis corresponding to time so that the result is broadcast
             to all time steps.
         output_width: Number of trailing time steps of the gated output
             that feed the skip connection.

    Returns:
         A pair (skip contribution, dense output). The dense output is the
         input, cropped at the front to the length of the transformed
         signal, plus that transformed signal.

    The layer contains a gated filter that connects to dense output
    and to a skip connection:

           |-> [gate]   -|        |-> 1x1 conv -> skip output
           |             |-> (*) -|
    input -|-> [filter] -|        |-> 1x1 conv -|
           |                                    |-> (+) -> dense output
           |------------------------------------|

    Where `[gate]` and `[filter]` are causal convolutions with a
    non-linear activation at the output. Biases and global conditioning
    are omitted due to the limits of ASCII art.
    '''
    layer_vars = self.variables['dilated_stack'][layer_index]
    w_filter = layer_vars['filter']
    w_gate = layer_vars['gate']

    filter_branch = causal_conv(input_batch, w_filter, dilation)
    gate_branch = causal_conv(input_batch, w_gate, dilation)

    # Optional global conditioning, added to both branches.
    if global_condition_batch is not None:
        w_gc_filter = layer_vars['gc_filtweights']
        gc_filter_term = tf.nn.conv1d(global_condition_batch,
                                      w_gc_filter,
                                      stride=1,
                                      padding="SAME",
                                      name="gc_filter")
        filter_branch = filter_branch + gc_filter_term

        w_gc_gate = layer_vars['gc_gateweights']
        gc_gate_term = tf.nn.conv1d(global_condition_batch,
                                    w_gc_gate,
                                    stride=1,
                                    padding="SAME",
                                    name="gc_gate")
        gate_branch = gate_branch + gc_gate_term

    if self.use_biases:
        b_filter = layer_vars['filter_bias']
        b_gate = layer_vars['gate_bias']
        filter_branch = tf.add(filter_branch, b_filter)
        gate_branch = tf.add(gate_branch, b_gate)

    # Gated activation unit.
    gated = tf.tanh(filter_branch) * tf.sigmoid(gate_branch)

    # 1x1 convolution feeding the residual (dense) path.
    w_dense = layer_vars['dense']
    residual = tf.nn.conv1d(
        gated, w_dense, stride=1, padding="SAME", name="dense")

    # 1x1 convolution feeding the skip path; only the last output_width
    # time steps of the gated signal are used.
    skip_start = tf.shape(gated)[1] - output_width
    gated_tail = tf.slice(gated, [0, skip_start, 0], [-1, -1, -1])
    w_skip = layer_vars['skip']
    skip_out = tf.nn.conv1d(
        gated_tail, w_skip, stride=1, padding="SAME", name="skip")

    if self.use_biases:
        b_dense = layer_vars['dense_bias']
        b_skip = layer_vars['skip_bias']
        residual = residual + b_dense
        skip_out = skip_out + b_skip

    if self.histograms:
        tag_prefix = 'layer{}'.format(layer_index)
        tracked = [('_filter', w_filter),
                   ('_gate', w_gate),
                   ('_dense', w_dense),
                   ('_skip', w_skip)]
        if self.use_biases:
            tracked += [('_biases_filter', b_filter),
                        ('_biases_gate', b_gate),
                        ('_biases_dense', b_dense),
                        ('_biases_skip', b_skip)]
        for suffix, tensor in tracked:
            tf.histogram_summary(tag_prefix + suffix, tensor)

    # Drop leading input time steps so the residual sum lines up.
    crop = tf.shape(input_batch)[1] - tf.shape(residual)[1]
    cropped_input = tf.slice(input_batch, [0, crop, 0], [-1, -1, -1])

    return skip_out, cropped_input + residual
def _add_split_loss(w, input_q, output_q):
    """Register overlap, uniform and weight-split terms for one weight tensor.

    Args:
        w: Rank-2 or rank-4 weight tensor; its last two dimensions are
            treated as (in_dim, out_dim).
        input_q: Rank-2 tensor of shape (ngroups, in_dim).
        output_q: Rank-2 tensor of shape (ngroups, out_dim).

    Side effects:
        Adds tensors to the graph collections 'OVERLAP_LOSS_WEIGHTS',
        'OVERLAP_LOSS', 'UNIFORM_LOSS' and 'WEIGHT_SPLIT', adds a histogram
        summary of ``w``, and prints progress lines. Returns None.
    """
    # Check input tensors' measurements
    assert len(w.get_shape()) == 2 or len(w.get_shape()) == 4
    in_dim, out_dim = w.get_shape().as_list()[-2:]
    assert len(input_q.get_shape()) == 2
    assert len(output_q.get_shape()) == 2
    assert in_dim == input_q.get_shape().as_list()[1]
    assert out_dim == output_q.get_shape().as_list()[1]
    assert input_q.get_shape().as_list()[0] == output_q.get_shape().as_list()[
        0]  # ngroups
    ngroups = input_q.get_shape().as_list()[0]
    assert ngroups > 1

    # Add split losses to collections
    T_list = []  # pairwise products between different group rows
    U_list = []  # squared row sums
    # Each q tensor contributes its overlap/uniform terms only the first time
    # it is seen; membership in 'OVERLAP_LOSS_WEIGHTS' is the marker.
    if input_q not in tf.get_collection('OVERLAP_LOSS_WEIGHTS'):
        tf.add_to_collection('OVERLAP_LOSS_WEIGHTS', input_q)
        print('\t\tAdd overlap & split loss for %s' % input_q.name)
        for i in range(ngroups):
            for j in range(ngroups):
                if i == j:
                    continue
                T_list.append(tf.reduce_sum(input_q[i, :] * input_q[j, :]))
            # NOTE(review): appended once per group (outer loop) -- confirm
            # this nesting against the intended uniform-loss definition.
            U_list.append(tf.square(tf.reduce_sum(input_q[i, :])))
    if output_q not in tf.get_collection('OVERLAP_LOSS_WEIGHTS'):
        print('\t\tAdd overlap & split loss for %s' % output_q.name)
        tf.add_to_collection('OVERLAP_LOSS_WEIGHTS', output_q)
        for i in range(ngroups):
            for j in range(ngroups):
                if i == j:
                    continue
                T_list.append(tf.reduce_sum(output_q[i, :] * output_q[j, :]))
            U_list.append(tf.square(tf.reduce_sum(output_q[i, :])))
    # Nothing is registered when both q tensors had been seen before.
    if T_list:
        tf.add_to_collection('OVERLAP_LOSS', tf.add_n(T_list))
    if U_list:
        tf.add_to_collection('UNIFORM_LOSS', tf.add_n(U_list))

    # Weight-split term: for every group, penalize weights that connect
    # inputs outside the group to outputs inside it (rows) and inputs inside
    # the group to outputs outside it (columns).
    S_list = []
    for i in range(ngroups):
        if len(w.get_shape()) == 4:
            # Rank-4 case: mean of squared weights over the first two axes,
            # with the masks squared to match.
            w_reduce = tf.reduce_mean(tf.square(w), [0, 1])
            wg_row = tf.matmul(
                tf.matmul(tf.diag(tf.square(1 - input_q[i, :])), w_reduce),
                tf.diag(tf.square(output_q[i, :])))
            wg_row_l2 = tf.reduce_sum(tf.sqrt(tf.reduce_sum(wg_row, 1)))
            wg_col = tf.matmul(
                tf.matmul(tf.diag(tf.square(input_q[i, :])), w_reduce),
                tf.diag(tf.square(1 - output_q[i, :])))
            wg_col_l2 = tf.reduce_sum(tf.sqrt(tf.reduce_sum(wg_col, 0)))
        else:  # len(w.get_shape()) == 2
            # Rank-2 case: mask the weights first, then square.
            wg_row = tf.matmul(tf.matmul(tf.diag(1 - input_q[i, :]), w),
                               tf.diag(output_q[i, :]))
            wg_row_l2 = tf.reduce_sum(
                tf.sqrt(tf.reduce_sum(wg_row * wg_row, 1)))
            wg_col = tf.matmul(tf.matmul(tf.diag(input_q[i, :]), w),
                               tf.diag(1 - output_q[i, :]))
            wg_col_l2 = tf.reduce_sum(
                tf.sqrt(tf.reduce_sum(wg_col * wg_col, 0)))
        S_list.append(wg_row_l2 + wg_col_l2)
    S = tf.add_n(S_list)
    tf.add_to_collection('WEIGHT_SPLIT', S)

    # Add histogram for w if split losses are added
    scope_name = tf.get_variable_scope().name
    tf.histogram_summary("%s/weights" % scope_name, w)
    print('\t\tAdd split loss for %s(%dx%d, %d groups)' \
          % (tf.get_variable_scope().name, in_dim, out_dim, ngroups))

    return
def _build_model(self):
    """
    Assemble the TensorFlow graph: placeholders, conv/dense stack, loss,
    optimizer, train op and TensorBoard summaries, all stored on self.
    """
    layers = tf.contrib.layers

    # Input placeholders.
    # Batch of 80x80 inputs with 4 channels, stored as uint8.
    self.X_pl = tf.placeholder(
        shape=[None, 80, 80, 4], dtype=tf.uint8, name="X")
    # TD target for each sample.
    self.y_pl = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
    # Index of the action taken for each sample.
    self.actions_pl = tf.placeholder(
        shape=[None], dtype=tf.int32, name="actions")

    net = tf.to_float(self.X_pl)
    n_samples = tf.shape(self.X_pl)[0]

    # Three ReLU convolutions: (outputs, kernel size, stride).
    for num_outputs, kernel_size, stride in ((32, 8, 4), (64, 4, 2), (64, 3, 1)):
        net = layers.conv2d(
            net, num_outputs, kernel_size, stride, activation_fn=tf.nn.relu)

    # Dense head: 512 units, then one output per valid action.
    net = layers.flatten(net)
    hidden = layers.fully_connected(net, 512)
    self.predictions = layers.fully_connected(hidden, len(VALID_ACTIONS))

    # Pick the prediction of the chosen action for every sample by indexing
    # into the flattened prediction matrix.
    row_offsets = tf.range(n_samples) * tf.shape(self.predictions)[1]
    flat_indices = row_offsets + self.actions_pl
    flat_predictions = tf.reshape(self.predictions, [-1])
    self.action_predictions = tf.gather(flat_predictions, flat_indices)

    # Mean squared difference between targets and chosen-action predictions.
    self.losses = tf.squared_difference(self.y_pl, self.action_predictions)
    self.loss = tf.reduce_mean(self.losses)

    # RMSProp with the parameters from the original paper.
    self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
    self.train_op = self.optimizer.minimize(
        self.loss, global_step=tf.contrib.framework.get_global_step())

    # TensorBoard summaries.
    loss_scalar = tf.scalar_summary("loss", self.loss)
    loss_histogram = tf.histogram_summary("loss_hist", self.losses)
    q_histogram = tf.histogram_summary("q_values_hist", self.predictions)
    max_q_scalar = tf.scalar_summary(
        "max_q_value", tf.reduce_max(self.predictions))
    self.summaries = tf.merge_summary(
        [loss_scalar, loss_histogram, q_histogram, max_q_scalar])
# Reshape word vector word_vec_reshape = tf.reshape(word_vec, [batch_size, input_size, embed_size]) # LSTM(BiDirectional) BLSTM, each_output, sqlength = _bidirectionalLSTM(word_vec_reshape) # Dropout(Outputs) keep_prob_out = tf.placeholder(tf.float32) output_drop = tf.nn.dropout(BLSTM, keep_prob_out) # Weight & bias W = tf.Variable(tf.random_uniform([bi_hidden_size, tag_size], -1.0, 1.0)) b = tf.Variable(tf.random_uniform([tag_size], -1.0, 1.0)) W_hist = tf.histogram_summary("weights", W) b_hist = tf.histogram_summary("biases", b) # True Data y_ = tf.placeholder(tf.int32, [batch_size, input_size]) # Softmax # y = tf.nn.softmax(tf.matmul(output_drop, W) + b) y = tf.matmul(output_drop, W) yreshape = tf.reshape(y, [batch_size, input_size, tag_size]) # CRF crf_sqlengths = np.full(batch_size, input_size, dtype=np.int32) tf_crf_sqlengths = tf.constant(crf_sqlengths) crf_score, transition_params = _CRF(yreshape, y_, tf_crf_sqlengths)
def add_activation_summary(var):
    """Add a histogram and a zero-fraction scalar summary for ``var``.

    Tags are the name of the op producing ``var`` followed by
    "/activation" and "/sparsity".
    """
    op_name = var.op.name
    tf.histogram_summary(op_name + "/activation", var)
    zero_ratio = tf.nn.zero_fraction(var)
    tf.scalar_summary(op_name + "/sparsity", zero_ratio)
padding='SAME') x = tf.placeholder(tf.float32, shape=[None, 240, 320, 3]) y_ = tf.placeholder(tf.float32, shape=[None, 3]) W_conv1 = weight_variable([6, 6, 3, 16]) b_conv1 = bias_variable([16]) h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1) dead_ReLUs1 = tf.placeholder(tf.float32, shape=[1]) h_pool1 = max_pool_2x2(h_conv1) W_conv2 = weight_variable([6, 6, 16, 4]) b_conv2 = bias_variable([4]) h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) tf.histogram_summary('activations_layer_2', h_conv2) h_pool2 = max_pool_2x2(h_conv2) W_conv3 = weight_variable([6, 6, 4, 4]) b_conv3 = bias_variable([4]) h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3) tf.histogram_summary('activations_layer_3', h_conv3) h_pool3 = max_pool_2x2(h_conv3) W_conv4 = weight_variable([6, 6, 4, 4]) b_conv4 = bias_variable([4]) h_conv4 = tf.nn.relu(conv2d(h_pool3, W_conv4) + b_conv4) tf.histogram_summary('activations_layer_4', h_conv4) h_pool4 = max_pool_2x2(h_conv4) W_fc1 = weight_variable([15 * 20 * 4, 4])
def _activation_summary(x, name_tensor=None):
    """Create a histogram and a sparsity summary for an activation tensor.

    Args:
        x: Tensor to summarize.
        name_tensor: Tag prefix; when None, the name of the op producing
            ``x`` is used.
    """
    tag_prefix = x.op.name if name_tensor is None else name_tensor
    tf.histogram_summary(tag_prefix + '/activations', x)
    zero_ratio = tf.nn.zero_fraction(x)
    tf.scalar_summary(tag_prefix + '/sparsity', zero_ratio)
def add_gradient_summary(grad, var):
    """Add a histogram summary for ``grad``, tagged with the op name of ``var``.

    Does nothing when ``grad`` is None.
    """
    if grad is None:
        return
    tag = var.op.name + "/gradient"
    tf.histogram_summary(tag, grad)
"float", [None, 784]) # mnist data image of shape 28*28=784 output_tensor = tf.placeholder( "float", [None, 10]) # 0-9 digits recognition => 10 classes # Create a model # Set model weights weights = tf.Variable(tf.zeros([784, 10])) biases = tf.Variable(tf.zeros([10])) with tf.name_scope("Wx_b") as scope: # Construct a linear model model = tf.nn.softmax(tf.matmul(input_tensor, weights) + biases) # Softmax # Add summary ops to collect data weights_summary = tf.histogram_summary("weights", weights) biases_summary = tf.histogram_summary("biases", biases) # More name scopes will clean up graph representation with tf.name_scope("cost_function") as scope: # Minimize error using cross entropy # Cross entropy cost_function = -tf.reduce_sum(output_tensor * tf.log(model)) # Create a summary to monitor the cost function tf.scalar_summary("cost_function", cost_function) # Training initialisation with tf.name_scope("train") as scope: # Creates an optimization function that makes our model improve during training # Gradient descent
def add_to_regularization_and_summary(var):
    """Summarize ``var`` and add its L2 loss to the "reg_loss" collection.

    Does nothing when ``var`` is None.
    """
    if var is None:
        return
    tf.histogram_summary(var.op.name, var)
    l2_penalty = tf.nn.l2_loss(var)
    tf.add_to_collection("reg_loss", l2_penalty)
def buildModel(self, inputShape):
    """Build the TensorFlow graph for this model and store its ops on ``self``.

    Creates the input node, the weight tensor ``V1_W``, the activity tensor
    ``V1_A``, a reconstruction of the (strided/reshaped) input from the two,
    a reconstruction + L1 loss, one Adam optimizer per variable, a set of
    statistics, and scalar/histogram summaries. A parallel set of ``t_*`` ops
    is built from a thresholded copy of the activities.

    Args:
        inputShape: Indexable with at least three entries. Entries 0 and 1
            must be divisible by ``self.VStrideY`` and ``self.VStrideX``
            respectively. Presumably (height, width, channels) -- TODO confirm
            against callers.

    Raises:
        AssertionError: if ``nT`` or the first two entries of ``inputShape``
            are not multiples of the corresponding stride.

    NOTE(review): the nesting of the ``with`` blocks below was reconstructed
    from the statement order; confirm which statements belong to the "stats"
    scope and whether the summaries live inside the device scope.
    """
    # The strides must tile the temporal and spatial extents exactly.
    assert (self.nT % self.VStrideT == 0)
    assert (inputShape[0] % self.VStrideY == 0)
    assert (inputShape[1] % self.VStrideX == 0)
    # Activity-grid extents: input extent divided by the stride on each axis.
    V_T = int(self.nT / self.VStrideT)
    V_Y = int(inputShape[0] / self.VStrideY)
    V_X = int(inputShape[1] / self.VStrideX)
    # Patch extents measured in strides.
    V_Tp = int(self.patchSizeT / self.VStrideT)
    V_Yp = int(self.patchSizeY / self.VStrideY)
    V_Xp = int(self.patchSizeX / self.VStrideX)
    # Output feature count: input features times the product of the strides.
    V_Ofp = int(inputShape[2] * self.VStrideT * self.VStrideY * self.VStrideX)
    self.imageShape = (self.batchSize, self.nT, inputShape[0], inputShape[1], inputShape[2])
    self.WShape = (V_Tp, V_Yp, V_Xp, self.numV, V_Ofp)
    self.VShape = (self.batchSize, V_T, V_Y, V_X, self.numV)
    #Running on GPU
    with tf.device(self.device):
        with tf.name_scope("inputOps"):
            #Get convolution variables as placeholders
            self.inputImage = node_variable(self.imageShape, "inputImage")
            #Scale inputImage
            self.scaled_inputImage = self.inputImage / np.sqrt(
                self.patchSizeX * self.patchSizeY * inputShape[2])
            #This is what it should be, but for now, we ignore the scaling with nT
            #self.scaled_inputImage = self.inputImage/np.sqrt(self.nT*self.patchSizeX*self.patchSizeY*inputShape[2])
            self.reshape_inputImage = transpose5dData(
                self.scaled_inputImage, self.imageShape, self.VStrideT,
                self.VStrideY, self.VStrideX)
        with tf.name_scope("Dictionary"):
            self.V1_W = sparse_weight_variable(self.WShape, "V1_W")
            #self.V1_W = sparse_weight_variable((self.patchSizeY, self.patchSizeX, inputShape[2], self.numV), "V1_W")
        with tf.name_scope("weightNorm"):
            # L2 norm over every axis except axis 3 (the one of size numV).
            self.normVals = tf.sqrt(
                tf.reduce_sum(tf.square(self.V1_W),
                              reduction_indices=[0, 1, 2, 4],
                              keep_dims=True))
            #self.normVals = tf.sqrt(tf.reduce_sum(tf.square(self.V1_W), reduction_indices=[0, 1, 2], keep_dims=True))
            # Op that rescales V1_W in place by the norms computed above.
            self.normalize_W = self.V1_W.assign(self.V1_W / self.normVals)
        with tf.name_scope("ISTA"):
            #Variable for activity
            self.V1_A = weight_variable(self.VShape, "V1_A", 1e-4)
            self.zeroConst = tf.zeros(self.VShape)
            # Thresholded activities: entries with |a| < zeroThresh become 0.
            self.t_V1_A = tf.select(
                tf.abs(self.V1_A) < self.zeroThresh, self.zeroConst, self.V1_A)
            #self.V1_A= weight_variable((self.batchSize, inputShape[0], inputShape[1], self.numV), "V1_A", .01)
        with tf.name_scope("Recon"):
            assert (self.VStrideT >= 1)
            assert (self.VStrideY >= 1)
            assert (self.VStrideX >= 1)
            #We build index tensor in numpy to gather
            self.recon = conv3d(self.V1_A, self.V1_W, "recon")
            # NOTE(review): same op name "recon" as above -- confirm intended.
            self.t_recon = conv3d(self.t_V1_A, self.V1_W, "recon")
        with tf.name_scope("Error"):
            self.error = self.reshape_inputImage - self.recon
            self.t_error = self.reshape_inputImage - self.t_recon
        with tf.name_scope("Loss"):
            self.reconError = tf.reduce_sum(tf.square(self.error))
            self.l1Sparsity = tf.reduce_sum(tf.abs(self.V1_A))
            #Define loss
            self.loss = self.reconError / 2 + self.thresh * self.l1Sparsity
            # Same loss, evaluated with the thresholded activities.
            self.t_reconError = tf.reduce_sum(tf.square(self.t_error))
            self.t_l1Sparsity = tf.reduce_sum(tf.abs(self.t_V1_A))
            #Define loss
            self.t_loss = self.t_reconError / 2 + self.thresh * self.t_l1Sparsity
        with tf.name_scope("Opt"):
            #Define optimizer
            # Each optimizer minimises the same loss but updates one variable.
            #self.optimizerA = tf.train.GradientDescentOptimizer(self.learningRateA).minimize(self.loss,
            self.optimizerA = tf.train.AdamOptimizer(
                self.learningRateA).minimize(self.loss, var_list=[self.V1_A])
            #self.optimizerW = tf.train.GradientDescentOptimizer(self.learningRateW).minimize(self.loss,
            self.optimizerW = tf.train.AdamOptimizer(
                self.learningRateW).minimize(self.loss, var_list=[self.V1_W])
        with tf.name_scope("stats"):
            # Despite the name, this is the fraction of activities whose
            # magnitude is ABOVE zeroThresh (it is logged under the tag 'nnz').
            self.underThresh = tf.reduce_mean(
                tf.cast(tf.abs(self.V1_A) > self.zeroThresh, tf.float32))
            # Standard deviation of the error, multiplied back by the factor
            # the input was divided by in "inputOps".
            self.errorStd = tf.sqrt(
                tf.reduce_mean(
                    tf.square(self.error - tf.reduce_mean(self.error)))
            ) * np.sqrt(self.patchSizeY * self.patchSizeX * inputShape[2])
            self.l1_mean = tf.reduce_mean(tf.abs(self.V1_A))
            self.t_errorStd = tf.sqrt(
                tf.reduce_mean(
                    tf.square(self.t_error - tf.reduce_mean(self.t_error)))
            ) * np.sqrt(self.patchSizeY * self.patchSizeX * inputShape[2])
            self.t_l1_mean = tf.reduce_mean(tf.abs(self.t_V1_A))
            #Reshape weights for viewing
            self.reshape_weight = transpose5dWeight(
                self.V1_W, self.WShape, self.VStrideT, self.VStrideY,
                self.VStrideX)
            self.weightImages = tf.reshape(
                tf.transpose(self.reshape_weight, [3, 0, 1, 2, 4]), [
                    self.numV * self.patchSizeT, self.patchSizeY,
                    self.patchSizeX, inputShape[2]
                ])
            #For image viewing
            # Only the first batch element is kept for viewing.
            self.frameImages = self.inputImage[0, :, :, :, :]
            self.reshaped_recon = undoTranspose5dData(
                self.recon, self.imageShape, self.VStrideT, self.VStrideY,
                self.VStrideX)
            self.frameRecons = self.reshaped_recon[0, :, :, :, :]
            self.t_reshaped_recon = undoTranspose5dData(
                self.t_recon, self.imageShape, self.VStrideT, self.VStrideY,
                self.VStrideX)
            self.t_frameRecons = self.t_reshaped_recon[0, :, :, :, :]
            #For log of activities
            # The 1e-15 offset keeps the log finite for zero activities.
            self.log_V1_A = tf.log(tf.abs(self.V1_A) + 1e-15)
    #Summaries
    self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
    self.s_recon = tf.scalar_summary('recon error', self.reconError, name="reconError")
    self.s_errorStd = tf.scalar_summary('errorStd', self.errorStd, name="errorStd")
    self.s_l1 = tf.scalar_summary('l1 sparsity', self.l1Sparsity, name="l1Sparsity")
    self.s_l1_mean = tf.scalar_summary('l1 mean', self.l1_mean, name="l1Mean")
    self.s_s_nnz = tf.scalar_summary('nnz', self.underThresh, name="nnz")
    self.s_t_loss = tf.scalar_summary('t loss', self.t_loss, name="t_lossSum")
    self.s_t_recon = tf.scalar_summary('t recon error', self.t_reconError, name="t_reconError")
    self.s_t_errorStd = tf.scalar_summary('t errorStd', self.t_errorStd, name="t_errorStd")
    self.s_t_l1 = tf.scalar_summary('t l1 sparsity', self.t_l1Sparsity, name="t_l1Sparsity")
    self.s_t_l1_mean = tf.scalar_summary('t l1 mean', self.t_l1_mean, name="t_l1Mean")
    self.h_input = tf.histogram_summary('input', self.inputImage, name="input")
    self.h_recon = tf.histogram_summary('recon', self.recon, name="recon")
    self.h_v1_w = tf.histogram_summary('V1_W', self.V1_W, name="V1_W")
    self.h_v1_a = tf.histogram_summary('V1_A', self.V1_A, name="V1_A")
    self.h_log_v1_a = tf.histogram_summary('Log_V1_A', self.log_V1_A, name="Log_V1_A")
    self.h_normVals = tf.histogram_summary('normVals', self.normVals, name="normVals")
def train(self, states, actions, rewards, initial, **unused):
    """Build the n-step TD training op for the recurrent Q-network.

    Args:
        states: Input fed to ``tf.nn.dynamic_rnn``; axis 0 is read as the
            batch size and axis 1 as the experience length.
        actions: Combined with the predicted Q-values through
            ``tfl.batch_dot`` to select the Q-value of the action taken;
            presumably one-hot -- TODO confirm.
        rewards: Sliced along axis 1 in steps of one to build the targets.
        initial: Initial state of the recurrent cell.
        **unused: Extra keyword arguments, ignored.

    Returns:
        Whatever ``self.optimizer.optimize`` returns for the Q loss.
    """
    n = self.rlConfig.tdN
    state_shape = tf.shape(states)
    batch_size = state_shape[0]  # NOTE(review): not used below
    experience_length = state_shape[1]
    # The last n steps have no n-step target, so they are not trained on.
    train_length = experience_length - n
    # if not natural
    # NOTE(review): q_hidden is not used below.
    q_outputs, q_hidden = tf.nn.dynamic_rnn(self.q_rnn, states, initial_state=initial)
    predictedQs = self.q_out(q_outputs)
    takenQs = tfl.batch_dot(actions, predictedQs)
    trainQs = tf.slice(takenQs, [0, 0], [-1, train_length])
    # smooth between TD(m) for m<=n?
    # Bootstrap from the taken-action Q-value n steps ahead ...
    targets = tf.slice(takenQs, [0, n], [-1, train_length])
    #targets = values[:,n:]
    # ... then fold in the rewards of steps n-1 down to 0, discounting the
    # running target once per step.
    for i in reversed(range(n)):
        targets *= self.rlConfig.discount
        targets += tf.slice(rewards, [0, i], [-1, train_length])
    # Targets are treated as constants by the optimizer.
    targets = tf.stop_gradient(targets)
    # The string below is an inert expression statement holding disabled code.
    """ TODO: do we still want this code path for maxQ/sarsa?
    targetQs = predictedQs
    realQs = tfl.batch_dot(actions, targetQs)
    maxQs = tf.reduce_max(targetQs, -1)
    targetQs = realQs if self.sarsa else maxQs
    tf.scalar_summary("q_max", tf.reduce_mean(maxQs))
    # smooth between TD(m) for m<=n?
    targets = tf.slice(targetQs, [0, n], [-1, train_length])
    for i in reversed(range(n)):
        targets = tf.slice(rewards, [0, i], [-1, train_length]) + self.rlConfig.discount * targets
    targets = tf.stop_gradient(targets)
    """
    qLoss = tf.reduce_mean(tf.squared_difference(trainQs, targets))
    tf.scalar_summary("q_loss", qLoss)
    # Loss divided by the sample variance of the targets.
    tf.scalar_summary("q_uev", qLoss / tfl.sample_variance(targets))
    # all this just to log entropy statistics
    flatQs = tf.reshape(predictedQs, [-1, self.action_size])
    action_probs = tf.nn.softmax(flatQs / self.temperature)
    # Mix the softmax distribution with a uniform one, weighted by epsilon.
    action_probs = (1.0 - self.epsilon) * action_probs + self.epsilon / self.action_size
    log_action_probs = tf.log(action_probs)
    entropy = -tfl.batch_dot(action_probs, log_action_probs)
    tf.scalar_summary("entropy_avg", tf.reduce_mean(entropy))
    tf.histogram_summary("entropy", entropy)
    # Expected Q-value under the mixed action distribution.
    meanQs = tfl.batch_dot(action_probs, flatQs)
    tf.scalar_summary("q_mean", tf.reduce_mean(meanQs))
    params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='q')

    def metric(q1, q2):
        return tf.reduce_mean(tf.squared_difference(q1, q2))

    # NOTE(review): optimize() is defined elsewhere; how it uses predictedQs
    # and metric is not visible here.
    trainQ = self.optimizer.optimize(qLoss, params, predictedQs, metric)
    return trainQ
# NOTE(review): the triple quote below follows the function in the original
# text and has no partner in view; it is kept verbatim.
"""
def build_dqn(self):
    """Assemble the DQN graph: both networks, copy ops, loss and summaries.

    The method populates ``self`` with:
      * the prediction network (``s_t`` .. ``q``, ``q_action``, ``q_summary``)
        and its parameters in ``self.w``;
      * the target network (``target_s_t`` .. ``target_q``) and its
        parameters in ``self.t_w``;
      * one placeholder/assign pair per parameter for copying prediction
        weights into the target network;
      * the clipped-error loss and its RMSProp training op;
      * placeholder-fed scalar and histogram summaries plus the writer.
    It then initialises all variables, creates the saver, loads a model and
    synchronises the target network.
    """
    self.w = {}
    self.t_w = {}

    #initializer = tf.contrib.layers.xavier_initializer()
    weight_init = tf.truncated_normal_initializer(0, 0.02)
    relu = tf.nn.relu

    def state_placeholder(name):
        # The axis order of the input depends on the configured data format.
        if self.cnn_format == 'NHWC':
            dims = [None, self.screen_width, self.screen_height, self.history_length]
        else:
            dims = [None, self.history_length, self.screen_width, self.screen_height]
        return tf.placeholder('float32', dims, name=name)

    # training network
    with tf.variable_scope('prediction'):
        self.s_t = state_placeholder('s_t')

        self.l1, self.w['l1_w'], self.w['l1_b'] = conv2d(
            self.s_t, 32, [8, 8], [4, 4], weight_init, relu, self.cnn_format, name='l1')
        self.l2, self.w['l2_w'], self.w['l2_b'] = conv2d(
            self.l1, 64, [4, 4], [2, 2], weight_init, relu, self.cnn_format, name='l2')
        self.l3, self.w['l3_w'], self.w['l3_b'] = conv2d(
            self.l2, 64, [3, 3], [1, 1], weight_init, relu, self.cnn_format, name='l3')

        # Flatten everything but the batch axis before the dense layers.
        feature_dims = self.l3.get_shape().as_list()[1:]
        flat_size = reduce(lambda acc, dim: acc * dim, feature_dims)
        self.l3_flat = tf.reshape(self.l3, [-1, flat_size])

        self.l4, self.w['l4_w'], self.w['l4_b'] = linear(
            self.l3_flat, 512, activation_fn=relu, name='l4')
        self.q, self.w['q_w'], self.w['q_b'] = linear(
            self.l4, self.env.action_size, name='q')

        self.q_action = tf.argmax(self.q, dimension=1)

        # One histogram per action of the batch-averaged Q-value.
        avg_q = tf.reduce_mean(self.q, 0)
        per_action_summaries = [
            tf.histogram_summary('q/%s' % idx, avg_q[idx])
            for idx in xrange(self.env.action_size)
        ]
        self.q_summary = tf.merge_summary(per_action_summaries, 'q_summary')

    # target network
    with tf.variable_scope('target'):
        self.target_s_t = state_placeholder('target_s_t')

        self.target_l1, self.t_w['l1_w'], self.t_w['l1_b'] = conv2d(
            self.target_s_t, 32, [8, 8], [4, 4], weight_init, relu, self.cnn_format,
            name='target_l1')
        self.target_l2, self.t_w['l2_w'], self.t_w['l2_b'] = conv2d(
            self.target_l1, 64, [4, 4], [2, 2], weight_init, relu, self.cnn_format,
            name='target_l2')
        self.target_l3, self.t_w['l3_w'], self.t_w['l3_b'] = conv2d(
            self.target_l2, 64, [3, 3], [1, 1], weight_init, relu, self.cnn_format,
            name='target_l3')

        target_feature_dims = self.target_l3.get_shape().as_list()[1:]
        target_flat_size = reduce(lambda acc, dim: acc * dim, target_feature_dims)
        self.target_l3_flat = tf.reshape(self.target_l3, [-1, target_flat_size])

        self.target_l4, self.t_w['l4_w'], self.t_w['l4_b'] = linear(
            self.target_l3_flat, 512, activation_fn=relu, name='target_l4')
        self.target_q, self.t_w['q_w'], self.t_w['q_b'] = linear(
            self.target_l4, self.env.action_size, name='target_q')

    # One feedable placeholder and assign op per target-network parameter.
    with tf.variable_scope('pred_to_target'):
        self.t_w_input = {}
        self.t_w_assign_op = {}

        for name in self.w:
            target_var = self.t_w[name]
            feed = tf.placeholder('float32', target_var.get_shape().as_list(), name=name)
            self.t_w_input[name] = feed
            self.t_w_assign_op[name] = target_var.assign(feed)

    # optimizer
    with tf.variable_scope('optimizer'):
        self.target_q_t = tf.placeholder('float32', [None], name='target_q_t')
        self.action = tf.placeholder('int64', [None], name='action')

        # Select the Q-value of the action that was actually taken.
        action_one_hot = tf.one_hot(
            self.action, self.env.action_size, 1.0, 0.0, name='action_one_hot')
        q_acted = tf.reduce_sum(self.q * action_one_hot, reduction_indices=1, name='q_acted')

        self.delta = self.target_q_t - q_acted
        self.clipped_delta = tf.clip_by_value(
            self.delta, self.min_delta, self.max_delta, name='clipped_delta')

        self.loss = tf.reduce_mean(tf.square(self.clipped_delta), name='loss')
        rms_prop = tf.train.RMSPropOptimizer(self.learning_rate, momentum=0.95, epsilon=0.01)
        self.optim = rms_prop.minimize(self.loss)

    with tf.variable_scope('summary'):
        scalar_summary_tags = ['average/reward', 'average/loss', 'average/q', \
            'episode/max reward', 'episode/min reward', 'episode/avg reward', 'episode/num of game']
        histogram_summary_tags = ['episode/rewards', 'episode/actions']

        self.summary_placeholders = {}
        self.summary_ops = {}

        # Scalars first, then histograms, each fed through its own placeholder.
        summary_kinds = (
            (scalar_summary_tags, tf.scalar_summary),
            (histogram_summary_tags, tf.histogram_summary),
        )
        for tags, make_summary in summary_kinds:
            for tag in tags:
                feed = tf.placeholder('float32', None, name=tag.replace(' ', '_'))
                self.summary_placeholders[tag] = feed
                self.summary_ops[tag] = make_summary(tag, feed)

        self.writer = tf.train.SummaryWriter('./logs/%s' % self.model_dir, self.sess.graph)

    tf.initialize_all_variables().run()

    # Only the prediction weights and the step counter are checkpointed.
    self._saver = tf.train.Saver(self.w.values() + [self.step_op], max_to_keep=30)

    self.load_model()
    self.update_target_q_network()
def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor):
    """Append a trainable fully-connected + softmax layer to the graph.

    The new layer maps bottleneck values to ``class_count`` logits. The
    function also wires up the cross-entropy loss, a gradient-descent
    training op and summaries for TensorBoard.

    The set up for the softmax and fully-connected layers is based on:
    https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html

    Args:
      class_count: Integer number of categories to recognise.
      final_tensor_name: Name string for the new softmax output node.
      bottleneck_tensor: The output of the main CNN graph; used as the
        default value of the bottleneck input placeholder.

    Returns:
      A 5-tuple ``(train_step, cross_entropy_mean, bottleneck_input,
      ground_truth_input, final_tensor)``.
    """
    with tf.name_scope('input'):
        bottleneck_input = tf.placeholder_with_default(
            bottleneck_tensor,
            shape=[None, BOTTLENECK_TENSOR_SIZE],
            name='BottleneckInputPlaceholder')
        ground_truth_input = tf.placeholder(
            tf.float32, [None, class_count], name='GroundTruthInput')

    # Everything for the new layer is grouped under one scope so it is easy
    # to find in TensorBoard.
    scope_name = 'final_training_ops'
    with tf.name_scope(scope_name):
        with tf.name_scope('weights'):
            initial_weights = tf.truncated_normal(
                [BOTTLENECK_TENSOR_SIZE, class_count], stddev=0.001)
            final_weights = tf.Variable(initial_weights, name='final_weights')
            variable_summaries(final_weights, scope_name + '/weights')
        with tf.name_scope('biases'):
            final_biases = tf.Variable(tf.zeros([class_count]), name='final_biases')
            variable_summaries(final_biases, scope_name + '/biases')
        with tf.name_scope('Wx_plus_b'):
            logits = tf.matmul(bottleneck_input, final_weights) + final_biases
            tf.histogram_summary(scope_name + '/pre_activations', logits)

    # Created outside the scopes above so the node keeps the requested name.
    final_tensor = tf.nn.softmax(logits, name=final_tensor_name)
    tf.histogram_summary(final_tensor_name + '/activations', final_tensor)

    with tf.name_scope('cross_entropy'):
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits, ground_truth_input)
        with tf.name_scope('total'):
            cross_entropy_mean = tf.reduce_mean(per_example_loss)
        tf.scalar_summary('cross entropy', cross_entropy_mean)

    with tf.name_scope('train'):
        optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        train_step = optimizer.minimize(cross_entropy_mean)

    return (train_step, cross_entropy_mean, bottleneck_input,
            ground_truth_input, final_tensor)
def model(X, w_h, w_o, b, b2): # Add layer name scopes for better graph visualization with tf.name_scope("Layer2"): h = tf.nn.relu(tf.matmul(X, w_h) + b) with tf.name_scope("Layer3"): return tf.nn.softmax(tf.matmul(h, w_o) + b2) #Initialize weights w_h = init_weights([2, number_hidden_nodes], "w_h") w_o = init_weights([number_hidden_nodes, 2], "w_o") b = tf.Variable(tf.zeros([number_hidden_nodes])) b2 = tf.Variable(tf.zeros([2])) #Histogram summaries for weights tf.histogram_summary("w_h_summ", w_h) tf.histogram_summary("w_o_summ", w_o) #Create Model (One hidden layer) py_x = model(x, w_h, w_o, b, b2) #Cost function with tf.name_scope("cost") as scope: cost = -tf.reduce_sum(y_ * tf.log(py_x)) train_op = tf.train.GradientDescentOptimizer(0.2).minimize(cost) # Add scalar summary for cost tensor tf.scalar_summary("cost", cost) #Measure accuracy with tf.name_scope("accuracy"): correct_pred = tf.equal(tf.argmax(expect, 1),
def create_variables(self):
    """Build the actor/critic training graph and store its ops on ``self``.

    Creates copies of the actor and critic as target networks, the ops that
    compute actions, target future rewards, the critic and actor updates,
    the target-network update ops and the merged summary op.

    Reads ``self.observation``, ``self.next_observation`` and
    ``self.given_action``, which must already exist on ``self`` (their
    placeholder definitions here are commented out).
    """
    self.target_actor = self.actor.copy(scope="target_actor")
    self.target_critic = self.critic.copy(scope="target_critic")

    # FOR REGULAR ACTION SCORE COMPUTATION
    with tf.name_scope("taking_action"):
        # self.observation = tf.placeholder(tf.float32, (None, self.observation_size), name="observation")
        self.actor_val = self.actor(self.observation)
        # self.actor_val = tf.placeholder(tf.float32, (None, 20, 2), name="asd")
        # self.actor_action = tf.identity(self.get_last(self.actor_val), name="actor_action")
        self.actor_action = tf.identity(self.actor.get_last(
            self.observation), name="actor_action")
        # self.actor_action = tf.identity([[1.1, 1.1]], name="actor_action")
        # tf.histogram_summary("actions", self.actor_action)

    # FOR PREDICTING TARGET FUTURE REWARDS
    with tf.name_scope("estimating_future_reward"):
        # self.next_observation = tf.placeholder(tf.float32, (None, self.observation_size), name="next_observation")
        # Multiplies the bootstrapped value below; presumably 0 for terminal
        # transitions -- TODO confirm against the code that feeds it.
        self.next_observation_mask = tf.placeholder(
            tf.float32, (None, ), name="next_observation_mask")
        # Target-network outputs are wrapped in stop_gradient, so no gradient
        # flows into the target actor or critic.
        self.next_action = tf.stop_gradient(
            self.target_actor(self.next_observation))  # ST
        # print "next action: " + str(self.next_action)
        tf.histogram_summary("target_actions", self.next_action)
        self.next_value = tf.stop_gradient(
            tf.reshape(
                self.target_critic(
                    self.concat_nn_lstm_input(
                        self.next_observation,
                        self.add_pow_values(self.next_action))),
                [-1]))  # ST
        self.rewards = tf.placeholder(tf.float32, (None, ), name="rewards")
        # One-step target: reward + discount * mask * target critic value.
        self.future_reward = self.rewards + self.discount_rate * self.next_observation_mask * self.next_value

    with tf.name_scope("critic_update"):
        ##### ERROR FUNCTION #####
        # self.given_action = tf.placeholder(tf.float32, (None, self.action_size), name="given_action")
        self.value_given_action = tf.reshape(
            self.critic(
                self.concat_nn_lstm_input(
                    self.observation,
                    self.add_pow_values(self.given_action))), [-1])
        tf.scalar_summary("value_for_given_action",
                          tf.reduce_mean(self.value_given_action))
        temp_diff = self.value_given_action - self.future_reward
        # Mean squared difference between the critic value and the target.
        self.critic_error = tf.identity(tf.reduce_mean(
            tf.square(temp_diff)), name='critic_error')
        ##### OPTIMIZATION #####
        critic_gradients = self.optimizer.compute_gradients(
            self.critic_error, var_list=self.critic.variables())
        # Add histograms for gradients.
        # The variable itself is always logged; its gradient only when present.
        for grad, var in critic_gradients:
            tf.histogram_summary('critic_update/' + var.name, var)
            if grad is not None:
                tf.histogram_summary(
                    'critic_update/' + var.name + '/gradients', grad)
        self.critic_update = self.optimizer.apply_gradients(
            critic_gradients, name='critic_train_op')
        tf.scalar_summary("critic_error", self.critic_error)

    with tf.name_scope("actor_update"):
        ##### ERROR FUNCTION #####
        self.actor_score = self.critic(
            self.concat_nn_lstm_input(self.observation,
                                      self.add_pow_values(self.actor_val)))
        ##### OPTIMIZATION #####
        # here we are maximizing actor score.
        # only optimize actor variables here, while keeping critic constant
        actor_gradients = self.optimizer.compute_gradients(
            tf.reduce_mean(-self.actor_score),
            var_list=self.actor.variables())
        # Add histograms for gradients.
        for grad, var in actor_gradients:
            tf.histogram_summary('actor_update/' + var.name, var)
            if grad is not None:
                tf.histogram_summary(
                    'actor_update/' + var.name + '/gradients', grad)
        self.actor_update = self.optimizer.apply_gradients(
            actor_gradients, name='actor_train_op')
        tf.scalar_summary("actor_score", tf.reduce_mean(self.actor_score))

    # UPDATE TARGET NETWORK
    with tf.name_scope("target_network_update"):
        self.target_actor_update = ContinuousDeepQLSTMStepped.update_target_network(
            self.actor, self.target_actor, self.target_actor_update_rate)
        self.target_critic_update = ContinuousDeepQLSTMStepped.update_target_network(
            self.critic, self.target_critic, self.target_critic_update_rate)
        # Single op that runs both target updates.
        self.update_all_targets = tf.group(self.target_actor_update,
                                           self.target_critic_update,
                                           name='target_networks_update')

    self.summarize = tf.merge_all_summaries()
    self.no_op1 = tf.no_op()
def main(_):
    """Train and evaluate a small conv net, logging summaries for TensorBoard.

    The network is one convolutional layer followed by two fully connected
    layers (dropout on the second) and a softmax readout. Every 50th step the
    summaries and the training accuracy are recorded instead of training;
    all other steps run one optimisation step. The test accuracy is printed
    at the end.

    Args:
        _: Unused positional argument.
    """
    # Download data if no local copy exists
    data_sets = input_data.read_data_sets(FLAGS.train_dir, one_hot=True,
                                          target_label=FLAGS.target_label)

    # Create the session
    sess = tf.InteractiveSession()

    # Input and label placeholders
    num_classes = data_sets.train.num_classes
    num_features = data_sets.train.num_features
    x = tf.placeholder('float', shape=[None, num_features], name='x-input')
    y_ = tf.placeholder('float', shape=[None, num_classes], name='y-input')
    keep_prob = tf.placeholder('float', name='k-prob')

    # Convolutional layer
    with tf.name_scope('conv'):
        W_conv = weight_variable([4, 4, 1, FLAGS.conv_fmaps])
        b_conv = bias_variable([FLAGS.conv_fmaps])
        # Reshape and convolve
        x_image = tf.reshape(x, [-1, 28, 28, 1])
        h_conv = tf.nn.relu(conv2d(x_image, W_conv) + b_conv)
        #h_pool = max_pool_2x2(h_conv)

    # Fully connected layer1
    with tf.name_scope('fc_1'):
        W_fc1 = weight_variable([13 * 13 * FLAGS.conv_fmaps, FLAGS.fc1_nodes])
        b_fc1 = bias_variable([FLAGS.fc1_nodes])
        # Reshape and apply relu
        #h_pool1_flat = tf.reshape(h_pool, [-1, 7 * 7 * FLAGS.conv_fmaps])
        #h_fc1 = tf.nn.relu(tf.matmul(h_pool1_flat, W_fc1) + b_fc1)
        # If the max_pool operation is ignored...
        h_flat = tf.reshape(h_conv, [-1, 13 * 13 * FLAGS.conv_fmaps])
        h_fc1 = tf.nn.relu(tf.matmul(h_flat, W_fc1) + b_fc1)

    # Fully connected layer2
    with tf.name_scope('fc_2'):
        W_fc2 = weight_variable([FLAGS.fc1_nodes, FLAGS.fc2_nodes])
        b_fc2 = bias_variable([FLAGS.fc2_nodes])
        # Apply relu
        h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
        # Apply dropout to fc_2 output
        h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

    # Readout layer
    with tf.name_scope('readout'):
        W_out = weight_variable([FLAGS.fc2_nodes, num_classes])
        b_out = bias_variable([num_classes])
        y = tf.nn.softmax(tf.matmul(h_fc2_drop, W_out) + b_out)

    # Add summary ops for tensorboard (same tags and order as before)
    histogram_targets = (
        ('W_conv', W_conv),
        ('W_fc1', W_fc1),
        ('W_fc2', W_fc2),
        ('W_out', W_out),
        ('b_conv', b_conv),
        ('b_fc1', b_fc1),
        ('b_fc2', b_fc2),
        ('b_out', b_out),
        ('Output', y),
    )
    for tag, tensor in histogram_targets:
        tf.histogram_summary(tag, tensor)

    # Cost function
    with tf.name_scope('xent'):
        # Clip the probabilities away from zero before taking the log: a
        # softmax output that underflows to 0 would otherwise produce
        # 0 * log(0) = NaN and poison every later update.
        x_entropy = -tf.reduce_sum(
            y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
        tf.scalar_summary('xentropy', x_entropy)

    # Train the model
    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(
            FLAGS.learning_rate).minimize(x_entropy)

    # Evaluate model
    with tf.name_scope('eval'):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', accuracy)

    # Collect all summaries during graph building
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                            graph_def=sess.graph_def)
    try:
        sess.run(tf.initialize_all_variables())

        # Train the model and record summaries
        for i in range(FLAGS.max_steps):
            if i % 50 == 0:
                # Generate a new feed dictionary to test training accuracy
                # (dropout disabled by feeding keep_prob = 1.0)
                feed_dict = fill_feed_dict(data_sets.train, x, y_,
                                           (keep_prob, 1.0))
                # Update the summary collection
                summary_str, train_accuracy = sess.run(
                    [summary_op, accuracy], feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, i)
                # Print status update
                print('step %d, training accuracy %g' % (i, train_accuracy))
            else:
                # Generate a new feed dictionary for the next training batch
                feed_dict = fill_feed_dict(data_sets.train, x, y_,
                                           (keep_prob, FLAGS.keep_prob))
                sess.run(train_step, feed_dict=feed_dict)

        print('test accuracy %.4f' % accuracy.eval(feed_dict={
            x: data_sets.test.images,
            y_: data_sets.test.labels,
            keep_prob: 1.0
        }))
    finally:
        # Flush pending events to disk and release the session's resources
        # even when training is interrupted.
        summary_writer.close()
        sess.close()
learning_rate = tf.train.exponential_decay( starter_learning_rate, global_step, 300, 0.96, staircase=True) #opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) opt = tf.train.AdagradOptimizer(learning_rate=learning_rate) #opt = tf.train.AdamOptimizer() train_step = opt.minimize( loss, var_list=[coeff], global_step=global_step) # ,[mean,sigma] if options.log is not None: coeff_hist = tf.histogram_summary("coeff", coeff) loss_summary = tf.scalar_summary("loss", loss) learning_rate_summary = tf.scalar_summary( "learning_rate", learning_rate) if options.log is not None: summary_op = tf.merge_all_summaries() init = tf.initialize_all_variables() sess = tf.Session(config=tf_config) if options.log is not None: print("Writing log to {}".format(options.log)) writer = tf.train.SummaryWriter(options.log, sess.graph_def) sess.run(init)
def train(target, dataset, cluster_spec):
    """Train Inception on a dataset for a number of steps.

    Builds this worker's replica of the model with a synchronous-replica
    RMSProp optimizer, then runs training steps until the supervisor asks to
    stop, the global step exceeds ``FLAGS.max_steps``, or the global step
    exceeds 2000. One step (the one started when the last observed global
    step equals 60) is run with full tracing and its timeline is written to
    ``/tmp/timeline.ctf.json``. The chief worker saves a checkpoint at the
    end.

    Args:
        target: Session target handed to the supervisor when it prepares or
            waits for the session.
        dataset: Object providing ``num_examples_per_epoch()`` and
            ``num_classes()``; also passed to the input pipeline.
        cluster_spec: Object whose ``as_dict()`` has ``'worker'`` and
            ``'ps'`` entries; their lengths give the number of workers and
            parameter servers.

    Raises:
        AssertionError: if there are no workers or parameter servers, if no
            batchnorm update ops were collected, or if the loss becomes NaN.
    """
    # Number of workers and parameter servers are infered from the workers and ps
    # hosts string.
    num_workers = len(cluster_spec.as_dict()['worker'])
    num_parameter_servers = len(cluster_spec.as_dict()['ps'])
    # If no value is given, num_replicas_to_aggregate defaults to be the number of
    # workers.
    if FLAGS.num_replicas_to_aggregate == -1:
        num_replicas_to_aggregate = num_workers
    else:
        num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

    # Both should be greater than 0 in a distributed training.
    assert num_workers > 0 and num_parameter_servers > 0, (
        ' num_workers and '
        'num_parameter_servers'
        ' must be > 0.')

    # Choose worker 0 as the chief. Note that any worker could be the chief
    # but there should be only one chief.
    is_chief = (FLAGS.task_id == 0)

    # Global step value at which a single fully traced step is run.
    profile_step = 60

    # Ops are assigned to worker by default.
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        # Variables and its related init/assign ops are assigned to ps.
        with slim.scopes.arg_scope(
                [slim.variables.variable, slim.variables.global_step],
                device=slim.variables.VariableDeviceChooser(
                    num_parameter_servers)):
            # Create a variable to count the number of train() calls. This equals the
            # number of updates applied to the variables.
            global_step = slim.variables.global_step()

            # Calculate the learning rate schedule.
            num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                     FLAGS.batch_size)
            # Decay steps need to be divided by the number of replicas to aggregate.
            decay_steps = int(num_batches_per_epoch *
                              FLAGS.num_epochs_per_decay /
                              num_replicas_to_aggregate)

            # Decay the learning rate exponentially based on the number of steps.
            lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                            global_step,
                                            decay_steps,
                                            FLAGS.learning_rate_decay_factor,
                                            staircase=True)
            # Add a summary to track the learning rate.
            tf.scalar_summary('learning_rate', lr)

            # Create an optimizer that performs gradient descent.
            opt = tf.train.RMSPropOptimizer(lr,
                                            RMSPROP_DECAY,
                                            momentum=RMSPROP_MOMENTUM,
                                            epsilon=RMSPROP_EPSILON)

            images, labels = image_processing.distorted_inputs(
                dataset,
                batch_size=FLAGS.batch_size,
                num_preprocess_threads=FLAGS.num_preprocess_threads)

            # Number of classes in the Dataset label set plus 1.
            # Label 0 is reserved for an (unused) background class.
            num_classes = dataset.num_classes() + 1
            logits = inception.inference(images, num_classes,
                                         for_training=True)
            # Add classification loss.
            inception.loss(logits, labels)

            # Gather all of the losses including regularization losses.
            losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
            losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

            total_loss = tf.add_n(losses, name='total_loss')

            if is_chief:
                # Compute the moving average of all individual losses and the
                # total loss.
                loss_averages = tf.train.ExponentialMovingAverage(0.9,
                                                                  name='avg')
                loss_averages_op = loss_averages.apply(losses + [total_loss])

                # Attach a scalar summmary to all individual losses and the total loss;
                # do the same for the averaged version of the losses.
                for l in losses + [total_loss]:
                    loss_name = l.op.name
                    # Name each loss as '(raw)' and name the moving average version of the
                    # loss as the original loss name.
                    tf.scalar_summary(loss_name + ' (raw)', l)
                    tf.scalar_summary(loss_name, loss_averages.average(l))

                # Add dependency to compute loss_averages.
                with tf.control_dependencies([loss_averages_op]):
                    total_loss = tf.identity(total_loss)

            # Track the moving averages of all trainable variables.
            # Note that we maintain a 'double-average' of the BatchNormalization
            # global statistics.
            # This is not needed when the number of replicas are small but important
            # for synchronous distributed training with tens of workers/replicas.
            exp_moving_averager = tf.train.ExponentialMovingAverage(
                inception.MOVING_AVERAGE_DECAY, global_step)

            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())

            # Add histograms for model variables.
            for var in variables_to_average:
                tf.histogram_summary(var.op.name, var)

            # Create synchronous replica optimizer.
            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate=num_replicas_to_aggregate,
                replica_id=FLAGS.task_id,
                total_num_replicas=num_workers,
                variable_averages=exp_moving_averager,
                variables_to_average=variables_to_average)

            batchnorm_updates = tf.get_collection(
                slim.ops.UPDATE_OPS_COLLECTION)
            assert batchnorm_updates, 'Batchnorm updates are missing'
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            # Add dependency to compute batchnorm_updates.
            with tf.control_dependencies([batchnorm_updates_op]):
                total_loss = tf.identity(total_loss)

            # Compute gradients with respect to the loss.
            grads = opt.compute_gradients(total_loss)

            # Add histograms for gradients.
            for grad, var in grads:
                if grad is not None:
                    tf.histogram_summary(var.op.name + '/gradients', grad)

            apply_gradients_op = opt.apply_gradients(grads,
                                                     global_step=global_step)

            # Fetching train_op applies the gradients and yields the loss.
            with tf.control_dependencies([apply_gradients_op]):
                train_op = tf.identity(total_loss, name='train_op')

            # Get chief queue_runners, init_tokens and clean_up_op, which is used to
            # synchronize replicas.
            # More details can be found in sync_replicas_optimizer.
            chief_queue_runners = [opt.get_chief_queue_runner()]
            init_tokens_op = opt.get_init_tokens_op()
            clean_up_op = opt.get_clean_up_op()

            # Create a saver.
            saver = tf.train.Saver()

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.merge_all_summaries()

            # Build an initialization operation to run below.
            init_op = tf.initialize_all_variables()

            # We run the summaries in the same thread as the training operations by
            # passing in None for summary_op to avoid a summary_thread being started.
            # Running summaries and training operations in parallel could run out of
            # GPU memory.
            sv = tf.train.Supervisor(is_chief=is_chief,
                                     logdir=FLAGS.train_dir,
                                     init_op=init_op,
                                     summary_op=None,
                                     global_step=global_step,
                                     saver=saver,
                                     save_model_secs=FLAGS.save_interval_secs)

            tf.logging.info('%s Supervisor' % datetime.now())

            sess_config = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement)

            # Get a session.
            sess = sv.prepare_or_wait_for_session(target, config=sess_config)

            # Start the queue runners.
            queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
            sv.start_queue_runners(sess, queue_runners)
            tf.logging.info('Started %d queues for processing input data.',
                            len(queue_runners))

            if is_chief:
                sv.start_queue_runners(sess, chief_queue_runners)
                sess.run(init_tokens_op)

            # Train, checking for Nans. Concurrently run the summary operation at a
            # specified interval. Note that the summary_op and train_op never run
            # simultaneously in order to prevent running out of GPU memory.
            # (The periodic summary run is currently disabled below; the
            # timestamp is kept for when it is re-enabled.)
            next_summary_time = time.time() + FLAGS.save_summaries_secs
            step = 0
            while (not sv.should_stop()) and step <= 2000:
                try:
                    start_time = time.time()
                    # Metadata is only collected for the single traced step.
                    run_metadata = None
                    if step == profile_step:
                        tf.logging.info("Tracing at step %d" % step)
                        run_metadata = tf.RunMetadata()
                        loss_value, step = sess.run(
                            [train_op, global_step],
                            options=tf.RunOptions(
                                trace_level=tf.RunOptions.FULL_TRACE),
                            run_metadata=run_metadata)
                    else:
                        loss_value, step = sess.run([train_op, global_step])

                    # Measured before the trace is written so the file I/O
                    # does not count towards the step time.
                    duration = time.time() - start_time

                    if run_metadata is not None:
                        trace = timeline.Timeline(
                            step_stats=run_metadata.step_stats)
                        # The context manager closes the file even if
                        # generating or writing the trace fails.
                        with open('/tmp/timeline.ctf.json', 'w') as trace_file:
                            trace_file.write(
                                trace.generate_chrome_trace_format())

                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'

                    if step > FLAGS.max_steps:
                        break

                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = ('Worker %d: %s: step %d, loss = %.2f'
                                  '(%.1f examples/sec; %.3f sec/batch)')
                    # Warm-up steps and the step right after the traced one
                    # are reported separately from the regular timings.
                    if step >= 10 and step != profile_step + 1:
                        tf.logging.info(
                            format_str %
                            (FLAGS.task_id, datetime.now(), step, loss_value,
                             examples_per_sec, duration))
                    else:
                        tf.logging.info(
                            'Not considering step %d (%.1f samples/sec)' %
                            (step, examples_per_sec))

                    # Determine if the summary_op should be run on the chief worker.
                    # if is_chief and next_summary_time < time.time():
                    #   tf.logging.info('Running Summary operation on the chief.')
                    #   summary_str = sess.run(summary_op)
                    #   sv.summary_computed(sess, summary_str)
                    #   tf.logging.info('Finished running Summary operation.')
                    #
                    #   # Determine the next time for running the summary.
                    #   next_summary_time += FLAGS.save_summaries_secs
                except:
                    # Deliberately bare: the chief must run the clean-up op on
                    # any exit, including interrupts, and the error is then
                    # re-raised unchanged.
                    if is_chief:
                        tf.logging.info('About to execute sync_clean_up_op!')
                        sess.run(clean_up_op)
                    raise

            # Stop the supervisor.  This also waits for service threads to finish.
            sv.stop()

            # Save after the training ends.
            if is_chief:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=global_step)
def _activation_summary(tensor):
    """Register an activation histogram and a sparsity scalar for `tensor`.

    The summary tags are derived from the op name of `tensor` with any
    "<TOWER_NAME>_<digits>/" segment removed, so the same tensor built under
    differently numbered tower scopes produces the same tags.

    Args:
        tensor: the tensor whose values are summarized.
    """
    tower_prefix_pattern = "%s_[0-9]*/" % TOWER_NAME
    tensor_name = re.sub(tower_prefix_pattern, "", tensor.op.name)

    # Histogram of the raw values.
    tf.histogram_summary(tensor_name + "/activations", tensor)

    # Fraction of the values that are exactly zero.
    sparsity_tag = tensor_name + "/sparsity"
    tf.scalar_summary(sparsity_tag, tf.nn.zero_fraction(tensor))
def run_train(w2vsource, w2vdim, w2vnumfilters, lexdim, lexnumfilters,
              randomseed, datasource, model_name, trainable, the_epoch):
    """Train the word2vec (+ lexicon) CNN and report the best dev score.

    Loads the pickled lexicons, builds the vocabulary, constructs a
    `W2V_TRAINABLE` model, initialises its embedding matrices from a binary
    word2vec file (and, for model_name == 'w2v_lex', from the lexicons), then
    trains while periodically evaluating on the dev and test sets.

    Args:
        w2vsource: "twitter" or "amazon"; selects the word2vec file name.
        w2vdim: word2vec dimensionality (used in the file name and passed as
            `embedding_size`).
        w2vnumfilters: passed to the model as `num_filters`.
        lexdim: width of the lexicon embedding (`embedding_size_lex`).
        lexnumfilters: passed to the model as `num_filters_lex`.
        randomseed: seed for numpy; TensorFlow's graph-level seed is set from
            it as well.
        datasource: 'semeval' (3 classes, scored by average F1) or 'sst'
            (5 classes, scored by accuracy).
        model_name: passed to the model as `themodel`; 'w2v_lex' additionally
            assigns the lexicon embedding matrix.
        trainable: passed to the model as `is_trainable`.
        the_epoch: third argument of `cnn_data_helpers.batch_iter`
            (presumably the number of epochs -- TODO confirm).

    Raises:
        ValueError: if `datasource` or `w2vsource` is not a supported value,
            or if the word2vec file ends in the middle of a word.
    """
    # Validate the string switches up front. Previously an unsupported value
    # only surfaced much later as a NameError on an unassigned local.
    if datasource not in ('semeval', 'sst'):
        raise ValueError("unsupported datasource: %r" % (datasource,))
    if w2vsource not in ('twitter', 'amazon'):
        raise ValueError("unsupported w2vsource: %r" % (w2vsource,))

    np.random.seed(randomseed)
    norm_model = []

    with Timer("lex"):
        print('new way of loading lexicon')
        # Value stored under the "<PAD/>" key of each lexicon; it is used
        # below for words a lexicon does not contain.
        default_vector_dic = {
            'EverythingUnigramsPMIHS': [0],
            'HS-AFFLEX-NEGLEX-unigrams': [0, 0, 0],
            'Maxdiff-Twitter-Lexicon_0to1': [0.5],
            'S140-AFFLEX-NEGLEX-unigrams': [0, 0, 0],
            'unigrams-pmilexicon': [0, 0, 0],
            'unigrams-pmilexicon_sentiment_140': [0, 0, 0],
            'BL': [0]
        }
        lexfile_list = [
            'EverythingUnigramsPMIHS.pickle',
            'HS-AFFLEX-NEGLEX-unigrams.pickle',
            'Maxdiff-Twitter-Lexicon_0to1.pickle',
            'S140-AFFLEX-NEGLEX-unigrams.pickle',
            'unigrams-pmilexicon.pickle',
            'unigrams-pmilexicon_sentiment_140.pickle',
            'BL.pickle'
        ]
        for lexfile in lexfile_list:
            fname = '../data/le/%s' % lexfile
            print('default lexicon for %s' % lexfile)
            # NOTE(review): pickle.load can execute arbitrary code; these
            # files must come from a trusted location.
            with open(fname, 'rb') as handle:
                each_model = pickle.load(handle)
            default_vector = default_vector_dic[lexfile.replace('.pickle', '')]
            each_model["<PAD/>"] = default_vector
            norm_model.append(each_model)

    unigram_lexicon_model = norm_model

    # CONFIGURE
    # ==================================================
    if datasource == 'semeval':
        numberofclass = 3
        use_rotten_tomato = False
    elif datasource == 'sst':
        numberofclass = 5
        use_rotten_tomato = True

    # Training
    # ==================================================
    if randomseed > 0:
        tf.set_random_seed(randomseed)

    with tf.Graph().as_default():
        tf.set_random_seed(randomseed)
        max_af1_dev = 0
        index_at_max_af1_dev = 0
        af1_tst_at_max_af1_dev = 0

        # WORD2VEC
        x_text, y = cnn_data_helpers.load_data_trainable(
            "everydata", rottenTomato=use_rotten_tomato)
        max_document_length = max(len(x.split(" ")) for x in x_text)
        vocab_processor = learn.preprocessing.VocabularyProcessor(
            max_document_length)
        vocab_processor.fit_transform(x_text)
        total_vocab_size = len(vocab_processor.vocabulary_)

        x_train, y_train = cnn_data_helpers.load_data_trainable(
            "trn", rottenTomato=use_rotten_tomato)
        x_dev, y_dev = cnn_data_helpers.load_data_trainable(
            "dev", rottenTomato=use_rotten_tomato)
        x_test, y_test = cnn_data_helpers.load_data_trainable(
            "tst", rottenTomato=use_rotten_tomato)

        x_train = np.array(list(vocab_processor.fit_transform(x_train)))
        x_dev = np.array(list(vocab_processor.fit_transform(x_dev)))
        x_test = np.array(list(vocab_processor.fit_transform(x_test)))

        # Only drops the local name; the list stays alive through
        # `unigram_lexicon_model`.
        del norm_model
        gc.collect()

        print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)

        # Using the session as a context manager makes it the default session
        # and closes it on exit (it was previously never closed).
        with tf.Session(config=session_conf) as sess:
            if randomseed > 0:
                tf.set_random_seed(randomseed)

            cnn = W2V_TRAINABLE(
                sequence_length=x_train.shape[1],
                num_classes=numberofclass,
                vocab_size=len(vocab_processor.vocabulary_),
                is_trainable=trainable,
                embedding_size=w2vdim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=w2vnumfilters,
                embedding_size_lex=lexdim,
                num_filters_lex=lexnumfilters,
                themodel=model_name,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.histogram_summary(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.scalar_summary(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)
            f1_summary = tf.scalar_summary("avg_f1", cnn.avg_f1)

            # Train Summaries
            train_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.train.SummaryWriter(
                train_summary_dir, sess.graph_def)

            # Dev summaries
            dev_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.train.SummaryWriter(
                dev_summary_dir, sess.graph_def)

            # Test summaries
            test_summary_op = tf.merge_summary(
                [loss_summary, acc_summary, f1_summary])
            test_summary_dir = os.path.join(out_dir, "summaries", "test")
            test_summary_writer = tf.train.SummaryWriter(
                test_summary_dir, sess.graph_def)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            # NOTE(review): no checkpoint is ever written by this function;
            # the saver is kept so the graph is built exactly as before.
            saver = tf.train.Saver(tf.all_variables())

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            the_base_path = '../data/emory_w2v/'
            if w2vsource == "twitter":
                the_model_path = the_base_path + 'w2v-%d.bin' % w2vdim
            elif w2vsource == "amazon":
                the_model_path = the_base_path + 'w2v-%d-%s.bin' % (
                    w2vdim, w2vsource)

            # uniform(0.0, 0.0) yields all zeros, but the call still consumes
            # numpy's random stream; it is kept so the values drawn for
            # initW_lex stay the same for a given seed.
            initW = np.random.uniform(0.0, 0.0, (total_vocab_size, w2vdim))
            initW_lex = np.random.uniform(0.00, 0.2,
                                          (total_vocab_size, lexdim))

            # load any vectors from the word2vec
            with Timer("LOADING W2V..."):
                print("LOADING word2vec file {} \n".format(the_model_path))
                # W2V
                with open(the_model_path, "rb") as f:
                    header = f.readline()
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
                    for line in xrange(vocab_size):
                        # Each record is "<word> <binary vector>"; read the
                        # word one character at a time up to the space.
                        word = []
                        while True:
                            ch = f.read(1)
                            if ch == '':
                                # A truncated file used to spin forever here.
                                raise ValueError(
                                    "unexpected end of word2vec file %s"
                                    % the_model_path)
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            initW[idx] = np.fromstring(f.read(binary_len),
                                                       dtype='float32')
                        else:
                            # Word not in our vocabulary: skip its vector.
                            f.read(binary_len)

            with Timer("LOADING LEXICON..."):
                vocabulary_set = set()
                for eachModel in unigram_lexicon_model:
                    for word in eachModel:
                        vocabulary_set.add(word)

                for word in vocabulary_set:
                    # Concatenate this word's entry from every lexicon,
                    # using the "<PAD/>" default where the word is missing.
                    lexiconList = np.empty([0, 1])
                    for eachModel in unigram_lexicon_model:
                        if word in eachModel:
                            temp = np.array(np.float32(eachModel[word]))
                        else:
                            temp = np.array(np.float32(eachModel["<PAD/>"]))
                        lexiconList = np.append(lexiconList, temp)

                    idx = vocab_processor.vocabulary_.get(word)
                    if idx != 0:
                        initW_lex[idx] = lexiconList

            sess.run(cnn.W.assign(initW))
            if model_name == 'w2v_lex':
                sess.run(cnn.W_lex.assign(initW_lex))

            def train_step(x_batch, y_batch):
                """ A single training step """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                # Only step and summaries are used; the fetch list is left
                # unchanged.
                (_, step, summaries, loss, accuracy, neg_r, neg_p, f1_neg,
                 f1_pos, avg_f1) = sess.run(
                    [
                        train_op, global_step, train_summary_op, cnn.loss,
                        cnn.accuracy, cnn.neg_r, cnn.neg_p, cnn.f1_neg,
                        cnn.f1_pos, cnn.avg_f1
                    ], feed_dict)
                train_summary_writer.add_summary(summaries, step)

            def _eval_step(tag, summary_op, x_batch, y_batch, writer,
                           score_type):
                """ Shared body of dev_step / test_step (dropout disabled) """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                (step, summaries, loss, accuracy, neg_r, neg_p, f1_neg,
                 f1_pos, avg_f1) = sess.run(
                    [
                        global_step, summary_op, cnn.loss, cnn.accuracy,
                        cnn.neg_r, cnn.neg_p, cnn.f1_neg, cnn.f1_pos,
                        cnn.avg_f1
                    ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print(
                    "{} : {} step {}, loss {:g}, acc {:g}, neg_r {:g} neg_p {:g} f1_neg {:g}, f1_pos {:g}, f1 {:g}"
                    .format(tag, time_str, step, loss, accuracy, neg_r,
                            neg_p, f1_neg, f1_pos, avg_f1))
                if writer:
                    writer.add_summary(summaries, step)
                if score_type == 'f1':
                    return avg_f1
                else:
                    return accuracy

            def dev_step(x_batch, y_batch, writer=None, score_type='f1'):
                """ Evaluates model on a dev set """
                return _eval_step("DEV", dev_summary_op, x_batch, y_batch,
                                  writer, score_type)

            def test_step(x_batch, y_batch, writer=None, score_type='f1'):
                """ Evaluates model on a test set """
                # Previously fetched dev_summary_op, leaving test_summary_op
                # unused; both merge the same three summaries.
                return _eval_step("TEST", test_summary_op, x_batch, y_batch,
                                  writer, score_type)

            # semeval is scored by average F1, sst by accuracy.
            score_type = 'f1' if datasource == 'semeval' else 'acc'

            # Generate batches
            batches = cnn_data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, the_epoch)

            try:
                # Training loop. For each batch...
                for batch in batches:
                    x_batch, y_batch = zip(*batch)
                    train_step(x_batch, y_batch)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % FLAGS.evaluate_every == 0:
                        print("Evaluation:")
                        curr_af1_dev = dev_step(x_dev, y_dev,
                                                writer=dev_summary_writer,
                                                score_type=score_type)
                        curr_af1_tst = test_step(x_test, y_test,
                                                 writer=test_summary_writer,
                                                 score_type=score_type)

                        # Remember the test score at the best dev score.
                        if curr_af1_dev > max_af1_dev:
                            max_af1_dev = curr_af1_dev
                            index_at_max_af1_dev = current_step
                            af1_tst_at_max_af1_dev = curr_af1_tst

                        print('Status: [%d] Max f1 for dev (%f), Max f1 for tst (%f)\n' % (
                            index_at_max_af1_dev, max_af1_dev,
                            af1_tst_at_max_af1_dev))
                        sys.stdout.flush()
            finally:
                # Flush and release the event files even if training aborts.
                train_summary_writer.close()
                dev_summary_writer.close()
                test_summary_writer.close()
def train():
    """Train CIFAR-10 for a number of steps.

    Builds one model tower per GPU (FLAGS.num_gpus) with shared variables,
    averages the tower gradients, applies them with gradient descent under an
    exponentially decayed learning rate, and runs FLAGS.max_steps training
    steps. Progress is printed every 10 steps, summaries are written every
    100 steps, and a checkpoint is saved to FLAGS.train_dir every 1000 steps
    and on the final step.

    Raises:
        AssertionError: if the loss becomes NaN.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals
        # the number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                                 FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        cifar10.LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        for i in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope(
                        '%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
                    # Calculate the loss for one tower of the CIFAR model.
                    # This function constructs the entire CIFAR model but
                    # shares the variables across all towers.
                    loss = tower_loss(scope)

                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)

                    # Calculate the gradients for the batch of data on this
                    # CIFAR tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))

        # Add histograms for gradients. The check must be an identity test
        # against None: asking for the truth value of a Tensor (`if grad:`)
        # is not a valid way to test whether a gradient exists.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads,
                                                global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph. allow_soft_placement must be
        # set to True to build towers on GPU, as some of the ops do not have
        # GPU implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # `loss` is the loss tensor of the last tower built above.
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print (format_str % (datetime.now(), step, loss_value,
                                     examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def training(DO_DEV_SPLIT, FLAGS, scheme_name, vocabulary, embed_matrix,
             x_train, x_dev, y_train, y_dev, num_filters, dropout_prob,
             l2_lambda, test_x, test_y):
    """Train a TextCNN and return the timestamp naming its run directory.

    Summaries and checkpoints are written under
    ./runs/<scheme_name>/<timestamp>/. Training stops when the batch iterator
    is exhausted or when the global step reaches 3500, whichever comes first.

    Args:
        DO_DEV_SPLIT: when true, evaluate on (x_dev, y_dev) in batches of 100
            every FLAGS.evaluate_every steps.
        FLAGS: configuration object; the attributes read here are
            allow_soft_placement, log_device_placement, num_classes,
            embedding_dim, filter_sizes, batch_size, num_epochs,
            evaluate_every and checkpoint_every.
        scheme_name: sub-directory of ./runs for this run.
        vocabulary: only its length is used (model `vocab_size`).
        embed_matrix: passed to the model as `init_embedding`.
        x_train, y_train: training inputs and labels.
        x_dev, y_dev: dev inputs and labels (used only if DO_DEV_SPLIT).
        num_filters: passed to the model as `num_filters`.
        dropout_prob: fed as `dropout_keep_prob` during training, i.e. it is
            used as a keep probability.
        l2_lambda: passed to the model as `l2_reg_lambda`.
        test_x, test_y: optional. When both are given, the first two chunks
            of 100 examples are evaluated on steps that are multiples of 200
            and on which no dev evaluation ran.

    Returns:
        The run timestamp (seconds since the epoch, as a string).
    """
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)

        # Using the session as a context manager makes it the default session
        # and closes it on exit. It was previously never closed, leaking a
        # session on every call.
        with tf.Session(config=session_conf) as sess:
            cnn = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=FLAGS.num_classes,  # Number of classification classes
                vocab_size=len(vocabulary),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=num_filters,
                l2_reg_lambda=l2_lambda,
                init_embedding=embed_matrix)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            with tf.name_scope('grad_summary'):
                grad_summaries = []
                for g, v in grads_and_vars:
                    if g is not None:
                        grad_hist_summary = tf.histogram_summary(
                            "{}/grad/hist".format(v.name), g)
                        sparsity_summary = tf.scalar_summary(
                            "{}/grad/sparsity".format(v.name),
                            tf.nn.zero_fraction(g))
                        grad_summaries.append(grad_hist_summary)
                        grad_summaries.append(sparsity_summary)
                grad_summaries_merged = tf.merge_summary(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", scheme_name, timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.scalar_summary("loss", cnn.loss)
            pred_ratio_summary = []
            for i in range(FLAGS.num_classes):
                pred_ratio_summary.append(
                    tf.scalar_summary(
                        "prediction/label_" + str(i) + "_percentage",
                        cnn.rate_percentage[i]))
            acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)

            # Train Summaries
            with tf.name_scope('train_summary'):
                train_summary_op = tf.merge_summary([
                    loss_summary, acc_summary, pred_ratio_summary,
                    grad_summaries_merged
                ])
                train_summary_dir = os.path.join(out_dir, "summaries",
                                                 "train")
                train_summary_writer = tf.train.SummaryWriter(
                    train_summary_dir, sess.graph_def)

            # Dev summaries
            with tf.name_scope('dev_summary'):
                dev_summary_op = tf.merge_summary(
                    [loss_summary, acc_summary, pred_ratio_summary])
                dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
                dev_summary_writer = tf.train.SummaryWriter(
                    dev_summary_dir, sess.graph_def)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(var_list=tf.global_variables(),
                                   max_to_keep=7)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """ A single training step """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dropout_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """ Evaluates model on a dev set """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = dh.DataHelperPan12.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size,
                FLAGS.num_epochs)

            have_test = test_x is not None and test_y is not None
            if have_test:
                # Two fixed chunks of 100 examples used for quick checks.
                test_x_1 = test_x[:100]
                test_y_1 = test_y[:100]
                test_x_2 = test_x[100:200]
                test_y_2 = test_y[100:200]

            try:
                # Training loop. For each batch...
                for batch in batches:
                    x_batch, y_batch = list(zip(*batch))
                    train_step(x_batch, y_batch)
                    current_step = tf.train.global_step(sess, global_step)
                    if DO_DEV_SPLIT and current_step % FLAGS.evaluate_every == 0:
                        print("\nEvaluation:")
                        dev_batches = dh.DataHelperPan12.batch_iter(
                            list(zip(x_dev, y_dev)), 100, 1)
                        for dev_batch in dev_batches:
                            if len(dev_batch) > 0:
                                small_dev_x, small_dev_y = list(
                                    zip(*dev_batch))
                                dev_step(small_dev_x, small_dev_y,
                                         writer=dev_summary_writer)
                        print("")
                    elif have_test and current_step % 200 == 0:
                        dev_step(test_x_1, test_y_1,
                                 writer=dev_summary_writer)
                        dev_step(test_x_2, test_y_2,
                                 writer=dev_summary_writer)
                    if current_step % FLAGS.checkpoint_every == 0:
                        path = saver.save(sess, checkpoint_prefix,
                                          global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))
                    # Hard step budget for a single run.
                    if current_step == 3500:
                        break
            finally:
                # Flush pending events and release the event files even when
                # training is interrupted.
                train_summary_writer.close()
                dev_summary_writer.close()

    return timestamp
def monitor_train_vars(collections=None):
    """Add a histogram summary for every trainable variable.

    Each summary is tagged with the name of the variable's op.

    Args:
        collections: forwarded unchanged to `tf.histogram_summary` as its
            `collections` argument.
    """
    trainable = tf.trainable_variables()
    for variable in trainable:
        tag = variable.op.name
        tf.histogram_summary(tag, variable, collections=collections)