def restore(_): 
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
    with tf.Graph().as_default(): 
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder,
                                 FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Restore model
        saver = tf.train.Saver()
        print(eval_correct.name)
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
            saver.restore(sess, ckpt.model_checkpoint_path)
            steps = data_sets.test.num_examples // FLAGS.batch_size
            accuracy = 0
            for i in range(steps):
                batchx, batchy = data_sets.test.next_batch(FLAGS.batch_size)
                accuracy += sess.run(eval_correct,
                                     feed_dict={images_placeholder: batchx,
                                                labels_placeholder: batchy})
            print("accuracy: {}".format(accuracy / float(steps * FLAGS.batch_size)))
Example #2
def main(_):
    data_sets = input_data.read_data_sets(data_dir)
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(batch_size, mnist.IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
    logits = mnist.inference(images_placeholder, hidden1, hidden2)
    loss = mnist.loss(logits, labels_placeholder)
    train_op = mnist.training(loss, learning_rate)
    eval_correct = mnist.evaluation(logits, labels_placeholder)
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    for step in range(max_steps):
        start_time = time.time()
        feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                   labels_placeholder)
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        duration = time.time() - start_time
        if step % 100 == 0:
            print('Step %d: loss = %.2f (%.3f sec)' %
                  (step, loss_value, duration))
        if (step + 1) % 1000 == 0 or (step + 1) == max_steps:
            print('Training Data Eval:')
            do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                    data_sets.train)
            print('Validation Data Eval:')
            do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                    data_sets.validation)
            print('Test Data Eval:')
            do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
                    data_sets.test)
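
Most of the examples below also rely on a do_eval helper. A sketch in the tutorial's style, matching the signature used in these calls (assumed; some examples below extend it with extra return values):

def do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
            data_set):
    # Run one epoch of evaluation and print precision@1.
    true_count = 0
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder,
                                   labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    precision = float(true_count) / num_examples
    print('Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))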
Example #3
def run_training():
    data_set = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
    # Run in the default Graph.
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)
        # Merge all summary tensors.
        summary = tf.summary.merge_all()
        # Add the variable initializer Op.
        init = tf.global_variables_initializer()
        # Create a saver for writing checkpoints.
        saver = tf.train.Saver()
        # Create a session.
        sess = tf.Session()
        # Instantiate a SummaryWriter to output the merged summaries.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        sess.run(init)
        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # Fill a feed dictionary for this training step.
            feed_dict = fill_feed_dict(data_set.train, images_placeholder,
                                       labels_placeholder)
            # sess.run() returns a tuple with one element per fetched Tensor;
            # each element is a numpy array holding that tensor's value for
            # this training step. Since train_op is an Op with no output, its
            # element in the returned tuple is None and is discarded. However,
            # if the model diverges, the loss tensor's value may become NaN,
            # so we fetch it and log it.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
            # Every 1000 steps, save a checkpoint and evaluate the model.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_set.train)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_set.validation)
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_set.test)
def run_training():
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        loss = mnist.loss(logits, labels_placeholder)

        train_op = mnist.training(loss, FLAGS.learning_rate)

        eval_correct = mnist.evaluation(logits, labels_placeholder)

        summary_op = tf.merge_all_summaries()

        init = tf.initialize_all_variables()

        saver = tf.train.Saver()

        sess = tf.Session()

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        sess.run(init)

        for step in range(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
Example #5
def run_training():
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    # Generate placeholders for the images and labels.
    images_placeholder, labels_placeholder = placeholder_inputs(
        FLAGS.batch_size)

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images_placeholder,
                             FLAGS.hidden1,
                             FLAGS.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels_placeholder)

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(tf.all_variables())

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables.
    init = tf.initialize_all_variables()
    sess.run(init)

    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if ckpt and ckpt.model_checkpoint_path:
      saver.restore(sess, ckpt.model_checkpoint_path)
    else:
      print('...no checkpoint found...')

    # Evaluate against the test set.
    print('Test Data Eval:')
    do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_sets.test)
def run_training():
    """Train MNIST for a number of steps."""
    # Get the MNIST training, validation, and test sets.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    # Build the model into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the input images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Model output.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)
        # Model loss.
        loss = mnist.loss(logits, labels_placeholder)
        # Training op.
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Evaluation op.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Merge all summaries.
        summary = tf.summary.merge_all()

        # Op that initializes all variables.
        init = tf.global_variables_initializer()

        # Create a saver for writing checkpoints.
        saver = tf.train.Saver()  # Saver can be given a subset of variables to save

        # Open a session.
        sess = tf.Session()

        # Write out the graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # Initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            # Record the start time.
            start_time = time.time()

            # Build the feed_dict.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # Fetch the loss.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            # Compute the elapsed time.
            duration = time.time() - start_time

            # Every 100 steps, print training status and save summaries.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)  # add the summary
                summary_writer.flush()  # flush the summary to disk

            # Every 1000 steps, save a checkpoint and evaluate the model.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder,
                        data_sets.train)  # pass in the eval_correct op
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
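
Nearly every example on this page calls into the tutorial's mnist.py model library (inference/loss/training/evaluation). A condensed sketch of those four functions, based on the standard TensorFlow 1.x tutorial; individual projects may differ in details:

import math

import tensorflow as tf

NUM_CLASSES = 10
IMAGE_PIXELS = 28 * 28

def inference(images, hidden1_units, hidden2_units):
    # Two ReLU hidden layers followed by a linear readout over 10 classes.
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
    with tf.name_scope('hidden2'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden2_units, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights) + biases
    return logits

def loss(logits, labels):
    # Mean sparse softmax cross-entropy over the batch.
    labels = tf.to_int64(labels)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='xentropy')
    return tf.reduce_mean(cross_entropy, name='xentropy_mean')

def training(loss, learning_rate):
    # Scalar summary for the loss, plus a plain SGD step.
    tf.summary.scalar('loss', loss)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    return optimizer.minimize(loss, global_step=global_step)

def evaluation(logits, labels):
    # Number of examples whose true label is the top-1 prediction.
    correct = tf.nn.in_top_k(logits, labels, 1)
    return tf.reduce_sum(tf.cast(correct, tf.int32))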
Example #7
def main(_):
    """Train MNIST"""
    data_sets = input_data.read_data_sets(FLAGS.data_dir, FLAGS.fake_data)

    # Build graph: use default graph
    graph = tf.Graph()
    with graph.as_default():
        # Training input feeds
        images_placeholder = tf.placeholder(tf.float32,
                                            shape=(FLAGS.batch_size,
                                                   mnist.IMAGE_PIXELS))
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(FLAGS.batch_size, ))

        # Build model: inference/loss/training + evaluation
        # Implementation in mnist.py from TensorFlow library
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Reporting, initialization and checkpointing
        summary = tf.merge_all_summaries()
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()

    # Run session: initialize and do training loops
    with tf.Session(graph=graph) as sess:
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # Now that everything has been built, start execution
        sess.run(init)
        for step in range(FLAGS.max_steps):
            start_time = time.time()

            # Construct batch of MNIST images/labels to feed into NN
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # Execute and fetch results: train_op is the key operation,
            # but the result we want is loss
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Report training progress / write files for TensorBoard
            if step % 100 == 0:
                print('Step {}: loss = {} ({} sec)'.format(
                    step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                # Print precision against training, validation & test sets
                print('Training precision:  ', end='')
                evaluate(sess, eval_correct, images_placeholder,
                         labels_placeholder, data_sets.train)
                print('Validation precision:  ', end='')
                evaluate(sess, eval_correct, images_placeholder,
                         labels_placeholder, data_sets.validation)
                print('Test precision:  ', end='')
                evaluate(sess, eval_correct, images_placeholder,
                         labels_placeholder, data_sets.test)
Example #8
def run_training():
    # Load the data.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)
    # Run in the default Graph.
    with tf.Graph().as_default():
        # Configure the graph.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Merge the summary tensors.
        summary = tf.summary.merge_all()
        # Add the variable initializer Op.
        init = tf.global_variables_initializer()
        # Create a saver for writing checkpoints.
        saver = tf.train.Saver()
        # Create a Session.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output the merged summaries.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # Start execution.

        # Initialize the variables.
        sess.run(init)

        # Start the training loop (2000 iterations).
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fetch the data for this iteration.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # The train_op output is discarded; keep only the loss value.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Every 100 steps, print the current loss and record summaries.
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Every 1000 steps, save a checkpoint and evaluate the model.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    # Generate placeholders for the images and labels.
    images_placeholder, labels_placeholder = placeholder_inputs(
        FLAGS.batch_size)

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images_placeholder,
                             FLAGS.hidden1,
                             FLAGS.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels_placeholder)

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables.
    init = tf.initialize_all_variables()
    sess.run(init)

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                            graph_def=sess.graph_def)

    # And then after everything is built, start the training loop.
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()

      # Fill a feed dictionary with the actual set of images and labels
      # for this particular training step.
      feed_dict = fill_feed_dict(data_sets.train,
                                 images_placeholder,
                                 labels_placeholder)

      # Run one step of the model.  The return values are the activations
      # from the `train_op` (which is discarded) and the `loss` Op.  To
      # inspect the values of your Ops or variables, you may include them
      # in the list passed to sess.run() and the value tensors will be
      # returned in the tuple from the call.
      _, loss_value = sess.run([train_op, loss],
                               feed_dict=feed_dict)

      duration = time.time() - start_time

      # Write the summaries and print an overview fairly often.
      if step % 100 == 0:
        # Print status to stdout.
        print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
        # Update the events file.
        summary_str = sess.run(summary_op, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, step)

      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        saver.save(sess, FLAGS.train_dir, global_step=step)
        # Evaluate against the training set.
        print('Training Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.train)
        # Evaluate against the validation set.
        print('Validation Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.validation)
        # Evaluate against the test set.
        print('Test Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.test)
Example #10
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    # The fake_data flag is used for unit-testing purposes and may be safely ignored by the reader.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    # A tf.Graph is a collection of ops that may be executed together as a group.
    # Most TensorFlow uses will only need to rely on the single default graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        # The empty parameter to session indicates that this code will attach to
        # (or create if not yet created) the default local session.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop after initializing.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # Run one step of the model.  The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.
            # Since train_op is an Operation with no output value,
            # the corresponding element in the returned tuple is None and, thus, discarded.
            # However, the value of the loss tensor may become NaN if the model diverges during training,
            # so we capture this value for logging.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                # Update the events file.
                # The events file will be updated with new summary values every time the summary is evaluated
                # and the output passed to the writer's add_summary() function.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            # At some later point in the future, training might be resumed
            # by using the tf.train.Saver.restore method to reload the model parameters.
            # Note that more complicated usage would usually sequester the data_sets.test
            # to only be checked after significant amounts of hyperparameter tuning.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
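
The comments above note that training can later be resumed with tf.train.Saver.restore. A minimal sketch of that pattern, assuming the same graph has been rebuilt and the checkpoints live in FLAGS.log_dir as written above:

ckpt = tf.train.get_checkpoint_state(FLAGS.log_dir)
if ckpt and ckpt.model_checkpoint_path:
    # Restore the trained variables instead of running the init Op.
    saver.restore(sess, ckpt.model_checkpoint_path)
else:
    sess.run(init)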
Example #11
    # Generate placeholders for the images and labels.
    images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images_placeholder,
                             FLAGS.hidden1,
                             FLAGS.hidden2)

    # Add the loss-calculation Op to the Graph.
    loss = mnist.loss(logits, labels_placeholder)

    # Add the Ops that calculate and apply gradients to the Graph.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    # Add the evaluation-accuracy Op to the Graph.
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    # Merge all summaries.
    summary_op = tf.summary.merge_all()

    # Create a saver for writing checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables.
    init = tf.initialize_all_variables()
    sess.run(init)

    # Instantiate a SummaryWriter to output summaries and the Graph.
Example #12
def run_training():
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
    # ---------- Graph -----------------
    # The with statement attaches all ops built here to the default tf.Graph instance.
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)
        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Add the loss-calculation Ops to the Graph.
        loss = mnist.loss(logits, labels_placeholder)
        # Add the Ops that calculate and apply gradients to the Graph.
        train_op = mnist.training(loss, FLAGS.learning_rate)
        # Before entering the training loop, call the evaluation function from
        # mnist.py, passing the same logits and labels as the loss function,
        # so that the Eval ops are built first.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # The evaluation function builds a tf.nn.in_top_k op: an example is
        # marked correct if the true label is among the K most likely
        # predictions. Here K = 1, so a prediction only counts as correct
        # when it matches the true label exactly.

        # eval_correct = tf.nn.in_top_k(logits, labels, 1)

        # Status visualization:
        # To emit the events file used by TensorBoard, all the summaries
        # (here there is only one) are merged into a single op during graph
        # construction.
        summary_op = tf.merge_all_summaries()
        # -------- Saving checkpoints ------------
        # To obtain checkpoint files that can later restore the model for
        # further training or evaluation, instantiate a tf.train.Saver.
        saver = tf.train.Saver()
        # Create a session for running Ops on the Graph.
        sess = tf.Session()
        # Run the Op to initialize the variables.
        init = tf.initialize_all_variables()
        sess.run(init)
        # Once the session is created, instantiate a tf.train.SummaryWriter to
        # write events files containing the graph itself and summary values.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)
        # Then, after everything is built, start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)
            # Explicitly fetch the two values we need: [train_op, loss].
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            if step % 100 == 0:
                # Assuming training is healthy (no NaNs), print a one-line
                # status report every 100 steps.
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))

                # Update the events file.
                # Every time summary_op runs, the latest summary values are
                # written to the events file by passing the output to the
                # writer's add_summary() function.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                # summary_str is a serialized Summary; it is recorded at
                # `step` (the x-axis in TensorBoard).
                summary_writer.add_summary(summary_str, step)

            # Every 1000 steps, evaluate the model. do_eval is called three
            # times: with the training, validation, and test data sets.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                print('in')
                saver.save(sess, FLAGS.train_dir, global_step=step)
                print('Training Data Eval:')
                do_eval(sess,
                        eval_correct,
                        images_placeholder,
                        labels_placeholder,
                        data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess,
                        eval_correct,
                        images_placeholder,
                        labels_placeholder,
                        data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess,
                        eval_correct,
                        images_placeholder,
                        labels_placeholder,
                        data_sets.test)
Example #13
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    if DATASET == 'pickle':
        data_sets = mnist_dataset.read_data_sets(FLAGS.train_dir,
                                                 data_dir='../mnist.pkl')
    elif DATASET == 'keras':
        data_sets = mnist_dataset.read_data_sets(FLAGS.train_dir, keras=True)
    else:
        data_sets = input_data.read_data_sets(FLAGS.train_dir)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2,
                           FLAGS.hidden3)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = training(loss)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.merge_all_summaries()

        # Add the variable initializer Op.
        init = tf.initialize_all_variables()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        sess.run(init)

        steps_per_epoch = data_sets.train.num_examples // FLAGS.batch_size
        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # Run one step of the model.  The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.  To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % steps_per_epoch == 0:
                # Print status to stdout.
                print('Epoch %d: loss = %.2f (%.3f sec)' %
                      (step / steps_per_epoch, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 10000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)

        # save weights
        with tf.variable_scope("hidden1", reuse=True):
            W = tf.get_variable("weights", [IMAGE_PIXELS, FLAGS.hidden1])
            b = tf.get_variable("biases", [FLAGS.hidden1])
            W_1, b_1 = W.eval(sess), b.eval(sess)
            #np.savetxt("weights/hidden1_W.csv", W_val, delimiter=",")
            #np.savetxt("weights/hidden1_b.csv", b_val, delimiter=",")

        with tf.variable_scope("hidden2", reuse=True):
            W = tf.get_variable("weights", [FLAGS.hidden1, FLAGS.hidden2])
            b = tf.get_variable("biases", [FLAGS.hidden2])
            W_2, b_2 = W.eval(sess), b.eval(sess)
            #np.savetxt("weights/hidden2_W.csv", W_val, delimiter=",")
            #np.savetxt("weights/hidden2_b.csv", b_val, delimiter=",")

        with tf.variable_scope("hidden3", reuse=True):
            W = tf.get_variable("weights", [FLAGS.hidden2, FLAGS.hidden3])
            b = tf.get_variable("biases", [FLAGS.hidden3])
            W_3, b_3 = W.eval(sess), b.eval(sess)
            #np.savetxt("weights/hidden3_W.csv", W_val, delimiter=",")
            #np.savetxt("weights/hidden3_b.csv", b_val, delimiter=",")
        cPickle.dump([W_1, b_1, W_2, b_2, W_3, b_3],
                     open("pretrain.pkl", "wb"))
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # Run one step of the model.  The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.  To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)

        # Compute embeddings and save them.
        thumbnail_size = int(np.sqrt(mnist.IMAGE_PIXELS))
        for data_set, name in [(data_sets.train, 'train'),
                               (data_sets.validation, 'validation'),
                               (data_sets.test, 'test')]:
            output_path = os.path.join(FLAGS.log_dir, 'embed', name)
            print('Computing %s Embedding' % name)
            (all_images, all_labels, hidden1_vectors,
             hidden2_vectors) = do_eval(sess, eval_correct, images_placeholder,
                                        labels_placeholder, data_set, True)
            embed_tensors = []
            summary_writer = tf.summary.FileWriter(output_path, sess.graph)
            config = projector.ProjectorConfig()
            for layer, embed_vectors in enumerate(
                [hidden1_vectors, hidden2_vectors]):
                embed_tensor = tf.Variable(np.array(embed_vectors).reshape(
                    len(embed_vectors) * embed_vectors[0].shape[0], -1),
                                           name=('%s_layer_%s' %
                                                 (name, layer)))
                embed_tensors.append(embed_tensor)
                sess.run(embed_tensor.initializer)
                embedding = config.embeddings.add()
                embedding.tensor_name = embed_tensor.name
                embedding.metadata_path = os.path.join(output_path,
                                                       'labels.tsv')
                embedding.sprite.image_path = os.path.join(
                    output_path, 'sprite.png')
                embedding.sprite.single_image_dim.extend(
                    [thumbnail_size, thumbnail_size])
                projector.visualize_embeddings(summary_writer, config)
            result = sess.run(embed_tensors)
            saver = tf.train.Saver(embed_tensors)
            saver.save(sess, os.path.join(output_path, 'model.ckpt'), layer)

            # Make sprite and labels.
            images = np.array(all_images).reshape(
                -1, thumbnail_size, thumbnail_size).astype(np.float32)
            sprite = images_to_sprite(images)
            scipy.misc.imsave(os.path.join(output_path, 'sprite.png'), sprite)
            all_labels = np.array(all_labels).flatten()
            metadata_file = open(os.path.join(output_path, 'labels.tsv'), 'w')
            metadata_file.write('Name\tClass\n')
            for ll in xrange(len(all_labels)):
                metadata_file.write('%06d\t%d\n' % (ll, all_labels[ll]))
            metadata_file.close()
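
The embedding code above calls an images_to_sprite helper that is not shown. A hypothetical implementation that tiles the [N, H, W] thumbnails into one roughly square sprite image, padding with blank tiles, as the TensorBoard projector expects:

import numpy as np

def images_to_sprite(images):
    # `images` is [N, H, W]; returns one [n*H, n*W] sprite (assumed helper).
    n = int(np.ceil(np.sqrt(images.shape[0])))
    pad = ((0, n ** 2 - images.shape[0]), (0, 0), (0, 0))
    images = np.pad(images, pad, mode='constant', constant_values=0)
    images = images.reshape((n, n) + images.shape[1:])   # [n, n, H, W]
    images = images.transpose((0, 2, 1, 3))              # [n, H, n, W]
    return images.reshape((n * images.shape[1], -1))     # [n*H, n*W]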
Example #15
File: runMnist.py  Project: darr/tfrun
    def run_train(self):
        with tf.Graph().as_default():
            isTrain = tf.placeholder(tf.bool, name="isTrain")
            images, labels = inputs(train=isTrain,
                                    batch_size=100,
                                    num_epochs=500)
            logits = mnist.inference(images, 128, 32)
            loss = mnist.loss(logits, labels)
            tf.summary.scalar('loss', loss)
            train_op = mnist.training(loss, 0.01)
            evaluation = mnist.evaluation(logits, labels)
            tf.summary.scalar('evaluation', evaluation)
            cur_step = tf.Variable(0, name='cur_step')
            summary = tf.summary.merge_all()
            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            saver = tf.train.Saver()

            sess = tf.Session()
            self._graph = sess.graph
            sess.run(init_op)
            self.write_graph_to_file()
            sess = sessionRun(saver, sess, self.FLAGS.ckpt_dir + DIR_NAME)

            if self.FLAGS.debug and self.FLAGS.tensorboard_debug_address:
                raise ValueError(
                    "The --debug and --tensorboard_debug_address flags are mutually "
                    "exclusive.")
            if self.FLAGS.debug:
                sess = tf_debug.LocalCLIDebugWrapperSession(
                    sess, ui_type=self.FLAGS.ui_type)
            elif self.FLAGS.tensorboard_debug_address:
                sess = tf_debug.TensorBoardDebugWrapperSession(
                    sess, self.FLAGS.tensorboard_debug_address)

            summary_writer = tf.summary.FileWriter(self.FLAGS.log_dir,
                                                   sess.graph)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            start = sess.run(cur_step)

            try:
                step = start
                while not coord.should_stop():
                    start_time = time.time()
                    _, loss_value, prd, summary_str = sess.run(
                        [train_op, loss, evaluation, summary],
                        feed_dict={isTrain: True})
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()
                    print(step)
                    duration = time.time() - start_time

                    if step % 100 == 0:
                        cs = sess.run(cur_step.assign(step))
                        saveModel(saver, sess, step,
                                  self.FLAGS.ckpt_dir + DIR_NAME)
                        print("Step :%d: loss=%.2f (%.3f sec) evaluation:%s" %
                              (step, loss_value, duration, prd))
                    step += 1

                    if step > 3000:
                        prd = sess.run(evaluation, feed_dict={isTrain: False})
                        print("loss value:%s evaluation:%s" %
                              (loss_value, prd))
                        break
            except tf.errors.OutOfRangeError:
                print("Done training for %d epochs, %d steps." % (1000, step))
            finally:
                coord.request_stop()
            coord.join(threads)
            sess.close()
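
run_train above depends on an inputs() pipeline in the style of TensorFlow's fully_connected_reader example. A sketch of that kind of queue-based input function, assuming MNIST examples stored as TFRecords; note that the original helper takes a Python bool for `train`, whereas this snippet feeds a tf.bool placeholder:

def inputs(train, batch_size, num_epochs):
    # Hypothetical TFRecords reader pipeline (TF 1.x queue runners).
    filename = os.path.join('data',
                            'train.tfrecords' if train else 'validation.tfrecords')
    filename_queue = tf.train.string_input_producer([filename],
                                                    num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={'image_raw': tf.FixedLenFeature([], tf.string),
                  'label': tf.FixedLenFeature([], tf.int64)})
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image.set_shape([mnist.IMAGE_PIXELS])
    image = tf.cast(image, tf.float32) * (1.0 / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    return tf.train.shuffle_batch([image, label], batch_size=batch_size,
                                  num_threads=2,
                                  capacity=1000 + 3 * batch_size,
                                  min_after_dequeue=1000)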
def run_training():
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)
        # Add the loss-calculation Ops to the Graph.
        loss = mnist.loss(logits, labels_placeholder)

        # Add the Ops that compute and apply gradients to the Graph.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op that compares the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor from the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Variable initialization.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session on the graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write summaries and print an overview fairly often.
            if step % 100 == 0:
                print('Step %d: loss = %.0f (%.3f sec)' %
                      (step, loss_value, duration))

                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)

                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)

                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
Example #17
def run_training():
    """Train MNIST for a number of epochs."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            # Input data, pin to CPU because rest of pipeline is CPU-only
            with tf.device('/cpu:0'):
                input_images = tf.constant(data_sets.train.images)
                input_labels = tf.constant(data_sets.train.labels)

            image, label = tf.train.slice_input_producer(
                [input_images, input_labels],
                num_epochs=FLAGS.num_epochs,
                capacity=3000000)
            label = tf.cast(label, tf.int32)
            images, labels = tf.train.shuffle_batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                capacity=3000000,
                num_threads=24,
                min_after_dequeue=300)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create the op for initializing variables.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Run the Op to initialize the variables.
        sess.run(init_op)

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # And then after everything is built, start the training loop.
        try:
            step = 0
            while not coord.should_stop():
                start_time = time.time()

                # Run one step of the model.
                _, loss_value = sess.run([train_op, loss],
                                         options=run_options,
                                         run_metadata=run_metadata)
                tl = timeline.Timeline(run_metadata.step_stats)
                ctf = tl.generate_chrome_trace_format()
                with open('timeline.json', 'w') as f:
                    f.write(ctf)

                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                if step % 1 == 0:
                    # Print status to stdout.
                    print('Step %d: loss = %.2f (%.3f sec)' %
                          (step, loss_value, duration))
                    # Update the events file.
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                # Save a checkpoint periodically.
                if (step + 1) % 1000 == 0:
                    print('Saving')
                    saver.save(sess, FLAGS.train_dir, global_step=step)

                step += 1
        except tf.errors.OutOfRangeError:
            print('Saving')
            saver.save(sess, FLAGS.train_dir, global_step=step)
            print('Done training for %d epochs, %d steps.' %
                  (FLAGS.num_epochs, step))
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)
        sess.close()
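
Example #17 traces every step with FULL_TRACE and rewrites timeline.json each time, which is expensive; the trace can be viewed by loading the file into chrome://tracing. A common variant traces only occasionally, for example:

# Sketch: collect a full trace only every 100th step to limit overhead.
if step % 100 == 0:
    _, loss_value = sess.run([train_op, loss],
                             options=run_options,
                             run_metadata=run_metadata)
    tl = timeline.Timeline(run_metadata.step_stats)
    with open('timeline_%d.json' % step, 'w') as f:
        f.write(tl.generate_chrome_trace_format())
else:
    _, loss_value = sess.run([train_op, loss])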
Example #18
def run_training():
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in range(FLAGS.max_steps):
            start_time = time.time()

            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))

                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)

                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)

                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)

                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
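Most of these examples call the tutorial helpers fill_feed_dict and do_eval without defining them. A minimal sketch consistent with the call sites (an illustration following the standard fully_connected_feed tutorial, not code from the examples; some later variants expect do_eval to return something, so this sketch returns the computed precision):

def fill_feed_dict(data_set, images_pl, labels_pl):
    # Map the next batch of this DataSet onto the graph's placeholders.
    images_feed, labels_feed = data_set.next_batch(FLAGS.batch_size,
                                                   FLAGS.fake_data)
    return {images_pl: images_feed, labels_pl: labels_feed}


def do_eval(sess, eval_correct, images_pl, labels_pl, data_set):
    # Count correct predictions over roughly one epoch of the data set.
    true_count = 0
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_pl, labels_pl)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    precision = float(true_count) / num_examples
    print('Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))
    return precision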
Example #19
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        # BOT: making the lr a variable so we can update it using our bot
        learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)
        train_op = mnist.training(loss, learning_rate)
        bot.lr = FLAGS.learning_rate
        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # Run one step of the model.  The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.  To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Print step number:
                print("step: {}".format(step))

                # Evaluate against the training set.
                print('Training Data Eval:')
                message_trn = 'Training Data Eval: \n' + do_eval(
                    sess, eval_correct, images_placeholder, labels_placeholder,
                    data_sets.train)

                # Evaluate against the validation set.
                print('Validation Data Eval:')
                message_val = 'Validation Data Eval:\n' + do_eval(
                    sess, eval_correct, images_placeholder, labels_placeholder,
                    data_sets.validation)
                # Evaluate validation loss
                val_loss_value = sess.run(loss,
                                          feed_dict=fill_feed_dict(
                                              data_sets.validation,
                                              images_placeholder,
                                              labels_placeholder))

                # Evaluate against the test set.
                print('Test Data Eval:')
                message_tst = 'Test Data Eval:\n' + do_eval(
                    sess, eval_correct, images_placeholder, labels_placeholder,
                    data_sets.test)

                ## BOT: handling of all bot commands ##
                # Prepare bot update message
                message = "\n".join([
                    "step: {}".format(step + 1), message_trn, message_val,
                    message_tst
                ])
                bot.set_status(message)
                # Send update message
                if bot.verbose:
                    bot.send_message(message)

                # Stop training command from bot
                if bot.stop_train_flag:
                    bot.send_message('Training stopped!')
                    print(
                        'Training Stopped! Stop command sent via Telegram bot.'
                    )
                    break

                # Update bot's loss history (for /plot command)
                bot.loss_hist.append(loss_value)
                bot.val_loss_hist.append(val_loss_value)

                # Modify learning rate via bot
                if bot.modify_lr != 1:
                    curr_lr = sess.run(learning_rate)
                    new_lr = curr_lr * bot.modify_lr
                    sess.run(learning_rate.assign(new_lr))
                    message = '\nStep %05d: setting learning rate to %f.' % (
                        step + 1, new_lr)
                    print(message)
                    bot.send_message(message)
                    bot.modify_lr = 1
                    bot.lr = new_lr
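
The bot referenced throughout this example is a Telegram-bot controller that is never defined in the fragment. A hypothetical stand-in exposing just the attributes and methods the loop touches:

class DummyBot(object):
    """Stand-in for the Telegram bot controller assumed by the example."""

    def __init__(self):
        self.lr = 0.0                 # mirror of the current learning rate
        self.verbose = False          # when True, push periodic updates
        self.stop_train_flag = False  # set remotely to stop training
        self.modify_lr = 1            # learning-rate multiplier set remotely
        self.loss_hist = []           # history for a plotting command
        self.val_loss_hist = []
        self.status = ''

    def set_status(self, message):
        self.status = message

    def send_message(self, message):
        print('[bot] %s' % message)

bot = DummyBot()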
Example #20
def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    # Generate placeholders for the images and labels.
    images_placeholder, labels_placeholder, keep_prob = placeholder_inputs(
        FLAGS.batch_size)

    # Build a Graph that computes predictions from the inference model.
    simplenet = somenetclass(784)
    preds = simplenet.predict(images_placeholder, keep_prob)

    # Add to the Graph the Ops for loss calculation.
    labels_int64 = tf.to_int64(labels_placeholder)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels_int64, logits=preds, name='cross-entropy')
    loss = tf.reduce_mean(cross_entropy, name='cross-entropy_mean')

    # Add to the Graph the Ops that calculate and apply gradients.
    tf.summary.scalar('loss', loss)
    # Create the gradient descent optimizer with the given learning rate.
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)

    # Create a variable to track the global step.
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Use the optimizer to apply the gradients that minimize the loss
    # (and also increment the global step counter) as a single training step.
    train_op = optimizer.minimize(loss, global_step=global_step)
    
    

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(preds, labels_placeholder)


    # Build the summary Tensor based on the TF collection of Summaries.
    summary = tf.summary.merge_all()

    # Add the variable initializer Op.
    init = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
    #summary_writer.flush()
    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)

    # Start the training loop.


    for step in xrange(FLAGS.max_steps):
      start_time = time.time()

      # Fill a feed dictionary with the actual set of images and labels
      # for this particular training step.
      feed_dict = fill_feed_dict(data_sets.train,
                                 images_placeholder,
                                 labels_placeholder, keep_prob,0.4)

      # Run one step of the model.  The return values are the activations
      # from the `train_op` (which is discarded) and the `loss` Op.  To
      # inspect the values of your Ops or variables, you may include them
      # in the list passed to sess.run() and the value tensors will be
      # returned in the tuple from the call.
      _, loss_value = sess.run([train_op, loss],
                               feed_dict=feed_dict)

      duration = time.time() - start_time



      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_file = os.path.join(FLAGS.log_dir, 'model_iter'+str(step)+'.ckpt')
        saver.save(sess, checkpoint_file, global_step=step)
        # Evaluate against the training set.
        print('Training Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                keep_prob,
                data_sets.train)
        # Evaluate against the validation set.
        print('Validation Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                keep_prob,
                data_sets.validation)
        # Evaluate against the test set.
        print('Test Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                keep_prob,
                data_sets.test)




        # To view the summaries: python -m tensorflow.tensorboard --logdir=/tmp/tensorflow/mnist/logs/fully_connected_feed
    
      # Write the summaries and print an overview fairly often.
      if step % 100 == 0:
        # Print status to stdout.
        print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
        # Update the events file.
        summary_str = sess.run(summary, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, step)
        summary_writer.flush()
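
somenetclass is not defined anywhere in this example, and placeholder_inputs here must return a keep_prob placeholder as a third value. Purely as an illustration of the call sites (the constructor takes the input size; predict takes the images placeholder and a dropout keep probability), it might look like:

class somenetclass(object):
    """Hypothetical two-layer net with dropout, matching the calls above."""

    def __init__(self, num_pixels, hidden_units=128, num_classes=10):
        self.w1 = tf.Variable(
            tf.truncated_normal([num_pixels, hidden_units], stddev=0.1))
        self.b1 = tf.Variable(tf.zeros([hidden_units]))
        self.w2 = tf.Variable(
            tf.truncated_normal([hidden_units, num_classes], stddev=0.1))
        self.b2 = tf.Variable(tf.zeros([num_classes]))

    def predict(self, images, keep_prob):
        # Hidden layer with ReLU, dropout, then a linear output layer.
        hidden = tf.nn.relu(tf.matmul(images, self.w1) + self.b1)
        dropped = tf.nn.dropout(hidden, keep_prob)
        return tf.matmul(dropped, self.w2) + self.b2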
Example #21
                             'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', 'data', 'Directory to put the training data.')
flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
                             'for unit testing.')

## Download data and unpack
## data_sets is a custom DataSet data type
data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

## Initialize graph and start drawing on it
with tf.Graph().as_default():
    ## Prepare inputs and placeholders
    images_placeholder = tf.placeholder(tf.float32, shape=(FLAGS.batch_size,
                                                            mnist.IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(FLAGS.batch_size))

    ## mnist.inference() builds feed-forward portion of graph
    ## It takes the images placeholder and two integers, each representing the
    ## number of neurons for the respective hidden layers and returns logits
    logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
    loss = mnist.loss(logits, labels_placeholder)
    train_op = mnist.training(loss, FLAGS.learning_rate)
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    ## Initialize variables, run session, and write summary writer data
    summary_op = tf.merge_all_summaries()
    init = tf.initialize_all_variables()
    sess = tf.Session()
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
    sess.run(init)
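
This fragment uses pre-1.0 API names (tf.merge_all_summaries, tf.initialize_all_variables, tf.train.SummaryWriter), as do several examples below. Their TensorFlow 1.x replacements, for reference:

summary_op = tf.summary.merge_all()        # replaces tf.merge_all_summaries()
init = tf.global_variables_initializer()  # replaces tf.initialize_all_variables()
summary_writer = tf.summary.FileWriter(   # replaces tf.train.SummaryWriter(...)
    FLAGS.train_dir, sess.graph)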
Example #22
def run_training():
  """Train MNIST for a number of epochs."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    with tf.name_scope('input'):
      # Input data, pin to CPU because rest of pipeline is CPU-only
      with tf.device('/cpu:0'):
        input_images = tf.constant(data_sets.train.images)
        input_labels = tf.constant(data_sets.train.labels)

      image, label = tf.train.slice_input_producer(
          [input_images, input_labels], num_epochs=FLAGS.num_epochs)
      label = tf.cast(label, tf.int32)
      images, labels = tf.train.batch(
          [image, label], batch_size=FLAGS.batch_size)

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels)

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create the op for initializing variables.
    init_op = tf.initialize_all_variables()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Run the Op to initialize the variables.
    sess.run(init_op)

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    # Start input enqueue threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # And then after everything is built, start the training loop.
    try:
      step = 0
      while not coord.should_stop():
        start_time = time.time()

        # Run one step of the model.
        _, loss_value = sess.run([train_op, loss])

        duration = time.time() - start_time

        # Write the summaries and print an overview fairly often.
        if step % 100 == 0:
          # Print status to stdout.
          print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
                                                     duration))
          # Update the events file.
          summary_str = sess.run(summary_op)
          summary_writer.add_summary(summary_str, step)

        # Save a checkpoint periodically.
        if (step + 1) % 1000 == 0:
          print('Saving')
          saver.save(sess, FLAGS.train_dir, global_step=step)

        step += 1
    except tf.errors.OutOfRangeError:
      print('Saving')
      saver.save(sess, FLAGS.train_dir, global_step=step)
      print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
    finally:
      # When done, ask the threads to stop.
      coord.request_stop()

    # Wait for threads to finish.
    coord.join(threads)
    sess.close()
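
A rough tf.data equivalent of the queue-based input pipeline above (tf.data is available from TF 1.4 onward); this is a sketch, not the example's code:

# Slice the in-memory arrays, repeat for the requested epochs, and batch.
dataset = tf.data.Dataset.from_tensor_slices(
    (data_sets.train.images, data_sets.train.labels))
dataset = dataset.repeat(FLAGS.num_epochs).batch(FLAGS.batch_size)
images, labels = dataset.make_one_shot_iterator().get_next()
labels = tf.cast(labels, tf.int32)
# The OutOfRangeError handling stays the same: get_next() raises it once
# the epochs are exhausted, so no Coordinator or threads are needed.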
Example #23
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST. If input_path is specified, download the data from GCS to
    # the folder expected by read_data_sets.
    data_dir = tempfile.mkdtemp()
    if FLAGS.input_path:
        files = [
            os.path.join(FLAGS.input_path, file_name)
            for file_name in INPUT_FILES
        ]
        subprocess.check_call(['gsutil', '-m', '-q', 'cp', '-r'] + files +
                              [data_dir])
    data_sets = input_data.read_data_sets(data_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels and mark as input.
        placeholders = placeholder_inputs()
        keys_placeholder, images_placeholder, labels_placeholder = placeholders

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # To be able to extract the id, we need to add the identity function.
        keys = tf.identity(keys_placeholder)

        # The prediction will be the index in logits with the highest score.
        # We also use a softmax operation to produce a probability distribution
        # over all possible digits.
        prediction = tf.argmax(logits, 1)
        scores = tf.nn.softmax(logits)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary operation based on the TF collection of Summaries.
        # Remove this if once Tensorflow 0.12 is standard.
        try:
            summary_op = tf.contrib.deprecated.merge_all_summaries()
        except AttributeError:
            summary_op = tf.merge_all_summaries()

        # Add the variable initializer Op.
        init = tf.initialize_all_variables()

        # Create a saver for writing legacy training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        # Remove this if once Tensorflow 0.12 is standard.
        try:
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
        except AttributeError:
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                    sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # Run one step of the model.  The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.  To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)

        file_io.create_dir(FLAGS.model_dir)

        # Create a saver for writing SavedModel training checkpoints.
        saved_model_util.simple_save(sess,
                                     os.path.join(FLAGS.model_dir,
                                                  'saved_model'),
                                     inputs={
                                         'key': keys_placeholder,
                                         'image': images_placeholder
                                     },
                                     outputs={
                                         'key': keys,
                                         'prediction': prediction,
                                         'scores': scores
                                     })
        logging.debug('Saved model path %s',
                      os.path.join(FLAGS.model_dir, 'saved_model'))
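
A hypothetical consumer of this export (not part of the example): simple_save writes the graph under the 'serve' tag, so it can be restored with the SavedModel loader.

import os

import tensorflow as tf

export_dir = os.path.join(FLAGS.model_dir, 'saved_model')
with tf.Session(graph=tf.Graph()) as sess:
    # Load the graph and variables saved above into a fresh session.
    tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_dir)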
Example #24
File: task.py  Project: obulpathi/cloud
def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  data_sets = input_data.read_data_sets(tempfile.mkdtemp(), FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    # Generate placeholders for the images and labels and mark as input.
    placeholders = placeholder_inputs()
    keys_placeholder, images_placeholder, labels_placeholder = placeholders
    inputs = {'key': keys_placeholder.name, 'image': images_placeholder.name}
    tf.add_to_collection('inputs', json.dumps(inputs))

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images_placeholder,
                             FLAGS.hidden1,
                             FLAGS.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels_placeholder)

    # To be able to extract the id, we need to add the identity function.
    keys = tf.identity(keys_placeholder)

    # The prediction will be the index in logits with the highest score.
    # We also use a softmax operation to produce a probability distribution
    # over all possible digits.
    prediction = tf.argmax(logits, 1)
    scores = tf.nn.softmax(logits)

    # Mark the outputs.
    outputs = {'key': keys.name,
               'prediction': prediction.name,
               'scores': scores.name}
    tf.add_to_collection('outputs', json.dumps(outputs))

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)

    # Start the training loop.
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()

      # Fill a feed dictionary with the actual set of images and labels
      # for this particular training step.
      feed_dict = fill_feed_dict(data_sets.train,
                                 images_placeholder,
                                 labels_placeholder)

      # Run one step of the model.  The return values are the activations
      # from the `train_op` (which is discarded) and the `loss` Op.  To
      # inspect the values of your Ops or variables, you may include them
      # in the list passed to sess.run() and the value tensors will be
      # returned in the tuple from the call.
      _, loss_value = sess.run([train_op, loss],
                               feed_dict=feed_dict)

      duration = time.time() - start_time

      # Write the summaries and print an overview fairly often.
      if step % 100 == 0:
        # Print status to stdout.
        print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
        # Update the events file.
        summary_str = sess.run(summary_op, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, step)
        summary_writer.flush()

      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
        saver.save(sess, checkpoint_file, global_step=step)
        # Evaluate against the training set.
        print('Training Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.train)
        # Evaluate against the validation set.
        print('Validation Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.validation)
        # Evaluate against the test set.
        print('Test Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.test)

    # Export the model so that it can be loaded and used later for predictions.
    file_io.create_dir(FLAGS.model_dir)
    saver.save(sess, os.path.join(FLAGS.model_dir, 'export'))
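
A hypothetical prediction-time counterpart (not part of this example): after restoring the exported checkpoint, the tensor names recorded in the 'inputs' and 'outputs' collections can be recovered and resolved against the graph.

import json
import os

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    # saver.save() writes 'export.meta' alongside the 'export' checkpoint.
    saver = tf.train.import_meta_graph(
        os.path.join(FLAGS.model_dir, 'export.meta'))
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, os.path.join(FLAGS.model_dir, 'export'))
        inputs = json.loads(tf.get_collection('inputs')[0])
        outputs = json.loads(tf.get_collection('outputs')[0])
        image_tensor = graph.get_tensor_by_name(inputs['image'])
        prediction = graph.get_tensor_by_name(outputs['prediction'])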
Example #25
def run_training():
    """
    Train MNIST for a number of steps.
        """

    # Ensures the correct data has been downloaded and unpacks it into a dict of
    # DataSet instances.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)

    # Tell TF that the model will be built into the default Graph.
    # The 'with' statement indicates that all of the ops are associated with
    # the specified instance, here the default global tf.Graph instance.
    # A tf.Graph is a collection of ops that may be executed together as a group.
    with tf.Graph().as_default():
        # Generate placeholders
        images_placeholder, labels_placeholder = placeholder_inputs(FLAGS.batch_size)

        # Build a graph that computes predictions from the inference model.
        # The inference function builds the graph as far as needed to return
        #   the tensor containing output predictions.
        # It takes the images placeholder and builds, on top of it, a pair of
        #   fully connected layers with ReLU activation, followed by a
        #   ten-node linear layer that produces the output logits.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)

        # Add the ops for loss calculation
        loss = mnist.loss(logits, labels_placeholder)

        # Add ops that calculate and apply gradients
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add op to compare logits to labels during evaluation
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Summary tensor based on collection of summaries
        summary = tf.summary.merge_all()

        # Add the variable initializer
        init = tf.global_variables_initializer()

        # Create a saver
        saver = tf.train.Saver()

        # Create a session for running ops
        # Alternatively, could do 'with tf.Session() as sess:'
        sess = tf.Session()

        # Instantiate SummaryWriter for output
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        ### Everything is built! ###

        # Now run and train.
        # run() executes the subset of the graph corresponding to the ops
        #   passed to it; here, only init is given.
        sess.run(init)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with actual set of images
            feed_dict = fill_feed_dict(data_sets.train,
                                       images_placeholder,
                                       labels_placeholder)

            # Run a step.
            # The return values are the outputs of the train_op (which is
            # discarded) and the loss operation.
            # If you want to inspect the values of ops or variables, include
            # them in the list passed to sess.run().
            # Each tensor in that list corresponds to a numpy array in the
            # returned tuple, filled with the value of that tensor during this
            # step of training.
            # Since train_op is an Operation with no output value, it can be
            # discarded.
            # BUT... if loss becomes NaN, the model has likely diverged during
            # training.

            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Let's log some stuff so we know we're doing ok.
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))

                # Update the events file.
                # This can be used by TensorBoard to display the summaries.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically
            # Save a checkpoint and evaluate the model periodically
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)

                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess,
                        eval_correct,
                        images_placeholder,
                        labels_placeholder,
                        data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess,
                        eval_correct,
                        images_placeholder,
                        labels_placeholder,
                        data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess,
                        eval_correct,
                        images_placeholder,
                        labels_placeholder,
                        data_sets.test)
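
As the comments in this example note, a NaN loss usually means training has diverged; a small guard making that explicit (an illustration, not part of the original) could be added right after the sess.run call:

import numpy as np

# Stop early instead of continuing to train on a diverged model.
if np.isnan(loss_value):
    raise RuntimeError('Loss is NaN at step %d; training diverged.' % step)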
Example #26
    def downpour_training_local_op(self):
        """
        Validation baseline function: run locally.
        """
        # Tell TensorFlow that the model will be built into the default Graph.
        with tf.Graph().as_default():
            FLAGS = self.flags.FLAGS
            images_placeholder, labels_placeholder = self.placeholder_inputs(
                FLAGS.batch_size)

            # Do inference:
            logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                     FLAGS.hidden2)

            # Calculate loss after generating logits:
            loss = mnist.loss(logits, labels_placeholder)

            # Add loss to training:
            train_op = mnist.training(loss, FLAGS.learning_rate)

            # Add summary
            summary = tf.merge_all_summaries()

            # Add the Op to compare the logits to the labels during evaluation.
            eval_correct = mnist.evaluation(logits, labels_placeholder)

            # Initialize Variable
            init = tf.initialize_all_variables()

            sess = tf.Session()

            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                    sess.graph)

            sess.run(init)

            for step in range(FLAGS.max_steps + 1):
                """
                We want to inspect loss value on each step as a local benchmark
                for fully connected network.
                """

                start_time = time.time()
                feed_dict = self.fill_feed_dict(self.data_set.train,
                                                images_placeholder,
                                                labels_placeholder)

                # Run one step of the model.  The return values are the activations
                # from the `train_op` (which is discarded) and the `loss` Op.  To
                # inspect the values of your Ops or variables, you may include them
                # in the list passed to sess.run() and the value tensors will be
                # returned in the tuple from the call.
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                if step % 100 == 0:
                    # Print status to stdout.
                    print('Step %d: loss = %.2f (%.3f sec)' %
                          (step, loss_value, duration))
                    summary_str = sess.run(summary, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                # Save a checkpoint and evaluate the model periodically.
                if step % 1000 == 0:
                    print('Training Data Eval:')
                    self.do_eval(sess, eval_correct, images_placeholder,
                                 labels_placeholder, self.data_set.train)
                    # Evaluate against the validation set.
                    print('Validation Data Eval:')
                    self.do_eval(sess, eval_correct, images_placeholder,
                                 labels_placeholder, self.data_set.validation)
                    # Evaluate against the test set.
                    print('Test Data Eval:')
                    self.do_eval(sess, eval_correct, images_placeholder,
                                 labels_placeholder, self.data_set.test)
Example #27
def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  train_dir = tempfile.mkdtemp()
  data_sets = input_data.read_data_sets(train_dir, FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    # Generate placeholders for the images and labels.
    images_placeholder, labels_placeholder = placeholder_inputs()

    # Build a Graph that computes predictions from the inference model.
    logits, clustering_loss, kmeans_training_op = inference(images_placeholder,
                                                            FLAGS.num_clusters,
                                                            FLAGS.hidden1,
                                                            FLAGS.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels_placeholder)

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = tf.group(mnist.training(loss, FLAGS.learning_rate),
                        kmeans_training_op)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    feed_dict = fill_feed_dict(data_sets.train,
                               images_placeholder,
                               labels_placeholder,
                               batch_size=5000)
    # Run the Op to initialize the variables.
    sess.run(init, feed_dict=feed_dict)

    # Start the training loop.
    max_test_prec = 0
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()

      # Fill a feed dictionary with the actual set of images and labels
      # for this particular training step.
      feed_dict = fill_feed_dict(data_sets.train,
                                 images_placeholder,
                                 labels_placeholder,
                                 FLAGS.batch_size)

      # Run one step of the model.
      _, loss_value, clustering_loss_value = sess.run([train_op,
                                                       loss,
                                                       clustering_loss],
                                                      feed_dict=feed_dict)

      duration = time.time() - start_time
      if step % 100 == 0:
        # Print status to stdout.
        print('Step %d: loss = %.2f, clustering_loss = %.2f (%.3f sec)' % (
            step, loss_value, clustering_loss_value, duration))

      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        # Evaluate against the training set.
        print('Training Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.train)
        # Evaluate against the validation set.
        print('Validation Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.validation)
        # Evaluate against the test set.
        print('Test Data Eval:')
        test_prec = do_eval(sess,
                            eval_correct,
                            images_placeholder,
                            labels_placeholder,
                            data_sets.test)
        max_test_prec = max(max_test_prec, test_prec)
    return max_test_prec
Example #28
def run_training():
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir)
    max_steps = math.ceil(CONFIG.epoch * data_sets.train.num_examples /
                          CONFIG.batch_size)

    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(
            CONFIG.batch_size)

        logits = mnist.inference(images_placeholder, CONFIG.size_hidden_1,
                                 CONFIG.size_hidden_2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, CONFIG.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        sess.run(init)

        # Optionally resume from a saved checkpoint; restore after init so
        # the restored weights are not overwritten by the initializer.
        if FLAGS.c:
            saver.restore(sess, os.path.join(FLAGS.log_dir, 'model.ckpt'))
        progbar = Progbar(target=CONFIG.eval_every_n_steps)
        for step in xrange(max_steps):

            start_time = time.time()

            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            progbar.update((step % CONFIG.eval_every_n_steps) + 1,
                           [("Loss", loss_value)],
                           force=True)

            duration = time.time() - start_time

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % CONFIG.eval_every_n_steps == 0 or (step + 1) == max_steps:

                print("Total : ", int(
                    (step + 1) / CONFIG.eval_every_n_steps), "/",
                      int(math.ceil(max_steps / CONFIG.eval_every_n_steps)))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)

                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)

                progbar = Progbar(target=CONFIG.eval_every_n_steps)
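
Progbar is not defined in this fragment; it matches the interface of the Keras progress-bar utility. A minimal stand-in exposing just what the loop uses (an assumption, not the original class):

class Progbar(object):
    """Tiny progress reporter with the interface used above."""

    def __init__(self, target):
        self.target = target  # total number of steps per reporting cycle

    def update(self, current, values=None, force=False):
        # values is a list of (name, value) pairs, e.g. [("Loss", 0.42)].
        metrics = ', '.join('%s: %.4f' % (name, value)
                            for name, value in (values or []))
        print('%d/%d  %s' % (current, self.target, metrics))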
Example #29
def run_training():
    """Train MNIST for a number of steps."""
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    #Tell Tensorflow that model will be built in default Graph
    with tf.Graph().as_default():
        #Generate Placeholders for input
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        #Build a Graph that computes predictions from the inference model
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        #Add to the Graph ops for calculating loss
        loss = mnist.loss(logits, labels_placeholder)

        #Add to the Graph ops that calculate and apply gradients
        train_op = mnist.training(loss, FLAGS.learning_rate)

        #Add to Graph ops to compare logits to label during evaluation
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        #Build summary tensor based on TF collection of summaries.
        summary = tf.summary.merge_all()

        init = tf.global_variables_initializer()

        #Saving checkpoints of training
        saver = tf.train.Saver()

        sess = tf.Session()

        #Instantiate SummaryWriter to write summaries
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        sess.run(init)

        #Start training Loop
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            #Run one step of model
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            #Write summaries and print overview
            if step % 100 == 0:

                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            #Save Checkpoint and evaluate model
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)

                #Evaluating against training set
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)

                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)

                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
Example #30
def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  data_sets = input_data.read_data_sets(tempfile.mkdtemp(), FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    # Generate placeholders for the images and labels and mark as input.
    placeholders = placeholder_inputs()
    keys_placeholder, images_placeholder, labels_placeholder = placeholders
    inputs = {'key': keys_placeholder.name, 'image': images_placeholder.name}
    tf.add_to_collection('inputs', json.dumps(inputs))

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images_placeholder,
                             FLAGS.hidden1,
                             FLAGS.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels_placeholder)

    # To be able to extract the id, we need to add the identity function.
    keys = tf.identity(keys_placeholder)

    # The prediction will be the index in logits with the highest score.
    # We also use a softmax operation to produce a probability distribution
    # over all possible digits.
    prediction = tf.argmax(logits, 1)
    scores = tf.nn.softmax(logits)

    # Mark the outputs.
    outputs = {'key': keys.name,
               'prediction': prediction.name,
               'scores': scores.name}
    tf.add_to_collection('outputs', json.dumps(outputs))

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    # Build the summary operation based on the TF collection of Summaries.
    # TODO(b/33420312): remove the if once 0.12 is fully rolled out to prod.
    if tf.__version__ < '0.12':
      summary_op = tf.merge_all_summaries()
    else:
      summary_op = tf.contrib.deprecated.merge_all_summaries()

    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)

    # Start the training loop.
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()

      # Fill a feed dictionary with the actual set of images and labels
      # for this particular training step.
      feed_dict = fill_feed_dict(data_sets.train,
                                 images_placeholder,
                                 labels_placeholder)

      # Run one step of the model.  The return values are the activations
      # from the `train_op` (which is discarded) and the `loss` Op.  To
      # inspect the values of your Ops or variables, you may include them
      # in the list passed to sess.run() and the value tensors will be
      # returned in the tuple from the call.
      _, loss_value = sess.run([train_op, loss],
                               feed_dict=feed_dict)

      duration = time.time() - start_time

      # Write the summaries and print an overview fairly often.
      if step % 100 == 0:
        # Print status to stdout.
        print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
        # Update the events file.
        summary_str = sess.run(summary_op, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, step)
        summary_writer.flush()

      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
        saver.save(sess, checkpoint_file, global_step=step)
        # Evaluate against the training set.
        print('Training Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.train)
        # Evaluate against the validation set.
        print('Validation Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.validation)
        # Evaluate against the test set.
        print('Test Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.test)

    # Export the model so that it can be loaded and used later for predictions.
    file_io.create_dir(FLAGS.model_dir)
    saver.save(sess, os.path.join(FLAGS.model_dir, 'export'))
Example #31
def run_training():
    """Train MNIST for a number of epochs."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            # Input data
            images_initializer = tf.placeholder(
                dtype=data_sets.train.images.dtype,
                shape=data_sets.train.images.shape)
            labels_initializer = tf.placeholder(
                dtype=data_sets.train.labels.dtype,
                shape=data_sets.train.labels.shape)
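            # collections=[] keeps these input variables out of the default
            # GLOBAL_VARIABLES collection, so init_op does not initialize
            # them; their initializers are run explicitly below with the
            # real data fed in.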
            input_images = tf.Variable(images_initializer,
                                       trainable=False,
                                       collections=[])
            input_labels = tf.Variable(labels_initializer,
                                       trainable=False,
                                       collections=[])

            image, label = tf.train.slice_input_producer(
                [input_images, input_labels], num_epochs=FLAGS.num_epochs)
            label = tf.cast(label, tf.int32)
            images, labels = tf.train.batch([image, label],
                                            batch_size=FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create the op for initializing variables.
        init_op = tf.initialize_all_variables()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Run the Op to initialize the variables.
        sess.run(init_op)
        sess.run(input_images.initializer,
                 feed_dict={images_initializer: data_sets.train.images})
        sess.run(input_labels.initializer,
                 feed_dict={labels_initializer: data_sets.train.labels})

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # And then after everything is built, start the training loop.
        try:
            step = 0
            while not coord.should_stop():
                start_time = time.time()

                # Run one step of the model.
                _, loss_value = sess.run([train_op, loss])

                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                if step % 100 == 0:
                    # Print status to stdout.
                    print('Step %d: loss = %.2f (%.3f sec)' %
                          (step, loss_value, duration))
                    # Update the events file.
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                # Save a checkpoint periodically.
                if (step + 1) % 1000 == 0:
                    print('Saving')
                    saver.save(sess, FLAGS.train_dir, global_step=step)

                step += 1
        except tf.errors.OutOfRangeError:
            print('Saving')
            saver.save(sess, FLAGS.train_dir, global_step=step)
            print('Done training for %d epochs, %d steps.' %
                  (FLAGS.num_epochs, step))
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)
        sess.close()
Example #32
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    task_index = FLAGS.task_index
    master = "grpc://" + worker_hosts[task_index]
    logs_path = os.path.join(FLAGS.log_dir, str(task_index))

    # start a server for a specific task
    cluster = tf.train.ClusterSpec({'ps': ps_hosts, 'worker': worker_hosts})

    # Between-graph replication
    with tf.device(
            tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % task_index,
                cluster=cluster)):

        # count the number of updates
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = async_training(loss, FLAGS.learning_rate, global_step)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Add the variable initializer Op.
        init_op = tf.global_variables_initializer()

        sv = tf.train.Supervisor(is_chief=(task_index == 0),
                                 global_step=global_step,
                                 init_op=init_op)

        with sv.prepare_or_wait_for_session(master) as sess:

            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.summary.FileWriter(logs_path, sess.graph)

            # And then after everything is built:
            # Start the training loop.
            for step in xrange(FLAGS.max_steps):
                start_time = time.time()

                # Fill a feed dictionary with the actual set of images and labels
                # for this particular training step.
                feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                           labels_placeholder)

                # Run one step of the model.  The return values are the activations
                # from the `train_op` (which is discarded) and the `loss` Op.  To
                # inspect the values of your Ops or variables, you may include them
                # in the list passed to sess.run() and the value tensors will be
                # returned in the tuple from the call.
                _, loss_value, summary = sess.run([train_op, loss, summary_op],
                                                  feed_dict=feed_dict)

                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                if step % 100 == 0:
                    # Print status to stdout.
                    print('Step %d: loss = %.2f (%.3f sec)' %
                          (step, loss_value, duration))
                    # Update the events file.
                    summary_writer.add_summary(summary, step)
                    summary_writer.flush()

                # Save a checkpoint and evaluate the model periodically.
                if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                    # Evaluate against the training set.
                    print('Training Data Eval:')
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, data_sets.train)
                    # Evaluate against the validation set.
                    print('Validation Data Eval:')
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, data_sets.validation)
                    # Evaluate against the test set.
                    print('Test Data Eval:')
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, data_sets.test)
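# Note: async_training is called above but not defined in this snippet. A
# plausible minimal sketch, assuming plain asynchronous SGD under
# between-graph replication (each worker applies its own gradients with no
# cross-replica synchronization); the real helper may differ:
def async_training(loss, learning_rate, global_step):
    # Each worker's updates reach the parameter servers independently.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    return optimizer.minimize(loss, global_step=global_step)

# One process per task would then be launched with matching flags, e.g. a
# hypothetical invocation:
#   python trainer.py --ps_hosts=ps0:2222 \
#       --worker_hosts=worker0:2222,worker1:2222 --task_index=0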
Example #33
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    train_dir = tempfile.mkdtemp()
    data_sets = input_data.read_data_sets(train_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs()

        # Build a Graph that computes predictions from the inference model.
        logits, clustering_loss, kmeans_training_op = inference(
            images_placeholder, FLAGS.num_clusters, FLAGS.hidden1,
            FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = tf.group(mnist.training(loss, FLAGS.learning_rate),
                            kmeans_training_op)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Add the variable initializer Op.
        init = tf.initialize_all_variables()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        feed_dict = fill_feed_dict(data_sets.train,
                                   images_placeholder,
                                   labels_placeholder,
                                   batch_size=5000)
        # Run the Op to initialize the variables.
        sess.run(init, feed_dict=feed_dict)

        # Start the training loop.
        max_test_prec = 0
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder, FLAGS.batch_size)

            # Run one step of the model.
            _, loss_value, clustering_loss_value = sess.run(
                [train_op, loss, clustering_loss], feed_dict=feed_dict)

            duration = time.time() - start_time
            if step % 100 == 0:
                # Print status to stdout.
                print(
                    'Step %d: loss = %.2f, clustering_loss = %.2f (%.3f sec)' %
                    (step, loss_value, clustering_loss_value, duration))

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                test_prec = do_eval(sess, eval_correct, images_placeholder,
                                    labels_placeholder, data_sets.test)
                max_test_prec = max(max_test_prec, test_prec)
        return max_test_prec
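# Note: do_eval is called in every example here but never defined in this
# section. A sketch based on the standard fully_connected_feed tutorial
# helper; returning the precision is an assumption inferred from the
# max_test_prec bookkeeping above:
def do_eval(sess, eval_correct, images_placeholder, labels_placeholder,
            data_set):
    """Runs one evaluation over a full epoch of data_set."""
    true_count = 0  # running count of correct predictions
    steps_per_epoch = data_set.num_examples // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size
    for _ in xrange(steps_per_epoch):
        feed_dict = fill_feed_dict(data_set, images_placeholder,
                                   labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    precision = float(true_count) / num_examples
    print('Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))
    return precision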
Example #34
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    # fake_data is a flag used for unit testing; it can be ignored here.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    # The Python `with` statement scopes these operations to the global
    # default tf.Graph() instance.
    # In most cases a single tf.Graph instance is enough, so as_default()
    # is all you need.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build the computation graph using the functions defined in mnist.py.
        # Build a Graph that computes predictions from the inference model.
        # First: inference(), the network we want to train.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        # Second: loss(), which adds the Ops (operations) for the loss
        # function to the graph.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        # Third: training(), which adds the optimization Ops that minimize
        # the loss, i.e. it specifies which method is used to optimize the
        # given loss function.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        # Adds the Op that evaluates the inference results;
        # evaluation() describes what the logits should output to count as
        # correct.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        # Create the variable-initialization Op ahead of time.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        # Once the graph and all required Ops have been built, create a
        # tf.Session().
        # Calling Session() with no arguments attaches to (uses) the default
        # local session.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        # Calling Session.run initializes the variables.
        sess.run(init)

        # Start the training loop.
        # Once all instantiation and Op construction is done, start the
        # training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # Run one step of the model.  The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.  To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            # run() is given two fetches here, so it returns two values.
            # train_op is the training Operation and has no output, so it
            # returns None, which is discarded.
            # loss does have an output, so its value is kept in a variable.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
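# Note: fill_feed_dict is the other shared helper these loops rely on. A
# sketch based on the standard tutorial version, with the optional
# batch_size parameter that the clustering example above passes explicitly:
def fill_feed_dict(data_set, images_pl, labels_pl, batch_size=None):
    """Returns a feed dict mapping the placeholders to the next batch."""
    batch_size = batch_size or FLAGS.batch_size
    # fake_data is the unit-test flag mentioned in the comments above.
    images_feed, labels_feed = data_set.next_batch(batch_size,
                                                   FLAGS.fake_data)
    return {images_pl: images_feed, labels_pl: labels_feed}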
Example #35
def run_training():
    """Train MNIST for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = input_data.read_data_sets(FLAGS.input_data_dir,
                                          FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = mnist.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = mnist.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Build the summary Tensor based on the TF collection of Summaries.
        summary = tf.summary.merge_all()

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # And then after everything is built:

        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Fill a feed dictionary with the actual set of images and labels
            # for this particular training step.
            feed_dict = fill_feed_dict(data_sets.train, images_placeholder,
                                       labels_placeholder)

            # Run one step of the model.  The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.  To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            # Save a checkpoint and evaluate the model periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(sess, eval_correct, images_placeholder,
                        labels_placeholder, data_sets.test)
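# Note: placeholder_inputs is assumed to follow the standard tutorial
# definition, with float32 image batches of mnist.IMAGE_PIXELS features and
# int32 label batches (the clustering example above apparently uses its own
# no-argument variant):
def placeholder_inputs(batch_size):
    images_placeholder = tf.placeholder(
        tf.float32, shape=(batch_size, mnist.IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
    return images_placeholder, labels_placeholder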
Example #36
def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST. If input_path is specified, download the data from GCS to
  # the folder expected by read_data_sets.
  data_dir = tempfile.mkdtemp()
  if FLAGS.input_path:
    files = [os.path.join(FLAGS.input_path, file_name)
             for file_name in INPUT_FILES]
    subprocess.check_call(['gsutil', '-m', '-q', 'cp', '-r'] + files +
                          [data_dir])
  data_sets = input_data.read_data_sets(data_dir, FLAGS.fake_data)

  # Tell TensorFlow that the model will be built into the default Graph.
  with tf.Graph().as_default():
    # Generate placeholders for the images and labels.
    images_placeholder, labels_placeholder = placeholder_inputs(
        FLAGS.batch_size)

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels_placeholder)

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    # Build the summary operation based on the TF collection of Summaries.
    # Remove this try/except once TensorFlow 0.12 is standard.
    try:
      summary_op = tf.contrib.deprecated.merge_all_summaries()
    except AttributeError:
      summary_op = tf.merge_all_summaries()

    # Add the variable initializer Op.
    # Remove this try/except once TensorFlow 0.12 is standard.
    try:
      init = tf.global_variables_initializer()
    except AttributeError:
      init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    # Remove this try/except once TensorFlow 0.12 is standard.
    try:
      summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
    except AttributeError:
      summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)

    # Start the training loop.
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()

      # Fill a feed dictionary with the actual set of images and labels
      # for this particular training step.
      feed_dict = fill_feed_dict(data_sets.train,
                                 images_placeholder,
                                 labels_placeholder)

      # Run one step of the model.  The return values are the activations
      # from the `train_op` (which is discarded) and the `loss` Op.  To
      # inspect the values of your Ops or variables, you may include them
      # in the list passed to sess.run() and the value tensors will be
      # returned in the tuple from the call.
      _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

      duration = time.time() - start_time

      # Write the summaries and print an overview fairly often.
      if step % 100 == 0:
        # Print status to stdout.
        print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
        # Update the events file.
        summary_str = sess.run(summary_op, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, step)
        summary_writer.flush()

      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
        saver.save(sess, checkpoint_file, global_step=step)
        # Evaluate against the training set.
        print('Training Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.train)
        # Evaluate against the validation set.
        print('Validation Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.validation)
        # Evaluate against the test set.
        print('Test Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.test)
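# Note: INPUT_FILES is referenced when copying from GCS but is not defined
# in this snippet. For the standard MNIST loader it would plausibly be the
# four gzipped archives expected by read_data_sets (an assumption, not from
# the source):
INPUT_FILES = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz',
               't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']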