# Example #1
def train(verbose=False):
    """Build the train/test graph, then run the training loop.

    Each step runs one training batch and hands the fetched debug images to
    ``dataset.output_images``. Every 10 steps the loss and throughput are
    printed; every 100 steps the merged summaries are written and one
    fixed-size test batch is evaluated. Checkpoints are written to
    ``FLAGS.train_dir`` every 5000 steps and on the final step.

    Args:
        verbose: When true, print how many entries the training and test
            loss collections contain.

    Raises:
        AssertionError: If the training loss becomes NaN.
    """
    with tf.Graph().as_default():
        # Step counter handed to train_operation.train below.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        dataset = DataSet()

        # Training input pipeline.
        print("The number of training images is: %d" % (dataset.cnt_samples(FLAGS.traincsv)))
        images, labels = dataset.csv_inputs(FLAGS.traincsv, FLAGS.batch_size, distorted=True)

        # NOTE(review): `datasets` (plural) is not defined inside this
        # function; presumably a module-level name -- confirm it is not a
        # typo for `dataset`.
        images_debug = datasets.debug(images)

        # Test input pipeline. A fixed number of test samples is evaluated
        # per pass instead of counting the rows of the test CSV.
        test_cnt = 100
        images_test, labels_test = dataset.test_inputs(FLAGS.testcsv, test_cnt)
        images_test_debug = datasets.debug(images_test)

        num_classes = FLAGS.num_classes
        restore_logits = not FLAGS.fine_tune

        # Inference. Element 0 of each returned tuple is fetched as the
        # final-layer logits in the loop below. The test network is built
        # with reuse=True and dropout_keep_prob=1.0.
        logits = model.inference(images, num_classes, for_training=True,
                                 restore_logits=restore_logits)
        logits_test = model.inference(images_test, num_classes, for_training=False,
                                      restore_logits=restore_logits, reuse=True,
                                      dropout_keep_prob=1.0)

        # Register the losses, then read them back from their collections.
        model.loss(logits, labels, batch_size=FLAGS.batch_size)
        model.loss_test(logits_test, labels_test, batch_size=test_cnt)
        losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
        losses_test = tf.get_collection(slim.losses.LOSSES_COLLECTION_TEST)

        # The training objective adds the regularization terms; the test
        # loss is the plain sum of the collected test losses.
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n(losses + regularization_losses, name='total_loss')
        total_loss_test = tf.add_n(losses_test, name='total_loss_test')

        # Moving averages of the individual losses and of each total.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [total_loss])
        loss_averages_test = tf.train.ExponentialMovingAverage(0.9, name='avg_test')
        loss_averages_op_test = loss_averages_test.apply(losses_test + [total_loss_test])

        if verbose:
            print("=" * 10)
            print("loss length:")
            print(len(losses))
            print(len(losses_test))
            print("=" * 10)

        # Route each total through an identity op that depends on its
        # moving-average update, so fetching the loss also runs the update.
        with tf.control_dependencies([loss_averages_op]):
            total_loss = tf.identity(total_loss)
        tf.summary.scalar("loss", total_loss)

        with tf.control_dependencies([loss_averages_op_test]):
            total_loss_test = tf.identity(total_loss_test)
        tf.summary.scalar("loss_eval", total_loss_test)

        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)

        # Training op; it receives the summaries list and the batch-norm
        # update ops collected above.
        train_op = train_operation.train(total_loss, global_step, summaries, batchnorm_updates)

        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge(summaries)
        init = tf.global_variables_initializer()

        # Session, capped to the configured fraction of GPU memory.
        # NOTE(review): the original ConfigProto(...) call was truncated in
        # this file. Only gpu_options is passed here -- re-add any other
        # options that were intended.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialise every variable first; a checkpoint restore below then
        # overwrites the values it covers.
        sess.run(init)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            if FLAGS.fine_tune:
                ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print('%s: Pre-trained model restored from %s' %
                          (datetime.now(), ckpt.model_checkpoint_path))
                    saver.restore(sess, ckpt.model_checkpoint_path)

            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                _, logits_eval, loss_value, labels_eval, images_debug_eval = sess.run(
                    [train_op, logits[0], total_loss, labels, images_debug])
                duration = time.time() - start_time

                dataset.output_images(images_debug_eval, "debug", "train")

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % 10 == 0:
                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = ('train %s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value, examples_per_sec, duration))

                if step % 100 == 0:
                    print(type(logits_eval))
                    print(logits_eval.shape)
                    print(logits_eval.argmax(1))
                    print(labels_eval)
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                    test_start_time = time.time()
                    logits_test_eval, total_loss_test_val, labels_test_eval, images_test_debug_eval = sess.run(
                        [logits_test[0], total_loss_test, labels_test, images_test_debug])
                    test_duration = time.time() - test_start_time

                    dataset.output_images(images_test_debug_eval, "debug_test", "test")

                    print("test predict:")
                    print(type(logits_test_eval))
                    print(logits_test_eval.shape)
                    print(logits_test_eval.argmax(1))
                    print("test target:")
                    print(labels_test_eval)
                    test_examples_per_sec = test_cnt / float(test_duration)
                    format_str_test = ('test %s: step %d, loss = %.2f, (%.1f examples/sec; %.3f sec/batch)')
                    print(format_str_test % (datetime.now(), step, total_loss_test_val,
                                             test_examples_per_sec, test_duration))

                # Save the model checkpoint periodically and on the last
                # step. This check must sit at loop level: nested under the
                # 100-step branch, the final-step condition can only fire
                # when max_steps - 1 is a multiple of 100.
                if step % 5000 == 0 or (step + 1) == FLAGS.max_steps:
                    saver.save(sess, checkpoint_path, global_step=step)
        finally:
            # Stop the input queue threads and release the writer and the
            # session even when training aborts.
            coord.request_stop()
            coord.join(threads)
            summary_writer.close()
            sess.close()

# Example #2
def train():
    # Despite its name, this function runs prediction: it loads rows from the
    # CSV named by FLAGS.predictcsv, restores the latest checkpoint found in
    # FLAGS.train_dir, feeds one image at a time through the network and
    # prints "<line[0]>,<line[1]>,<argmax of the fetched output>" per row.
    #
    # NOTE(review): four statements below are truncated (their opening
    # parentheses are never closed), so this function does not parse as-is.
    # The missing arguments cannot be recovered from this file and must be
    # restored from the original source.
    with tf.Graph().as_default():
        # global step number
        # NOTE(review): truncated call -- remaining arguments are missing.
        # global_step is not referenced again in the visible code.
        global_step = tf.get_variable('global_step', [],
        dataset = DataSet()

        # Load the rows to predict.
        # NOTE(review): truncated print -- the value for %d is missing. The
        # message says "training images" although this block reads the
        # prediction CSV.
        print("The number of training images is: %d" %
        csv_predict = FLAGS.predictcsv
        lines = dataset.load_csv(csv_predict)

        # Placeholder for exactly one 229x229 three-channel image.
        images_ph = tf.placeholder(tf.float32, [1, 229, 229, 3])

        num_classes = FLAGS.num_classes
        restore_logits = not FLAGS.fine_tune

        # inference
        # NOTE(review): truncated call -- remaining arguments are missing;
        # num_classes and restore_logits above are otherwise unused, so they
        # were presumably passed here. Confirm.
        logits = model.inference(images_ph,

        # Collect the update ops registered under slim's UPDATE_OPS
        # collection. Not referenced again in the visible code.
        batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)

        # saver
        # NOTE(review): tf.all_variables, tf.merge_all_summaries and
        # tf.initialize_all_variables are the deprecated pre-1.0 TensorFlow
        # names (tf.global_variables, tf.summary.merge_all,
        # tf.global_variables_initializer are the replacements).
        saver = tf.train.Saver(tf.all_variables())

        # Merge all summaries. Not referenced again in the visible code.
        summary_op = tf.merge_all_summaries()

        # initialization
        # NOTE(review): `init` is never run in the visible code; if no
        # checkpoint is found below, the variables would be uninitialised
        # unless the run was part of the truncated text. Confirm.
        init = tf.initialize_all_variables()

        # session
        # NOTE(review): truncated call -- ConfigProto arguments and the
        # closing parentheses are missing.
        sess = tf.Session(config=tf.ConfigProto(

        # Start queue-runner threads. They are never stopped or joined in
        # the visible code.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Restore the most recent checkpoint from FLAGS.train_dir, if any.
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("load: checkpoint %s" % (ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)

        print("start to predict.")
        # `step` is unused; line[0] is opened as the image path and line[1]
        # is echoed back unchanged (presumably the expected label -- confirm
        # against dataset.load_csv).
        for step, line in enumerate(lines):
            pil_img = Image.open(line[0])
            pil_img = pil_img.resize((250, 250))
            img_array_r = np.asarray(pil_img)
            # Take the 229x229 window starting at offset 15 (244 - 15 = 229)
            # to match the placeholder. The three-index slice assumes a 3-D
            # array -- TODO confirm inputs always decode with 3 channels.
            img_array_r = img_array_r[15:244, 15:244, :]
            # Prepend a batch axis of size 1.
            img_array = img_array_r[None, ...]
            # Fetch element 2 of the inference tuple (presumably the
            # predictions -- confirm against model.inference). Passing a
            # list makes sess.run return a one-element list.
            softmax_eval = sess.run([logits[2]],
                                    feed_dict={images_ph: img_array})
            # np.argmax flattens its input; this equals the class index only
            # if the fetched array holds one row of class scores -- TODO
            # confirm the output shape.
            print("%s,%s,%s" % (line[0], line[1], np.argmax(softmax_eval)))
        print("finish to predict.")
