Example #1
def main():
    # Get a batch of training images and labels
    train, test = inputs.get_data()
    train_images, train_labels = inputs.train_batch(train)
    # Define the ops for inference, loss, and training
    train_logits = cnn.inference(train_images)
    losses = loss(train_labels, train_logits)
    train_op = training(losses)

    test_images, test_labels = inputs.test_batch(test)
    test_logits = cnn.inference(test_images, reuse=True)
    correct_prediction = tf.equal(tf.argmax(test_logits, 1), tf.to_int64(test_labels))
    accuracy = tf.reduce_mean(tf.to_float(correct_prediction))

    with tf.Session() as sess:
        # Prepare the threads that pull data from the batch queues
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        sess.run(tf.global_variables_initializer())

        # Run the training loop
        for i in range(300):
            _, loss_value, accuracy_value = sess.run([train_op, losses, accuracy])
            print("step {:3d} : {:5f} ({:3f})".format(i + 1, loss_value, accuracy_value * 100.0))

        coord.request_stop()
        coord.join(threads)
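# Note: main() above calls module-level loss() and training() helpers that are
# not part of this snippet. A minimal sketch of what they might look like,
# assuming integer labels, softmax cross-entropy, and plain SGD (the helper
# bodies and the learning rate are assumptions, not the original code):
import tensorflow as tf

def loss(labels, logits):
    # Mean sparse softmax cross-entropy over the batch (assumes integer labels).
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(cross_entropy)

def training(losses, learning_rate=0.01):
    # One SGD update that minimizes the loss tensor.
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(losses)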
Example #2
def tower_loss(scope, images1, labels1, hots1, images2, labels2, hots2):
    """Calculate the total loss on a single tower running the multi-task_cnn model.
    Args:
      scope: unique prefix string identifying the multi-task_cnn tower, e.g. 'tower_0'
      images1, images2: Images. 4D tensors of shape [batch_size, height, width, 3].
      labels1, labels2: Labels. 1D tensors of shape [batch_size].
      hots1, hots2: auxiliary one-hot tensors passed through to cnn.loss.
    Returns:
       Tensor of shape [] containing the total loss for a batch of data
    """

    # Build inference Graph.
    logits1 = cnn.inference(images1, n_cnn=5)

    tf.get_variable_scope().reuse_variables()
    logits2 = cnn.inference(images2, n_cnn=5)

    # Build the portion of the Graph calculating the losses. Note that we will
    # assemble the total_loss using a custom function below.
    _ = cnn.loss(logits1, labels1, hots1, logits2, labels2, hots2, loss_type=1)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)

    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
        # session. This helps the clarity of presentation on tensorboard.
        loss_name = re.sub('%s_[0-9]*/' % cnn.TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)

    return total_loss
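# Note: tower_loss() is the per-GPU half of a CIFAR-10 style multi-GPU loop.
# The sketch below shows how it is typically driven; tower_loss and
# cnn.TOWER_NAME come from the example, while the tower count, optimizer,
# learning rate, and the structure of `batches` are assumptions.
def build_towers(batches, num_gpus=2):
    optimizer = tf.train.GradientDescentOptimizer(0.1)  # assumed optimizer
    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (cnn.TOWER_NAME, i)) as scope:
                    # batches[i] holds the six tensors tower_loss() expects.
                    total_loss = tower_loss(scope, *batches[i])
                    # Share variables with the next tower.
                    tf.get_variable_scope().reuse_variables()
                    tower_grads.append(optimizer.compute_gradients(total_loss))
    return tower_grads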
Example #3
def train():
    """Train CGCNN for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get data for training
        # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
        # GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            energies, sites_matrices, adj_matrices = cnn.inputs(
                eval_data=False)

        # Build a Graph that computes the energy predictions from the
        # inference model.
        energies_hat = cnn.inference(sites_matrices, adj_matrices)

        # Calculate loss.
        loss = cnn.loss(energies_hat, energies)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cnn.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
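# Note: the training script above reads several command-line flags that are
# defined elsewhere in the project. A plausible definition block is sketched
# here; the flag names match the code, but every default value is an assumption.
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/tmp/cgcnn_train',
                           'Directory for checkpoints and event logs.')
tf.app.flags.DEFINE_integer('max_steps', 100000, 'Number of batches to run.')
tf.app.flags.DEFINE_integer('batch_size', 128, 'Examples per batch.')
tf.app.flags.DEFINE_integer('log_frequency', 10, 'Log every N steps.')
tf.app.flags.DEFINE_boolean('log_device_placement', False,
                            'Whether to log device placement.')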
Example #4
def evaluate(X_predict):
    with tf.Graph().as_default() as g:
        NUM = 1  # number of images to predict
        # Network input and output tensors
        x = tf.placeholder(tf.float32,
                           shape=(NUM, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS),
                           name='x_input')
        y = cnn.inference(x, False, False, regularizer=None)  # prediction

        # Create a tf.train.Saver instance
        saver = tf.train.Saver()

        with tf.Session() as sess:
            # Locate the model file via the checkpoint file
            ckpt = tf.train.get_checkpoint_state(train.MODEL_SAVE_PATH)
            # ckpt.model_checkpoint_path: where the model is stored
            if ckpt and ckpt.model_checkpoint_path:
                # Restore the model
                saver.restore(sess, ckpt.model_checkpoint_path)
                # Predict
                predict_y = sess.run(y, feed_dict={x: X_predict / 255.0})
                # Pick the class with the highest score
                predict_y = np.argmax(predict_y, axis=1)
                return predict_y
            else:
                print('No checkpoint file found')
                return
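# Note: a possible call site for evaluate() above, assuming a single image is
# loaded with Pillow and that IMG_HEIGHT, IMG_WIDTH, and IMG_CHANNELS are the
# module-level constants used by the placeholder; the file name is a placeholder.
import numpy as np
from PIL import Image

img = Image.open('sample.png').resize((IMG_WIDTH, IMG_HEIGHT))
X_predict = np.asarray(img, dtype=np.float32).reshape(
    (1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
print(evaluate(X_predict))  # evaluate() rescales by 255 and returns the class index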
Example #5
def evaluate():
  """Eval CGCNN for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get energies, site matrices, and adjacency matrices for evaluation.
    eval_data = (FLAGS.eval_data == 'test')
    energies, sites_matrices, adj_matrices = cnn.inputs(eval_data=eval_data)

    # Build a Graph that computes the energy predictions from the
    # inference model.
    energies_hat = cnn.inference(sites_matrices, adj_matrices)

    # Calculate the absolute error tensor.
    abs_error_tensor = tf.abs(energies_hat-energies)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cnn.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

    while True:
      eval_once(saver, summary_writer, abs_error_tensor, summary_op)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
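# Note: eval_once() is not shown in this example. A minimal sketch of what it
# usually does in this CIFAR-10 derived layout, assuming a FLAGS.checkpoint_dir
# flag; it restores the latest checkpoint and reports the mean absolute error.
def eval_once(saver, summary_writer, abs_error_tensor, summary_op):
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)  # assumed flag
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return
        saver.restore(sess, ckpt.model_checkpoint_path)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        abs_errors = sess.run(abs_error_tensor)
        print('mean absolute error = %.4f' % abs_errors.mean())
        if summary_op is not None:
            summary_writer.add_summary(sess.run(summary_op))
        coord.request_stop()
        coord.join(threads)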
Example #6
def evaluate():
    """Eval CNN for a number of steps."""
    g = tf.Graph()
    with g.as_default(), tf.Session(graph=g) as sess:
        images, labels = sess.run(dataset_input_fn())
        imgs = images["image"]
        
        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cnn.inference(imgs)
        
        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        
        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)
        
        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
Example #7
def evaluate():
    """
        Test one image against the saved models and parameters
    """

    # you need to change the directories to yours.
    train_dir = './data/train/'
    train, train_label = train_test_split.get_files(train_dir)
    image_array = get_image(train)

    with tf.Graph().as_default():
        batch_size = 1
        n_classes = 2

        image = tf.cast(image_array, tf.float32)
        image = tf.image.per_image_standardization(image)
        image = tf.reshape(image, [1, 208, 208, 3])
        logits = cnn.inference(image, batch_size, n_classes)

        logits = tf.nn.softmax(logits)

        # you need to change the directories to yours.
        logs_train_dir = './logs/train/'

        saver = tf.train.Saver()

        with tf.Session() as sess:

            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("Loading success, global_step is %s".format(global_step))
            else:
                print("No checkpoint file found")

            prediction = sess.run(logits)
            max_index = np.argmax(prediction)
            if max_index == 0:
                print("This is a cat with possibility {:.6f}".format(
                    prediction[0, 0]))
            else:
                print("This is a dog with possibility {:.6f}".format(
                    prediction[0, 1]))
Example #8
def run_training():
    # Set the directories.
    train_dir = './data/train/'
    logs_train_dir = './logs/train/'

    train, train_label = train_test_split.get_files(train_dir)

    train_batch, train_label_batch = train_test_split.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    train_logits = cnn.inference(train_batch, BATCH_SIZE, N_CLASSES)
    train_loss = cnn.losses(train_logits, train_label_batch)
    train_op = cnn.training(train_loss, learning_rate)
    train__acc = cnn.evaluation(train_logits, train_label_batch)

    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])

            if step % 50 == 0:
                print("Step {}, ".format(step),
                      "train loss = {:.2f}, ".format(tra_loss),
                      "train accuracy = {:.2f}%".format(tra_acc * 100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, "model.ckpt")
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print("Done training -- epoch limit reached")
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
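# Note: run_training() above relies on module-level constants that are not
# shown. Plausible values for the cats-vs-dogs setup implied by Example #7
# (every number here is an assumption):
N_CLASSES = 2          # cats vs. dogs
IMG_W = 208            # input width expected by cnn.inference
IMG_H = 208            # input height
BATCH_SIZE = 16
CAPACITY = 2000        # queue capacity used by get_batch
MAX_STEP = 10000
learning_rate = 0.0001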
Example #9
def main(args=None):

    if tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.DeleteRecursively(FLAGS.train_dir)
    tf.gfile.MakeDirs(FLAGS.train_dir)


    with tf.Graph().as_default():
        images, labels = network.train_set()
        logits = network.inference(images)
        loss = network.loss(logits, labels)
        train = network.train(loss, 0.01)

        summary = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        saver = tf.train.Saver()
        with tf.Session() as sess:
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
            sess.run(init)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            try:
                for step in range(300):
                    if not coord.should_stop():
                        _, loss_value = sess.run([train, loss])
                        print('Step %d: loss = %.2f' % (step, loss_value))

                        summary_str = sess.run(summary)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                        checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                        saver.save(sess, checkpoint_file, global_step=step)

            except tf.errors.OutOfRangeError:
                print('Done training -- epoch limit reached')
            finally:
                coord.request_stop()

            coord.join(threads)
Example #10
def predict():
    #labels = [FLAGS.img_label]
    #filenames = [FLAGS.img_path]
    stars = [
        "nicolas_cage", "brad_pitt", "angelina_jolie", "leonardo_dicaprio",
        "robert_downey_jr"
    ]

    img, lbl = get_image(FLAGS.img_path, FLAGS.img_label)

    img = tf.convert_to_tensor(img, dtype=tf.float32)
    #label = tf.convert_to_tensor(lbl, dtype=tf.int64)
    #print(tf.shape(label))
    logit = cnn.inference(img)

    # Calculate predictions.
    #top_k_op = tf.nn.in_top_k(logit, label, 1)

    with tf.Session() as sess:

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore()

        saver = tf.train.Saver(variables_to_restore)
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found')
            return

        #true_count = 0
        #predictions = sess.run([top_k_op])
        predictions = sess.run([logit])
        max_idx = predictions[0][0].argmax()

        #true_count += np.sum(predictions)

        print("prediction: {}, truth: {}".format(stars[max_idx],
                                                 stars[FLAGS.img_label]))
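# Note: get_image() is defined elsewhere. A sketch that matches how its result
# is fed to cnn.inference() above, assuming grayscale input scaled to [-1, 1]
# (as in Example #15) and a FLAGS.image_size flag; all of that is assumed.
import numpy as np
from PIL import Image

def get_image(img_path, img_label):
    # Read, resize to the network input, convert to grayscale, scale to [-1, 1],
    # and add batch and channel dimensions.
    img = Image.open(img_path).convert('L').resize(
        (FLAGS.image_size, FLAGS.image_size))
    arr = np.asarray(img, dtype=np.float32) / 255.0 * 2.0 - 1.0
    return arr.reshape((1, FLAGS.image_size, FLAGS.image_size, 1)), img_label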
Example #11
def run_training():
    # for mnist
    # train_data, test_data, validation_data = input_data.read_data_sets("../data/MNIST_data/")
    # for cifar-10
    train_data, test_data, validation_data = input_data.load_data()

    with tf.Graph().as_default():
        image_pl, label_pl, keep_prob_pl = place_holder(FLAGS.batch_size)
        logits = nn_structure.inference(image_pl, conv_1_params,
                                        max_pool_1_params, conv_2_params,
                                        max_pool_2_params,
                                        full_connected_units, keep_prob_pl)
        loss = nn_structure.loss(logits, label_pl)
        train_op = nn_structure.train(loss, FLAGS.learning_rate)
        eval_correct = nn_structure.evaluation(logits, label_pl, k=1)
        init = tf.initialize_all_variables()

        with tf.Session() as sess:
            sess.run(init)
            start_time = time.time()
            for step in range(FLAGS.max_step):
                feed_dict = fill_feed_dict(train_data, 0.5, image_pl, label_pl,
                                           keep_prob_pl)
                _, loss_value = sess.run([train_op, loss], feed_dict)

                if step % 100 == 0:
                    duration = time.time() - start_time
                    print("Step: {:d}, Training Loss: {:.4f}, {:.1f}ms/step".
                          format(step, loss_value, duration * 10))
                    start_time = time.time()

                if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_step:
                    print("Train Eval:")
                    do_eval(sess, eval_correct, train_data, image_pl, label_pl,
                            keep_prob_pl)
                    print("Validation Eval:")
                    do_eval(sess, eval_correct, validation_data, image_pl,
                            label_pl, keep_prob_pl)
                    print("Test Eval:")
                    do_eval(sess, eval_correct, test_data, image_pl, label_pl,
                            keep_prob_pl)
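# Note: place_holder() and fill_feed_dict() are helpers from the surrounding
# project. Minimal sketches consistent with how they are called above; the
# 32x32x3 image shape and the next_batch() API are assumptions.
def place_holder(batch_size):
    # Placeholders for a batch of images, integer labels, and the dropout keep prob.
    image_pl = tf.placeholder(tf.float32, shape=(batch_size, 32, 32, 3))
    label_pl = tf.placeholder(tf.int32, shape=(batch_size,))
    keep_prob_pl = tf.placeholder(tf.float32)
    return image_pl, label_pl, keep_prob_pl

def fill_feed_dict(data_set, keep_prob, image_pl, label_pl, keep_prob_pl):
    # Pull the next batch and map it onto the placeholders.
    images, labels = data_set.next_batch(FLAGS.batch_size)
    return {image_pl: images, label_pl: labels, keep_prob_pl: keep_prob}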
Example #12
def evaluate(run_once=False):
  with tf.Graph().as_default() as graph:
    
    examples, labels = cnn.inputs(data_type='test')
    logits = cnn.inference(examples)

    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(config.moving_average_decay)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    summary_writer = tf.train.SummaryWriter(config.eval_dir, graph)

    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op)
      if run_once:
        break
      time.sleep(EVAL_INTERVAL_SECS)
Example #13
def train():
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    images, labels = cnn.distorted_inputs()
    logits = cnn.inference(images)
    loss = cnn.loss(logits, labels)
    train_op = cnn.train(loss, global_step)
    summary_op = tf.merge_all_summaries()
    init = tf.initialize_all_variables()
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=LOG_DEVICE_PLACEMENT))
    saver = tf.train.Saver(tf.all_variables())

    if tf.gfile.Exists(TRAIN_DIR):
      ckpt = tf.train.get_checkpoint_state(CHECKPOINT_DIR)
      if ckpt and ckpt.model_checkpoint_path:
        last_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        ckpt_dir = os.path.join(CHECKPOINT_DIR, "model.ckpt-" + last_step)
        tf.gfile.DeleteRecursively(TRAIN_DIR)
        saver.restore(sess, ckpt_dir)
        assign_op = global_step.assign(int(last_step))
        sess.run(assign_op)
        print ("Read old model from: ", ckpt_dir)
        print ("Starting training at: ", sess.run(global_step))        
      else:
        tf.gfile.DeleteRecursively(TRAIN_DIR)
        sess.run(init)
        print ("No model found. Starting training at: ",sess.run(global_step))
    else:
      tf.gfile.MakeDirs(TRAIN_DIR)
      sess.run(init)
      print ("No folder found. Starting training at: ",sess.run(global_step))
    print ("Writing train results to: ", TRAIN_DIR)
    print ("Train file: ", TRAIN_FILE)
    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                            graph_def=sess.graph_def)

    for step in xrange(sess.run(global_step), MAX_STEPS):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = BATCH_SIZE
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      if step % 10 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == MAX_STEPS:
        checkpoint_path = os.path.join(TRAIN_DIR, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
Example #14
def train(networkmodel, MODEL_SAVE_PATH, MODEL_NAME):

    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name !')
    else:
        print('job_name : %s' % FLAGS.job_name)
    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    else:
        print('task_index : %d' % FLAGS.task_index)

    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')

    # Create the cluster
    # num_worker = len(worker_spec)
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)
    if FLAGS.job_name == 'ps':
        server.join()

    is_chief = (FLAGS.task_index == 0)
    # worker_device = '/job:worker/task%d/cpu:0' % FLAGS.task_index
    with tf.device(tf.train.replica_device_setter(cluster=cluster)):

        # Generate training data with labels
        x, y_ = readdata.get_batch(train=True,
                                   batch_size=BATCH_SIZE,
                                   num_epochs=None)
        # Generate test data with labels
        text_x, text_y = readdata.get_batch(train=False,
                                            batch_size=BATCH_SIZE,
                                            num_epochs=50)

        # Plain (fully-connected) network model
        if networkmodel:
            # Flatten the input to 1-D; -1 means the batch size is inferred
            x = tf.reshape(x, [-1, x.shape[1] * x.shape[2] * x.shape[3]])
            # Training output
            y = network.inference(x, avg_class=None, reuse=False, lamada=None)
        else:
            # Convolutional model
            # Training input/output tensors
            y = cnn.inference(x, False, False, regularizer=None)

        # Global training-step counter: starts at 0 and is incremented once per batch
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # Plain network model
        if networkmodel:
            # Flatten the test data to match the network input
            text_x = tf.reshape(
                text_x,
                [-1, text_x.shape[1] * text_x.shape[2] * text_x.shape[3]])
            # Test output
            average_y = network.inference(text_x,
                                          avg_class=None,
                                          reuse=True,
                                          lamada=None)
        else:
            # CNN test input/output
            average_y = cnn.inference(text_x, True, False, regularizer=None)

        # Cross-entropy measures how well the predicted distribution matches the true labels; averaged over the batch below
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        # Compute the loss
        loss = tf.reduce_mean(cross_entropy)
        # Training op: gradient descent (GradientDescentOptimizer) with learning rate LEARNING_RATE, minimizing the loss
        train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
            loss, global_step=global_step)
        # Check whether the prediction matches the true class; tf.argmax picks the label index
        correct_prediction = tf.equal(tf.argmax(average_y, 1),
                                      tf.argmax(text_y, 1))
        # tf.reduce_mean averages over the batch
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        # # Build the graph
        # with tf.control_dependencies([train_step]):
        #     train_op = tf.no_op(name='train')
        # Create the parameter-initialization op init_op
        init_op = tf.global_variables_initializer()
        train_dir = tempfile.mkdtemp()
        sv = tf.train.Supervisor(is_chief=is_chief,
                                 logdir=train_dir,
                                 init_op=init_op,
                                 recovery_wait_secs=1,
                                 global_step=global_step)

        if is_chief:
            print('Worker %d: Initializing session...' % FLAGS.task_index)
        else:
            print('Worker %d: Waiting for session to be initialized...' %
                  FLAGS.task_index)
        sess = sv.prepare_or_wait_for_session(server.target)
        print('Worker %d: Session initialization complete.' %
              FLAGS.task_index)

        time_begin = time.time()
        print('Training begins @ %f' % time_begin)
        local_step = 0
        coord = tf.train.Coordinator()  # Create a coordinator to manage threads
        threads = tf.train.start_queue_runners(sess=sess,
                                               coord=coord)  # Start all queue-runner threads

        for i in range(TRAINING_STEPS):

            _, step, loss_value = sess.run([train_step, global_step, loss])
            local_step += 1

            now = time.time()
            print('%f: Worker %d: training step %d done (global step: %d)' %
                  (now, FLAGS.task_index, local_step, step))

            # Print validation accuracy
            if (i + 1) % 100 == 0:
                validate_acc = sess.run(accuracy)  # Evaluate accuracy on the test batch
                print(
                    "After %d training step(s),validation accuracy using average model is %g."
                    % (step, validate_acc))

        coord.request_stop()  # Ask all threads to stop
        coord.join(threads)

        time_end = time.time()
        print('Training ends @ %f' % time_end)
        train_time = time_end - time_begin
        print('Training elapsed time:%f s' % train_time)

    sess.close()
Example #15
def predict():
    filename_queue = tf.train.string_input_producer([
        "../../datasets/stars_from_google_images/brad_pitt_test.jpeg"
    ])  #  list of files to read
    labels = [1]
    reader = tf.WholeFileReader()
    _, value = reader.read(filename_queue)

    img = tf.image.decode_jpeg(
        value)  # use png or jpg decoder based on your files.

    # preprocessing
    img = tf.image.resize_images(img, [FLAGS.image_size, FLAGS.image_size],
                                 method=tf.image.ResizeMethod.BICUBIC)
    img = tf.image.rgb_to_grayscale(img)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.scalar_mul(2.0, img)
    img = img - tf.constant([1.0])
    logits = cnn.inference(img)

    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            #global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        else:
            print('No checkpoint file found')
            return

        # Start the queue runners.
        coord = tf.train.Coordinator()
        try:
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(
                    qr.create_threads(sess,
                                      coord=coord,
                                      daemon=True,
                                      start=True))

            true_count = 0
            predictions = sess.run([top_k_op])
            true_count += np.sum(predictions)
            # Compute precision @ 1.
            precision = true_count
            print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))
        except Exception as e:  # pylint: disable=broad-except
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)
Example #16
def train():
  with tf.Graph().as_default():
    
    log('===== START TRAIN RUN: ' + str(datetime.now()) + '=====')
    
    global_step = tf.Variable(0, trainable=False)
    
    # get examples and labels
    examples, labels = cnn.inputs(data_type='train')

    # build graph to compute logits
    logits = cnn.inference(examples)

    # compute loss
    loss, losses_collection = cnn.loss(logits, labels)
    accuracy = cnn.accuracy(logits, labels)

    # train model with one batch of examples
    train_op = cnn.train(loss, global_step)

    # create saver
    saver = tf.train.Saver(tf.all_variables())
  
    # build summary and init op
    summary_op = tf.merge_all_summaries()
    init_op = tf.initialize_all_variables()

    # start session
    # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    sess = tf.Session()
    sess.run(init_op)
    
    # start queue runners
    tf.train.start_queue_runners(sess=sess)

    # set up summary writers
    train_writer = tf.train.SummaryWriter(config.train_dir, sess.graph)
    
    for step in xrange(config.max_steps):
      
      start_time = time.time()
      summary, loss_value, accuracy_value, _ = sess.run([summary_op, loss, accuracy, train_op])

      loss_breakdown = [(str(l.op.name), sess.run(l)) for l in losses_collection]
        
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % config.summary_every_n_steps == 0: # summaries
        
        examples_per_sec = config.batch_size / duration
        sec_per_batch = float(duration)
        
        train_writer.add_summary(summary, step)

        log_str_1 = ('%s: step %d, loss = %.3f (%.2f examples/sec; %.3f sec/batch), accuracy %.3f   ') % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch, accuracy_value)
        log_str_1 += str(loss_breakdown) # print loss breakdown
        log(log_str_1)

        log("memory usage: {} Mb".format(float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)/1000000.0))
        

      if (step % config.ckpt_every_n_steps == 0) and (step>0): # save weights to file & validate
        checkpoint_path = os.path.join(config.checkpoint_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
        log("Checkpoint saved at step %d" % step)
Example #17
    for line in f:
        labels.append(line.rstrip())

    test_image = []
    for i in range(1, len(sys.argv)):
        img = Image.open(sys.argv[i])
        img = img.resize((nn.IMAGE_SIZE, nn.IMAGE_SIZE))
        test_image.append(np.asarray(img) / 255.0)
    test_image = np.asarray(test_image)

    images_placeholder = tf.placeholder("float",
                                        shape=(None, nn.IMAGE_SIZE,
                                               nn.IMAGE_SIZE, 3))
    labels_placeholder = tf.placeholder('float', shape=(None, nn.NUM_CLASSES))
    keep_prob = tf.placeholder("float")

    logits = nn.inference(images_placeholder, keep_prob)
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, backup_dir + "/model.ckpt")

    for i in range(len(test_image)):
        pred = np.argmax(
            logits.eval(feed_dict={
                images_placeholder: [test_image[i]],
                keep_prob: 1.0
            })[0])
        print(labels[pred])
Example #18
TARGET = sys.argv[1]
LNAME = ['non-patio', 'patio']
IMAGE_SIZE = 64
NUM_CLASS = 2
CH_SIZE = 3
BATCH_SIZE = 600
TRAIN_FILE = ['train.csv']
MAX_STEPS = 100000
flags = tf.app.flags
FLAGS = flags.FLAGS
keep_prob = tf.placeholder("float")
image = input_data.load_image(TARGET, image_size=IMAGE_SIZE, ch_size=CH_SIZE)

#output=mynn.inference2(images,keep_prob,IMAGE_SIZE,CH_SIZE,NUM_CLASS)
output = mynn.inference(image, keep_prob, IMAGE_SIZE, CH_SIZE, NUM_CLASS)

with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=0)
    #sess.run(tf.initialize_all_variables())
    model_path = '/output'
    saver.restore(sess, tf.train.latest_checkpoint(model_path))
    print("Model restore finished")
    # Write the graph with SummaryWriter
    tf.train.start_queue_runners(sess)
    actual_res = sess.run([output], feed_dict={keep_prob: 1.0})
    print('result', actual_res)
    print('label', np.argmax(actual_res))
    print('label-name', LNAME[np.argmax(actual_res)])
    print('patio value', actual_res[0][0][1])
Example #19
def evaluate(test_num, test_tfrecord_file, test_pred_file):
    """Eval Multi-task_cnn for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for Multi-task_cnn.
        images, skuid, labels, hots = cnn.inputs(test_tfrecord_file, eval_data=True, batch_size=FLAGS.test_batch_size)
      
        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cnn.inference(images, n_cnn=5)
        
        hots = tf.cast(hots, tf.float32)
        logits = tf.multiply(logits, hots, name='assign_label')

        num_splits = tf.constant(cnn.obtain_splits(num_splits_path))
        # Calculate predictions.
        cnn_pred = cnn.predict(logits, num_splits)
        origin_pred = cnn.predict(labels, num_splits)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            cnn.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                # Restores from checkpoint
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("restore from file")
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            else:
                print('No checkpoint file found')
                return

            # Start the queue runners.
            coord = tf.train.Coordinator()
            try:
                threads = []
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                                     start=True))
 
               
                num_iter = int(math.ceil(test_num / FLAGS.test_batch_size))
                # Compute precision @ 1.
                sku, cnn_predi, origin_predi = sess.run([skuid, cnn_pred, origin_pred])
                record_sku_pred(sku, cnn_predi, origin_predi, num_splits, test_pred_file, 'wb')
                
                step = 0
                while step < num_iter and not coord.should_stop():
                    sku, cnn_predi, origin_predi = sess.run([skuid, cnn_pred, origin_pred])
                    record_sku_pred(sku, cnn_predi, origin_predi, num_splits, test_pred_file, 'ab')
                    step += 1

                summary = tf.Summary()
                summary.ParseFromString(sess.run(summary_op))
                # summary.value.add(tag='Precision @ 1', simple_value=precision)
                summary_writer.add_summary(summary, global_step)
            except Exception as e:  # pylint: disable=broad-except
                coord.request_stop(e)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
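# Note: record_sku_pred() is not shown. A rough sketch consistent with the
# 'wb'/'ab' file modes used above: one tab-separated row per example with the
# sku id, the model prediction, and the label-derived prediction; num_splits is
# accepted but unused here, which is an assumption.
def record_sku_pred(sku, cnn_pred, origin_pred, num_splits, pred_file, mode):
    with open(pred_file, mode) as f:
        for s, p, o in zip(sku, cnn_pred, origin_pred):
            f.write(('%s\t%s\t%s\n' % (s, p, o)).encode('utf-8'))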
Example #20
        BATCH_SIZE,
        image_size=IMAGE_SIZE,
        ch_size=CH_SIZE,
        shuffle = True,
        distored = True)
v_images, v_labels, _ = input_data.load_cifar10(
        TRAIN_FILE,
        BATCH_SIZE,
        image_size=IMAGE_SIZE,
        ch_size=CH_SIZE,
        shuffle = True,
        distored = True)
"""

#output=mynn.inference2(images,keep_prob,IMAGE_SIZE,CH_SIZE,NUM_CLASS)
output = mynn.inference(images, keep_prob, IMAGE_SIZE, CH_SIZE, NUM_CLASS)
validate = mynn.inference(v_images,
                          keep_prob,
                          IMAGE_SIZE,
                          CH_SIZE,
                          NUM_CLASS,
                          validate=True)
loss = mynn.loss(output, labels)
train_op = mynn.training(loss)
acc = mynn.accuracy(validate, v_labels)

with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=0)
    sess.run(tf.initialize_all_variables())
    ckpt = tf.train.get_checkpoint_state('/output/')
    print(ckpt)
Example #21
def train(networkmodel, MODEL_SAVE_PATH, MODEL_NAME):
    # with tf.device('/gpu:0'):
    with tf.device('/cpu:0'):
        train_start = time.time()
        # Generate training data with labels
        x, y_ = readdata.get_batch(train=True,
                                   batch_size=BATCH_SIZE,
                                   num_epochs=None)
        # Generate test data with labels
        text_x, text_y = readdata.get_batch(train=False,
                                            batch_size=BATCH_SIZE,
                                            num_epochs=None)

        # Plain (fully-connected) network model
        if networkmodel:
            # Flatten the input to 1-D; -1 means the batch size is inferred
            x = tf.reshape(x, [-1, x.shape[1] * x.shape[2] * x.shape[3]])
            # Training output
            y = network.inference(x, avg_class=None, reuse=False, lamada=None)
        else:
            # Convolutional model
            # Training input/output tensors
            y = cnn.inference(x, False, False, regularizer=None)

        # Global step: starts at 0 and is incremented once per batch
        global_step = tf.Variable(0, trainable=False)

        # Plain network model
        if networkmodel:
            # Flatten the test data to match the network input
            text_x = tf.reshape(
                text_x,
                [-1, text_x.shape[1] * text_x.shape[2] * text_x.shape[3]])
            # Test output
            average_y = network.inference(text_x,
                                          avg_class=None,
                                          reuse=True,
                                          lamada=None)
        else:
            # CNN test input/output
            average_y = cnn.inference(text_x, True, False, regularizer=None)

        # Cross-entropy measures how well the predicted distribution matches the true labels; averaged over the batch below
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        # Batch mean
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        # Loss
        loss = cross_entropy_mean
        # Training op: gradient descent with learning rate LEARNING_RATE, minimizing the loss
        train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
            loss, global_step=global_step)
        # Build the graph
        with tf.control_dependencies([train_step]):
            train_op = tf.no_op(name='train')
        # Check whether the prediction matches the true class; tf.argmax picks the label index
        correct_prediction = tf.equal(tf.argmax(average_y, 1),
                                      tf.argmax(text_y, 1))
        # tf.reduce_mean averages over the batch
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create the tf Saver for checkpoints
        saver = tf.train.Saver()
        # Create the session and start training
        with tf.Session() as sess:
            # Initialize the model parameters
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()  # Create a coordinator to manage threads
            threads = tf.train.start_queue_runners(sess=sess,
                                                   coord=coord)  # Start all queue-runner threads

            # Iteratively train the network
            for i in range(TRAINING_STEPS):
                start_time = time.time()
                _, loss_value, step = sess.run([train_op, loss,
                                                global_step])  # Run one training step
                end_time = time.time()
                print('Time elapsed for this training step: %f s' %
                      (end_time - start_time))
                # Print training loss
                if (i + 1) % 10 == 0:
                    print(
                        "After %d training step(s), loss on training batch is %g."
                        % (step, loss_value))
                # Print validation accuracy
                if (i + 1) % 100 == 0:
                    validate_acc = sess.run(accuracy)  # Evaluate accuracy on the test batch
                    print(
                        "After %d training step(s),validation accuracy using average model is %g."
                        % (step, validate_acc))
                    a = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)
                    saver.save(sess,
                               os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                               global_step=global_step)  # Save the model
            train_end = time.time()
            print('Total training elapsed time: %f s' %
                  (train_end - train_start))

            coord.request_stop()  # Ask all threads to stop
            coord.join(threads)
Example #22
import tensorflow as tf
import cnn_input as cnn_input
import cnn as cnn
import time

image, label = cnn_input.generate_image_and_label()
images, labels = cnn_input.generate_images_and_labels_batch(image=image,
                                                            label=label,
                                                            shuffle=True)
# Logits produced by the network
logits = cnn.inference(images)
loss = cnn.loss(logits, labels)  # returns the mean cross-entropy
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)  # gradient descent
correct_predict = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))  # accuracy on the training batch
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

t1 = time.time()
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(10000):
        if i % 100 == 0:
            acc = sess.run(accuracy)
            print('step %d, acc: %f' % (i, acc))
        train_op = sess.run(train_step)
    coord.request_stop()
    coord.join(threads)
Example #23
IMAGE_SIZE = 64
NUM_CLASS = 2
CH_SIZE = 3
BATCH_SIZE = 200
TRAIN_FILE = ['train.csv']
TEST_FILE = ['test.csv']
MAX_STEPS = 100000
flags = tf.app.flags
FLAGS = flags.FLAGS
keep_prob = tf.placeholder("float")
v_images, v_labels, filename = input_data.load_data_for_test(
    TEST_FILE, BATCH_SIZE, image_size=IMAGE_SIZE, ch_size=CH_SIZE)

#output=mynn.inference2(images,keep_prob,IMAGE_SIZE,CH_SIZE,NUM_CLASS)
validate = mynn.inference(v_images, keep_prob, IMAGE_SIZE, CH_SIZE, NUM_CLASS)
acc = mynn.accuracy(validate, v_labels)

with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=0)
    #sess.run(tf.initialize_all_variables())
    model_path = '/output'
    saver.restore(sess, tf.train.latest_checkpoint(model_path))
    print("Model restore finished")
    # Write the graph with SummaryWriter
    tf.train.start_queue_runners(sess)
    acc_res, filename_res, actual_res, expect_res = sess.run(
        [acc, filename, validate, v_labels], feed_dict={keep_prob: 1.0})
    print('accuracy', acc_res)
    goods = []
    bads = []