Пример #1
0
def main(_):
    with tf.Graph().as_default():
        num_batches_per_epoch_train = int(60000 / cfg.batch_size)
        num_batches_test = int(10000 / cfg.batch_size)

        batch_x, batch_labels = create_inputs(is_train=False)
        output = net.build_arch(batch_x, is_train=False)
        batch_acc = net.test_accuracy(output, batch_labels)
        saver = tf.train.Saver()

        step = 0

        summaries = []
        summaries.append(tf.summary.scalar('accuracy', batch_acc))
        summary_op = tf.summary.merge(summaries)

        with tf.Session() as sess:
            tf.train.start_queue_runners(sess=sess)
            summary_writer = tf.summary.FileWriter(cfg.test_logdir,
                                                   graph=sess.graph)

            for epoch in range(cfg.epoch):
                ckpt = os.path.join(
                    cfg.logdir,
                    'model.ckpt-%d' % (num_batches_per_epoch_train * epoch))
                saver.restore(sess, ckpt)

                for i in range(num_batches_test):
                    summary_str = sess.run(summary_op)
                    print('%d batches are tested.' % step)
                    summary_writer.add_summary(summary_str, step)

                    step += 1
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 2 and isinstance(args[1], str)
    dataset_name = args[1]
    coord_add = get_coord_add(dataset_name)
    dataset_size_train = get_dataset_size_train(dataset_name)
    dataset_size_test = get_dataset_size_test(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(dataset_name,
                                      is_train=False,
                                      epochs=cfg.epoch)
    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    with tf.Graph().as_default():
        num_batches_per_epoch_train = int(dataset_size_train / cfg.batch_size)
        num_batches_test = int(dataset_size_test / cfg.batch_size)

        batch_x, batch_labels = create_inputs()
        output = net.build_arch(batch_x,
                                coord_add,
                                is_train=False,
                                num_classes=num_classes)
        batch_acc = net.test_accuracy(output, batch_labels)
        saver = tf.train.Saver()

        step = 0

        summaries = []
        summaries.append(tf.summary.scalar('accuracy', batch_acc))
        summary_op = tf.summary.merge(summaries)

        with tf.Session() as sess:
            tf.train.start_queue_runners(sess=sess)
            summary_writer = tf.summary.FileWriter(
                cfg.test_logdir, graph=None)  # graph=sess.graph, huge!

            for epoch in range(cfg.epoch):
                # requires a regex to adapt the loss value in the file name here
                ckpt_re = re.compile()
                ckpt = os.path.join(
                    cfg.logdir,
                    'model.ckpt-%d' % (num_batches_per_epoch_train * epoch))
                saver.restore(sess, ckpt)

                accuracy_sum = 0
                for i in range(num_batches_test):
                    batch_acc_v, summary_str = sess.run(
                        [batch_acc, summary_op])
                    print('%d batches are tested.' % step)
                    summary_writer.add_summary(summary_str, step)

                    accuracy_sum += batch_acc_v

                    step += 1

                ave_acc = accuracy_sum / num_batches_test
                print('the average accuracy is %f' % ave_acc)
Пример #3
0
def main(args):
    tf.set_random_seed(1234)
    coord_add = get_coord_add(dataset_name)
    dataset_size_train = get_dataset_size_train(dataset_name)
    dataset_size_test = get_dataset_size_test(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(dataset_name,
                                      is_train=False,
                                      epochs=cfg.epoch)

    with tf.Graph().as_default():
        num_batches_test = int(dataset_size_test / cfg.batch_size * 0.5)
        batch_x, batch_labels = create_inputs()
        output, pose_out = net.build_arch(batch_x,
                                          coord_add,
                                          is_train=False,
                                          num_classes=num_classes)
        tf.logging.debug(pose_out.get_shape())

        batch_acc = net.test_accuracy(output, batch_labels)
        saver = tf.train.Saver()
        session_config = tf.ConfigProto(
            device_count={'GPU': 0},
            gpu_options={
                'allow_growth': 1,
                # 'per_process_gpu_memory_fraction': 0.1,
                'visible_device_list': '0'
            },
            allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            mode_file = tf.train.latest_checkpoint(ckpt)
            saver.restore(sess, mode_file)

            accuracy_sum = 0
            for i in range(num_batches_test):
                batch_acc_v = sess.run([batch_acc])
                accuracy_sum += batch_acc_v[0]
                print(accuracy_sum)

            ave_acc = accuracy_sum / num_batches_test
            print('the average accuracy is %f' % ave_acc)
Пример #4
0
def main(_):
    coord_add = [[[8., 8.], [12., 8.], [16., 8.]],
                 [[8., 12.], [12., 12.], [16., 12.]],
                 [[8., 16.], [12., 16.], [16., 16.]]]

    with tf.Graph().as_default():
        batch_x, batch_labels, datanum = utils.get_batch_data(
            is_training=False)
        num_batches_test = math.ceil(datanum /
                                     cfg.batch_size)  #get the ceiling int

        output = net.build_arch(batch_x, coord_add, is_train=False)
        predict = tf.argmax(output, axis=1)
        batch_acc = net.test_accuracy(output, batch_labels)
        saver = tf.train.Saver()

        step = 0

        summaries = []
        summaries.append(tf.summary.scalar('accuracy', batch_acc))
        summary_op = tf.summary.merge(summaries)

        sess = tf.Session()
        tf.train.start_queue_runners(sess=sess)
        ckpt = tf.train.get_checkpoint_state(cfg.logdir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print(ckpt.model_checkpoint_path)
        summary_writer = tf.summary.FileWriter(cfg.test_logdir,
                                               graph=sess.graph)

        for epoch in range(cfg.test_epoch):
            accuracy_sum = 0
            for i in range(num_batches_test):
                y_pred, y, batch_acc_v, summary_str = sess.run(
                    [predict, batch_labels, batch_acc, summary_op])

                if i % 10 == 0:
                    print('%d/%d batches are tested.' %
                          (step, num_batches_test))
                    #print("labels:\n",batch_labels)
                    print("Y:\n", y)
                    print("Y_prediction:", batch_acc_v, "\n", y_pred)
                summary_writer.add_summary(summary_str, step)
                accuracy_sum += batch_acc_v
                step += 1
                if i == 0:
                    y_pred1 = y_pred
                    label1 = y
                else:
                    y_pred1 = np.concatenate((y_pred1, y_pred), axis=0)
                    label1 = np.concatenate((label1, y), axis=0)
                #print("Label:",np.shape(label1),"\n", label1)

            ave_acc = accuracy_sum / num_batches_test
            # print("The last batch----Y:",np.shape(y),"\n", y)
            # print("Y_prediction:", batch_acc_v, "\n", y_pred)
            print(epoch, 'epoch: average accuracy is %f' % ave_acc)

            print(np.shape(y_pred1), ",", datanum)
            label1 = label1[:datanum]
            y_pred1 = y_pred1[:datanum]
            print("label:", np.shape(label1))
            trade_data.out_indi_data(cfg.test_dataset,
                                     y_pred1,
                                     datalen=cfg.image_size)
Пример #5
0
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 3 and isinstance(args[1], str) and isinstance(args[2], str)
    dataset_name = args[1]
    model_name = args[2]
    coord_add = get_coord_add(dataset_name)
    dataset_size_train = get_dataset_size_train(dataset_name)
    dataset_size_test = get_dataset_size_test(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(
        dataset_name, is_train=False, epochs=cfg.epoch)

    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    with tf.Graph().as_default():
        num_batches_per_epoch_train = int(dataset_size_train / cfg.batch_size)
        num_batches_test = int(dataset_size_test / cfg.batch_size * 0.1)

        batch_x, batch_labels = create_inputs()
        batch_x = slim.batch_norm(batch_x, center=False, is_training=False, trainable=False)
        if model_name == "caps":
            output, _ = net.build_arch(batch_x, coord_add,
                                       is_train=False, num_classes=num_classes)
        elif model_name == "cnn_baseline":
            output = net.build_arch_baseline(batch_x,
                                             is_train=False, num_classes=num_classes)
        else:
            raise "Please select model from 'caps' or 'cnn_baseline' as the secondary argument of eval.py!"
        batch_acc = net.test_accuracy(output, batch_labels)
        saver = tf.train.Saver()

        step = 0

        summaries = []
        summaries.append(tf.summary.scalar('accuracy', batch_acc))
        summary_op = tf.summary.merge(summaries)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=False)) as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            if not os.path.exists(cfg.test_logdir + '/{}/{}/'.format(model_name, dataset_name)):
                os.makedirs(cfg.test_logdir + '/{}/{}/'.format(model_name, dataset_name))
            summary_writer = tf.summary.FileWriter(
                cfg.test_logdir + '/{}/{}/'.format(model_name, dataset_name), graph=sess.graph)  # graph=sess.graph, huge!

            files = os.listdir(cfg.logdir + '/{}/{}/'.format(model_name, dataset_name))
            for epoch in range(1, cfg.epoch):
                # requires a regex to adapt the loss value in the file name here
                ckpt_re = ".ckpt-%d" % (num_batches_per_epoch_train * epoch)
                for __file in files:
                    if __file.endswith(ckpt_re + ".index"):
                        ckpt = os.path.join(cfg.logdir + '/{}/{}/'.format(model_name, dataset_name), __file[:-6])
                # ckpt = os.path.join(cfg.logdir, "model.ckpt-%d" % (num_batches_per_epoch_train * epoch))
                saver.restore(sess, ckpt)

                accuracy_sum = 0
                for i in range(num_batches_test):
                    batch_acc_v, summary_str = sess.run([batch_acc, summary_op])
                    print('%d batches are tested.' % step)
                    summary_writer.add_summary(summary_str, step)

                    accuracy_sum += batch_acc_v

                    step += 1

                ave_acc = accuracy_sum / num_batches_test
                print('the average accuracy is %f' % ave_acc)

            coord.join(threads)
Пример #6
0
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 2 and isinstance(args[1], str)
    dataset_name = args[1]
    coord_add = get_coord_add(dataset_name)
    dataset_size_train = get_dataset_size_train(dataset_name)
    dataset_size_test = get_dataset_size_test(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(dataset_name,
                                      is_train=False,
                                      epochs=cfg.epoch)
    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    with tf.Graph().as_default():
        num_batches_per_epoch_train = int(dataset_size_train / cfg.batch_size)
        num_batches_test = int(dataset_size_test / cfg.batch_size * 0.1)

        batch_x, batch_labels = create_inputs()
        batch_x = slim.batch_norm(batch_x,
                                  center=False,
                                  is_training=False,
                                  trainable=False)
        output, _ = net.build_arch(batch_x,
                                   coord_add,
                                   is_train=False,
                                   num_classes=num_classes)
        batch_acc = net.test_accuracy(output, batch_labels)
        saver = tf.train.Saver()

        step = 0

        summaries = []
        summaries.append(tf.summary.scalar('accuracy', batch_acc))
        summary_op = tf.summary.merge(summaries)

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)) as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                cfg.test_logdir, graph=sess.graph)  # graph=sess.graph, huge!

            files = os.listdir(cfg.logdir)
            for epoch in range(1, cfg.epoch):
                # requires a regex to adapt the loss value in the file name here
                ckpt_re = ".ckpt-%d" % (num_batches_per_epoch_train * epoch)
                for __file in files:
                    if __file.endswith(ckpt_re + ".index"):
                        ckpt = os.path.join(cfg.logdir, __file[:-6])
                # ckpt = os.path.join(cfg.logdir, "model.ckpt-%d" % (num_batches_per_epoch_train * epoch))
                saver.restore(sess, ckpt)

                accuracy_sum = 0
                for i in range(num_batches_test):
                    batch_acc_v, summary_str = sess.run(
                        [batch_acc, summary_op])
                    print('%d batches are tested.' % step)
                    summary_writer.add_summary(summary_str, step)
                    print('%d batch accuracy.' % batch_acc_v)

                    accuracy_sum += batch_acc_v

                    step += 1

                ave_acc = accuracy_sum / num_batches_test
                print('the average accuracy is %f' % ave_acc)

            coord.join(threads)
Пример #7
0
def main(_):
    coord_add = [[[8., 8.], [12., 8.], [16., 8.]],
                 [[8., 12.], [12., 12.], [16., 12.]],
                 [[8., 16.], [12., 16.], [16., 16.]]]

    coord_add = np.array(coord_add, dtype=np.float32)/28.
    data = utils.load_trade(is_training=True)
    datanum = data.num_examples
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        batch_x =tf.placeholder(tf.float32,[cfg.batch_size,cfg.image_size,cfg.image_size,3])
        batch_labels = tf.placeholder(tf.int32,[cfg.batch_size])
        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
        opt = tf.train.AdamOptimizer()

        #batch_x, batch_labels,datanum = utils.get_shuffle_batch_data(is_training=True)
        num_batches_per_epoch = int(datanum / cfg.batch_size)
        print(datanum,num_batches_per_epoch)
        # batch_y = tf.one_hot(batch_labels, depth=10, axis=1, dtype=tf.float32)

        m_op = tf.placeholder(dtype=tf.float32, shape=())
        with tf.device('/gpu:0'):
            with slim.arg_scope([slim.variable], device='/cpu:0'):
                output = net.build_arch(batch_x, coord_add, is_train=True)
                # loss = net.cross_ent_loss(output, batch_labels)
                loss = net.spread_loss(output, batch_labels, m_op)
                accuracy = net.test_accuracy(output,batch_labels)
            grad = opt.compute_gradients(loss)

        loss_name = 'spread_loss'

        # Print trainable variable parameter statistics to stdout.
        # By default, statistics are associated with each graph node.
        param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=tf.contrib.tfprof.model_analyzer.
                TRAINABLE_VARS_PARAMS_STAT_OPTIONS)

        # param_stats is tensorflow.tfprof.TFGraphNodeProto proto.
        # Let's print the root below.
        sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)

        summaries = []
        summaries.append(tf.summary.scalar(loss_name, loss))
        summaries.append(tf.summary.scalar("accuracy",accuracy))

        train_op = opt.apply_gradients(grad, global_step=global_step)

        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
        sess.run(tf.global_variables_initializer())

        # add addition options to trace the session execution
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)  #cfg.epoch)

        # restore from the check point
        ckpt = tf.train.get_checkpoint_state(cfg.logdir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            initial_step = int(ckpt.model_checkpoint_path.split('-')[1])
            print(ckpt, ckpt.model_checkpoint_path, initial_step)
        else:
            initial_step =0
        m = 0.2

        summary_op = tf.summary.merge(summaries)
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(cfg.logdir, graph=sess.graph)


        cal_num=0
        for step in range(cfg.epoch):
            for i in range(num_batches_per_epoch):
                tic = time.time()
                x,y = data.next_batch(cfg.batch_size)
                _, loss_value,accuracy_val = sess.run([train_op, loss,accuracy], feed_dict={batch_x:x,batch_labels:y,m_op: m})
                print('%d/%d, %d/%d iteration is finished in ' % (step,cfg.epoch,i,num_batches_per_epoch) + '%f second' % (time.time()-tic) + ',m:',m,',loss: %f'% loss_value,",accuracy:",accuracy_val)

                assert not np.isnan(loss_value), 'loss is nan'
                cal_num+=1
                if i % 30 == 0:

                    summary_str = sess.run(summary_op, feed_dict={batch_x:x,batch_labels:y,m_op: m},
                                           options=options,
                                           run_metadata=run_metadata
                                           )
                    summary_writer.add_run_metadata(run_metadata,'step%d'% cal_num)
                    summary_writer.add_summary(summary_str, initial_step+cal_num)

                    # Print to stdout an analysis of the memory usage and the timing information
                    # broken down by operations.
                    # tf.contrib.tfprof.model_analyzer.print_model_analysis(
                    #     tf.get_default_graph(),
                    #     run_meta=run_metadata,
                    #     tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY)

                    # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                    # chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    # with open('./time_line/timeline_02_step_%d.json' % i, 'w') as f:
                    #     f.write(chrome_trace)

                if cal_num % cfg.saveperiod == 0:
                    ckpt_path = os.path.join(cfg.logdir, 'model.ckpt')
                    saver.save(sess, ckpt_path, global_step=initial_step + cal_num)

                if m<0.9:
                    m += round((0.9-0.2) / num_batches_per_epoch,5)
                else:
                    m = 0.9

            ckpt_path = os.path.join(cfg.logdir, 'model.ckpt')
            saver.save(sess, ckpt_path, global_step=initial_step+cal_num)
Пример #8
0
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 3 and isinstance(args[1], str) and isinstance(
        args[2], str)
    dataset_name = args[1]
    model_name = args[2]
    coord_add = get_coord_add(dataset_name)
    dataset_size_train = get_dataset_size_train(dataset_name)
    dataset_size_test = get_dataset_size_test(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(dataset_name,
                                      is_train=False,
                                      epochs=cfg.epoch)
    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    with tf.Graph().as_default():
        num_batches_per_epoch_train = int(dataset_size_train / cfg.batch_size)
        num_batches_test = int(dataset_size_test / cfg.batch_size * 0.1)

        batch_x, batch_labels = create_inputs()
        batch_x = slim.batch_norm(batch_x,
                                  center=False,
                                  is_training=False,
                                  trainable=False)
        if model_name == "caps":
            output, _ = net.build_arch(batch_x,
                                       coord_add,
                                       is_train=False,
                                       num_classes=num_classes)
        elif model_name == "cnn_baseline":
            output = net.build_arch_baseline(batch_x,
                                             is_train=False,
                                             num_classes=num_classes)
        else:
            raise "Please select model from 'caps' or 'cnn_baseline' as the secondary argument of eval.py!"
        batch_acc = net.test_accuracy(output, batch_labels)
        saver = tf.train.Saver()

        step = 0

        summaries = []
        summaries.append(tf.summary.scalar('accuracy', batch_acc))
        summary_op = tf.summary.merge(summaries)

        session_config = tf.ConfigProto(
            device_count={'GPU': 0},
            gpu_options={
                'allow_growth': 1,
                # 'per_process_gpu_memory_fraction': 0.1,
                'visible_device_list': '0'
            },
            allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            if not os.path.exists(cfg.test_logdir +
                                  '/{}/{}/'.format(model_name, dataset_name)):
                os.makedirs(cfg.test_logdir +
                            '/{}/{}/'.format(model_name, dataset_name))
            summary_writer = tf.summary.FileWriter(
                cfg.test_logdir + '/{}/{}/'.format(model_name, dataset_name),
                graph=sess.graph)  # graph=sess.graph, huge!

            files = os.listdir(cfg.logdir +
                               '/{}/{}/'.format(model_name, dataset_name))
            for epoch in range(1, cfg.epoch):
                # requires a regex to adapt the loss value in the file name here
                ckpt_re = ".ckpt-%d" % (num_batches_per_epoch_train * epoch)
                for __file in files:
                    if __file.endswith(ckpt_re + ".index"):
                        ckpt = os.path.join(
                            cfg.logdir +
                            '/{}/{}/'.format(model_name, dataset_name),
                            __file[:-6])
                # ckpt = os.path.join(cfg.logdir, "model.ckpt-%d" % (num_batches_per_epoch_train * epoch))
                saver.restore(sess, ckpt)

                accuracy_sum = 0
                for i in range(num_batches_test):
                    batch_acc_v, summary_str = sess.run(
                        [batch_acc, summary_op])
                    print('%d batches are tested.' % step)
                    summary_writer.add_summary(summary_str, step)

                    accuracy_sum += batch_acc_v

                    step += 1

                ave_acc = accuracy_sum / num_batches_test
                print('the average accuracy is %f' % ave_acc)

            coord.join(threads)
Пример #9
0
def main(_):
    coord_add = [[[8., 8.], [12., 8.], [16., 8.]],
                 [[8., 12.], [12., 12.], [16., 12.]],
                 [[8., 16.], [12., 16.], [16., 16.]]]

    coord_add = np.array(coord_add, dtype=np.float32) / 28.
    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        batch_x, batch_labels = utils.get_shuffle_tfrecord(is_training=True)
        datanum = 272965
        num_batches_per_epoch = int(datanum / cfg.batch_size)
        print(datanum, num_batches_per_epoch)
        # batch_y = tf.one_hot(batch_labels, depth=10, axis=1, dtype=tf.float32)
        """Use exponential decay leanring rate?"""
        lrn_rate = tf.maximum(
            tf.train.exponential_decay(1e-3, global_step,
                                       num_batches_per_epoch, 0.8), 1e-5)
        tf.summary.scalar('learning_rate', lrn_rate)
        opt = tf.train.AdamOptimizer(learning_rate=lrn_rate)

        m_op = tf.placeholder(dtype=tf.float32, shape=())
        with tf.device('/gpu:0'):
            with slim.arg_scope([slim.variable], device='/cpu:0'):
                output = net.build_arch(batch_x, coord_add, is_train=True)
                # loss = net.cross_ent_loss(output, batch_labels)
                loss = net.spread_loss(output, batch_labels, m_op)
                accuracy = net.test_accuracy(output, batch_labels)
                tf.summary.scalar("spread_loss", loss)
                tf.summary.scalar("accuracy", accuracy)
            """Compute gradient."""
            grad = opt.compute_gradients(loss)
            # See: https://stackoverflow.com/questions/40701712/how-to-check-nan-in-gradients-in-tensorflow-when-updating
            grad_check = [
                tf.check_numerics(g, message='Gradient NaN Found!')
                for g, _ in grad if g is not None
            ] + [tf.check_numerics(loss, message='Loss NaN Found')]
        """Apply graident."""
        with tf.control_dependencies(grad_check):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = opt.apply_gradients(grad, global_step=global_step)

            # Print trainable variable parameter statistics to stdout.
            # By default, statistics are associated with each graph node.
        param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=tf.contrib.tfprof.model_analyzer.
            TRAINABLE_VARS_PARAMS_STAT_OPTIONS)

        # param_stats is tensorflow.tfprof.TFGraphNodeProto proto.
        # Let's print the root below.
        print('total_params: %d\n' % param_stats.total_parameters)

        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=30)  #cfg.epoch)

        # restore from the check point
        ckpt = tf.train.get_checkpoint_state(cfg.logdir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            initial_step = int(ckpt.model_checkpoint_path.split('-')[1])
            print(ckpt, ckpt.model_checkpoint_path, initial_step)
            m = 0.9
        else:
            initial_step = 0
            m = 0.2

        # read snapshot
        # latest = os.path.join(cfg.logdir, 'model.ckpt-4680')
        # saver.restore(sess, latest)
        """Set summary op."""
        summary_op = tf.summary.merge_all()
        """Start coord & queue."""
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        """Set summary writer"""
        # if not os.path.exists(cfg.logdir):
        #     os.makedirs(cfg.logdir)
        summary_writer = tf.summary.FileWriter(
            cfg.logdir, graph=sess.graph)  # graph = sess.graph, huge!

        cal_num = 0

        for step in range(cfg.epoch):
            for i in range(num_batches_per_epoch):
                tic = time.time()
                """"TF queue would pop batch until no file"""
                try:
                    _, loss_value, accuracy_val = sess.run(
                        [train_op, loss, accuracy], feed_dict={m_op: m})
                    print(
                        '%d/%d, %d/%d iteration is finished in ' %
                        (step, cfg.epoch, i, num_batches_per_epoch) +
                        '%f second' % (time.time() - tic) + ',m:', m,
                        ',loss: %f' % loss_value, ",accuracy:", accuracy_val)

                    cal_num += 1
                except tf.errors.InvalidArgumentError:
                    print('%d iteration contains NaN gradients. Discard.' %
                          cal_num)
                    continue
                else:
                    """Write to summary."""
                    if i % 30 == 0:
                        summary_str = sess.run(summary_op, feed_dict={m_op: m})
                        summary_writer.add_summary(summary_str,
                                                   initial_step + cal_num)

                    if cal_num % cfg.saveperiod == 0:
                        ckpt_path = os.path.join(cfg.logdir, 'model.ckpt')
                        saver.save(sess,
                                   ckpt_path,
                                   global_step=initial_step + cal_num)

                    if m < 0.9:
                        m += round((0.9 - 0.2) / num_batches_per_epoch, 5)
                    else:
                        m = 0.9

            ckpt_path = os.path.join(cfg.logdir, 'model.ckpt')
            saver.save(sess, ckpt_path, global_step=initial_step + cal_num)
        """Join threads"""
        coord.join(threads)
Пример #10
0
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 3 and isinstance(args[1], str) and isinstance(args[2], str)
    dataset_name = args[1]
    model_name = args[2]

    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    coord_add = get_coord_add(dataset_name)
    dataset_size_train = get_dataset_size_train(dataset_name)
    dataset_size_test = get_dataset_size_test(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(
        dataset_name, is_train=False, epochs=cfg.epoch)

    with tf.Graph().as_default():
        num_batches_per_epoch_train = int(dataset_size_train / cfg.batch_size)
        num_batches_test = 2  # int(dataset_size_test / cfg.batch_size * 0.1)

        batch_x, batch_labels = create_inputs()
        batch_squash = tf.divide(batch_x, 255.)
        batch_x_norm = slim.batch_norm(batch_x, center=False, is_training=False, trainable=False)
        output, pose_out = net.build_arch(batch_x_norm, coord_add,
                                          is_train=False, num_classes=num_classes)
        tf.logging.debug(pose_out.get_shape())

        batch_acc = net.test_accuracy(output, batch_labels)
        m_op = tf.constant(0.9)
        loss, spread_loss, mse, recon_img_squash = net.spread_loss(
            output, pose_out, batch_squash, batch_labels, m_op)
        tf.summary.scalar('spread_loss', spread_loss)
        tf.summary.scalar('reconstruction_loss', mse)
        tf.summary.scalar('all_loss', loss)
        data_size = int(batch_x.get_shape()[1])
        recon_img = tf.multiply(tf.reshape(recon_img_squash, shape=[
                                cfg.batch_size, data_size, data_size, 1]), 255.)
        orig_img = tf.reshape(batch_x, shape=[
            cfg.batch_size, data_size, data_size, 1])
        tf.summary.image('orig_image', orig_img)
        tf.summary.image('recon_image', recon_img)
        saver = tf.train.Saver()

        step = 0

        tf.summary.scalar('accuracy', batch_acc)
        summary_op = tf.summary.merge_all()

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=False)) as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            if not os.path.exists(cfg.test_logdir + '/{}/{}/'.format(model_name, dataset_name)):
                os.makedirs(cfg.test_logdir + '/{}/{}/'.format(model_name, dataset_name))
            summary_writer = tf.summary.FileWriter(
                cfg.test_logdir + '/{}/{}/'.format(model_name, dataset_name), graph=sess.graph)  # graph=sess.graph, huge!

            files = os.listdir(cfg.logdir + '/{}/{}/'.format(model_name, dataset_name))
            for epoch in range(14, 15):
                # requires a regex to adapt the loss value in the file name here
                ckpt_re = ".ckpt-%d" % (num_batches_per_epoch_train * epoch)
                for __file in files:
                    if __file.endswith(ckpt_re + ".index"):
                        ckpt = os.path.join(
                            cfg.logdir + '/{}/{}/'.format(model_name, dataset_name), __file[:-6])
                # ckpt = os.path.join(cfg.logdir, "model.ckpt-%d" % (num_batches_per_epoch_train * epoch))
                saver.restore(sess, ckpt)

                accuracy_sum = 0
                for i in range(num_batches_test):
                    batch_acc_v, summary_str, orig_image, recon_image = sess.run(
                        [batch_acc, summary_op, orig_img, recon_img])
                    print('%d batches are tested.' % step)
                    summary_writer.add_summary(summary_str, step)

                    accuracy_sum += batch_acc_v

                    step += 1
                    # display original/reconstructed images in matplotlib
                    plot_imgs(orig_image, i, 'ori')
                    plot_imgs(recon_image, i, 'rec')

                ave_acc = accuracy_sum / num_batches_test
                print('the average accuracy is %f' % ave_acc)
Пример #11
0
def main(args):
    assert len(args) == 2 and isinstance(args[1], str)

    # Get dataset name
    dataset_name = args[1]  # mnist
    logger.info(f'Using dataset: {dataset_name}')

    # Set reproducible random seed
    tf.set_random_seed(1234)

    coord_add = get_coord_add(dataset_name)  # (3, 3, 2)
    dataset_size = get_dataset_size_train(dataset_name)  # 55,000
    num_classes = get_num_classes(dataset_name)  # 10
    create_inputs = get_create_inputs(dataset_name,
                                      is_train=True,
                                      epochs=cfg.epoch)

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Get global_step
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        num_batches_per_epoch = dataset_size // cfg.batch_size  # 1100

        opt = tf.train.AdamOptimizer()

        # Get batch from data queue
        batch_x, batch_labels = create_inputs()  # (50 28, 28, 1), (50,)

        m_op = tf.placeholder(dtype=tf.float32, shape=())
        with tf.device('/gpu:0'):
            with slim.arg_scope([slim.variable], device='/cpu:0'):
                batch_squash = tf.divide(batch_x, 255.)
                batch_x = slim.batch_norm(batch_x,
                                          center=False,
                                          is_training=True,
                                          trainable=True)

                output, pose_out = net.build_arch(
                    batch_x, coord_add, is_train=True,
                    num_classes=num_classes)  # (50, 10), (50, 10, 18)
                tf.logging.debug(pose_out.get_shape())

                # Define loss = spread_loss + reconstruction loss
                loss, spread_loss, mse, _ = net.spread_loss(
                    output, pose_out, batch_squash, batch_labels, m_op)

                acc = net.test_accuracy(output, batch_labels)
                tf.summary.scalar('spread_loss', spread_loss)
                tf.summary.scalar('reconstruction_loss', mse)
                tf.summary.scalar('all_loss', loss)
                tf.summary.scalar('train_acc', acc)

            grad = opt.compute_gradients(loss)
            # See: https://stackoverflow.com/questions/40701712/how-to-check-nan-in-gradients-in-tensorflow-when-updating
            grad_check = [
                tf.check_numerics(g, message='Gradient NaN Found!')
                for g, _ in grad if g is not None
            ] + [tf.check_numerics(loss, message='Loss NaN Found')]

        # Apply graident
        with tf.control_dependencies(grad_check):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = opt.apply_gradients(grad, global_step=global_step)

        # Set Session settings
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=False))
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())

        # Set Saver
        var_to_save = [
            v for v in tf.global_variables() if 'Adam' not in v.name
        ]  # Don't save redundant Adam beta/gamma
        saver = tf.train.Saver(var_list=var_to_save, max_to_keep=cfg.epoch)

        # Display parameters
        total_p = np.sum([
            np.prod(v.get_shape().as_list()) for v in var_to_save
        ]).astype(np.int32)
        train_p = np.sum([
            np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
        ]).astype(np.int32)
        logger.info('Total Parameters: {}'.format(total_p))
        logger.info('Trainable Parameters: {}'.format(train_p))

        # read snapshot
        # latest = os.path.join(cfg.logdir, 'model.ckpt-4680')
        # saver.restore(sess, latest)

        # Set summary op
        summary_op = tf.summary.merge_all()

        # Start coord & queue
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Set summary writer
        if not os.path.exists(cfg.logdir +
                              '/caps/{}/train_log/'.format(dataset_name)):
            os.makedirs(cfg.logdir +
                        '/caps/{}/train_log/'.format(dataset_name))
        summary_writer = tf.summary.FileWriter(
            cfg.logdir + f"/caps/{dataset_name}/train_log/",
            graph=sess.graph)  # graph = sess.graph, huge!

        # Main loop
        m_min = 0.2
        m_max = 0.9
        m = m_min
        for step in range(cfg.epoch * num_batches_per_epoch + 1):
            tic = time.time()
            # TF queue would pop batch until no file
            try:
                _, loss_value, summary_str = sess.run(
                    [train_op, loss, summary_op], feed_dict={m_op: m})
                logger.info('%d iteration finishs in ' % step + '%f second' %
                            (time.time() - tic) + ' loss=%f' % loss_value)
            except KeyboardInterrupt:
                sess.close()
                sys.exit()
            except tf.errors.InvalidArgumentError:
                logger.warning(
                    '%d iteration contains NaN gradients. Discard.' % step)
                continue
            else:
                if step % 5 == 0:
                    summary_writer.add_summary(summary_str, step)
                """Epoch wise linear annealling."""
                if (step % num_batches_per_epoch) == 0:
                    if step > 0:
                        m += (m_max - m_min) / (cfg.epoch * cfg.m_schedule)
                        if m > m_max:
                            m = m_max

                    # Save model periodically
                    ckpt_path = os.path.join(
                        cfg.logdir + '/caps/{}/'.format(dataset_name),
                        'model-{:.4f}.ckpt'.format(loss_value))
                    saver.save(sess, ckpt_path, global_step=step)
Пример #12
0
def main(args):
    assert len(args) == 2 and isinstance(args[1], str)

    # Get dataset name
    dataset_name = args[1]   # mnist
    logger.info(f'Using dataset: {dataset_name}')

    # Set reproducible random seed
    tf.set_random_seed(1234)

    coord_add = get_coord_add(dataset_name)             # (3, 3, 2)
    dataset_size = get_dataset_size_train(dataset_name) # 55,000
    num_classes = get_num_classes(dataset_name)         # 10
    create_inputs = get_create_inputs(dataset_name, is_train=True, epochs=cfg.epoch)

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Get global_step
        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        num_batches_per_epoch = dataset_size // cfg.batch_size # 1100

        opt = tf.train.AdamOptimizer()

        # Get batch from data queue
        batch_x, batch_labels = create_inputs() # (50 28, 28, 1), (50,)

        m_op = tf.placeholder(dtype=tf.float32, shape=())
        with tf.device('/gpu:0'):
            with slim.arg_scope([slim.variable], device='/cpu:0'):
                batch_squash = tf.divide(batch_x, 255.)
                batch_x = slim.batch_norm(batch_x, center=False, is_training=True, trainable=True)

                output, pose_out = net.build_arch(batch_x, coord_add, is_train=True, num_classes=num_classes) # (50, 10), (50, 10, 18)
                tf.logging.debug(pose_out.get_shape())

                # Define loss = spread_loss + reconstruction loss
                loss, spread_loss, mse, _ = net.spread_loss(output, pose_out, batch_squash, batch_labels, m_op)

                acc = net.test_accuracy(output, batch_labels)
                tf.summary.scalar('spread_loss', spread_loss)
                tf.summary.scalar('reconstruction_loss', mse)
                tf.summary.scalar('all_loss', loss)
                tf.summary.scalar('train_acc', acc)

            grad = opt.compute_gradients(loss)
            # See: https://stackoverflow.com/questions/40701712/how-to-check-nan-in-gradients-in-tensorflow-when-updating
            grad_check = [tf.check_numerics(g, message='Gradient NaN Found!')
                          for g, _ in grad if g is not None] + [tf.check_numerics(loss, message='Loss NaN Found')]

        # Apply graident
        with tf.control_dependencies(grad_check):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = opt.apply_gradients(grad, global_step=global_step)

        # Set Session settings
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())

        # Set Saver
        var_to_save = [v for v in tf.global_variables() if 'Adam' not in v.name]  # Don't save redundant Adam beta/gamma
        saver = tf.train.Saver(var_list=var_to_save, max_to_keep=cfg.epoch)

        # Display parameters
        total_p = np.sum([np.prod(v.get_shape().as_list()) for v in var_to_save]).astype(np.int32)
        train_p = np.sum([np.prod(v.get_shape().as_list())
                          for v in tf.trainable_variables()]).astype(np.int32)
        logger.info('Total Parameters: {}'.format(total_p))
        logger.info('Trainable Parameters: {}'.format(train_p))

        # read snapshot
        # latest = os.path.join(cfg.logdir, 'model.ckpt-4680')
        # saver.restore(sess, latest)

        # Set summary op
        summary_op = tf.summary.merge_all()

        # Start coord & queue
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Set summary writer
        if not os.path.exists(cfg.logdir + '/caps/{}/train_log/'.format(dataset_name)):
            os.makedirs(cfg.logdir + '/caps/{}/train_log/'.format(dataset_name))
        summary_writer = tf.summary.FileWriter(
            cfg.logdir + f"/caps/{dataset_name}/train_log/", graph=sess.graph)  # graph = sess.graph, huge!

        # Main loop
        m_min = 0.2
        m_max = 0.9
        m = m_min
        for step in range(cfg.epoch * num_batches_per_epoch + 1):
            tic = time.time()
            # TF queue would pop batch until no file
            try:
                _, loss_value, summary_str = sess.run([train_op, loss, summary_op], feed_dict={m_op: m})
                logger.info('%d iteration finishs in ' % step + '%f second' %
                            (time.time() - tic) + ' loss=%f' % loss_value)
            except KeyboardInterrupt:
                sess.close()
                sys.exit()
            except tf.errors.InvalidArgumentError:
                logger.warning('%d iteration contains NaN gradients. Discard.' % step)
                continue
            else:
                if step % 5 == 0:
                    summary_writer.add_summary(summary_str, step)

                """Epoch wise linear annealling."""
                if (step % num_batches_per_epoch) == 0:
                    if step > 0:
                        m += (m_max - m_min) / (cfg.epoch * cfg.m_schedule)
                        if m > m_max:
                            m = m_max

                    # Save model periodically
                    ckpt_path = os.path.join(cfg.logdir + '/caps/{}/'.format(dataset_name), 'model-{:.4f}.ckpt'.format(loss_value))
                    saver.save(sess, ckpt_path, global_step=step)
Пример #13
0
def main(args):
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        """Set reproduciable random seed"""
        tf.set_random_seed(1234)

        coord_add = cfg.get_coord_add(dataset_name)
        dataset_size = cfg.get_dataset_size_train(dataset_name)
        num_classes = cfg.get_num_classes(dataset_name)
        #数据的创建要放在同一个图中,否则容易报错
        create_inputs = cfg.get_create_inputs(dataset_name,
                                              is_train=True,
                                              epochs=cfg.epoch)
        """Get global_step."""
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        """Get batches per epoch."""
        num_batches_per_epoch = int(dataset_size / cfg.batch_size)
        """Use exponential decay leanring rate?"""
        lrn_rate = tf.maximum(
            tf.train.exponential_decay(1e-1, global_step,
                                       num_batches_per_epoch, 0.95), 1e-5)
        tf.summary.scalar('learning_rate', lrn_rate)
        opt = tf.train.AdamOptimizer()  # lrn_rate
        """Get batch from data queue."""
        batch_x, batch_labels = create_inputs()
        # batch_y = tf.one_hot(batch_labels, depth=10, axis=1, dtype=tf.float32)
        """Define the dataflow graph."""
        m_op = tf.placeholder(dtype=tf.float32, shape=())
        with tf.device('/gpu:0'):
            with slim.arg_scope([slim.variable], device='/gpu:0'):
                # batch_x = slim.batch_norm(batch_x, center=False, is_training=True, trainable=True)
                output, pose_out = net.build_arch(batch_x,
                                                  coord_add,
                                                  is_train=True,
                                                  num_classes=num_classes)
                # loss = net.cross_ent_loss(output, batch_labels)
                tf.logging.debug(pose_out.get_shape())
                loss, spread_loss, mse, _ = net.spread_loss(
                    output, pose_out, batch_x, batch_labels, m_op)

                acc = net.test_accuracy(output, batch_labels)
                tf.summary.scalar('spread_loss', spread_loss)
                tf.summary.scalar('reconstruction_loss', mse)
                tf.summary.scalar('all_loss', loss)
                tf.summary.scalar('train_acc', acc)
            """Compute gradient."""
            grad = opt.compute_gradients(loss)
            # See: https://stackoverflow.com/questions/40701712/how-to-check-nan-in-gradients-in-tensorflow-when-updating
            grad_check = [
                tf.check_numerics(g, message='Gradient NaN Found!')
                for g, _ in grad if g is not None
            ] + [tf.check_numerics(loss, message='Loss NaN Found')]
        """Apply graident."""
        with tf.control_dependencies(grad_check):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = opt.apply_gradients(grad, global_step=global_step)
        """Set Session settings."""
        session_config = tf.ConfigProto(
            device_count={'GPU': 0},
            gpu_options={
                'allow_growth': 1,
                # 'per_process_gpu_memory_fraction': 0.1,
                'visible_device_list': '0'
            },
            allow_soft_placement=True)
        sess = tf.Session(config=session_config)
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        """Set Saver."""
        var_to_save = [
            v for v in tf.global_variables() if 'Adam' not in v.name
        ]  # Don't save redundant Adam beta/gamma
        saver = tf.train.Saver(var_list=var_to_save, max_to_keep=5)
        """Display parameters"""
        total_p = np.sum([
            np.prod(v.get_shape().as_list()) for v in var_to_save
        ]).astype(np.int32)
        train_p = np.sum([
            np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
        ]).astype(np.int32)
        logger.info('Total Parameters: {}'.format(total_p))
        logger.info('Trainable Parameters: {}'.format(train_p))

        ####################read snapshot########################################
        if restore_model:
            model_file = tf.train.latest_checkpoint(ckpt)
            saver.restore(sess, model_file)
        ####################    end   ########################################
        """Set summary op."""
        summary_op = tf.summary.merge_all()
        """Start coord & queue."""
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        """Set summary writer"""
        if not os.path.exists(cfg.logdir +
                              '/caps/{}/train_log/'.format(dataset_name)):
            os.makedirs(cfg.logdir +
                        '/caps/{}/train_log/'.format(dataset_name))
        summary_writer = tf.summary.FileWriter(
            cfg.logdir + '/caps/{}/train_log/'.format(dataset_name),
            graph=sess.graph)  # graph = sess.graph, huge!
        """Main loop."""
        m_min = 0.2
        m_max = 0.9
        m = m_min

        for step in range(start_epoch * num_batches_per_epoch,
                          cfg.epoch * num_batches_per_epoch + 1):

            tic = time.time()
            """"TF queue would pop batch until no file"""
            try:
                _, loss_value, summary_str = sess.run(
                    [train_op, loss, summary_op], feed_dict={m_op: m})
                logger.info('%d iteration finishs in ' % step + '%f second' %
                            (time.time() - tic) + ' loss=%f' % loss_value)
            except KeyboardInterrupt:
                sess.close()
                sys.exit()
            except tf.errors.InvalidArgumentError:
                logger.warning(
                    '%d iteration contains NaN gradients. Discard.' % step)
                continue
            else:
                """Write to summary."""
                if step % 5 == 0:
                    summary_writer.add_summary(summary_str, step)
                """Epoch wise linear annealling."""
                epoch_th = step / num_batches_per_epoch

                if (((epoch_th + 1) % 5)) == 0:
                    print('epoch_th: %d ' % epoch_th)
                    if step > 0:
                        m += (m_max - m_min) / (cfg.epoch * cfg.m_schedule)
                        if m > m_max:
                            m = m_max
                    """Save model periodically"""
                    ckpt_path = os.path.join(
                        cfg.logdir + '/caps/{}/'.format(dataset_name),
                        'model-{:.4f}.ckpt'.format(loss_value))
                    saver.save(sess, ckpt_path, global_step=step)
Пример #14
0
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 3 and isinstance(args[1], str) and isinstance(
        args[2], str)
    dataset_name = args[1]
    model_name = args[2]
    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    coord_add = get_coord_add(dataset_name)
    dataset_size_train = get_dataset_size_train(dataset_name)
    dataset_size_test = get_dataset_size_test(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(dataset_name,
                                      is_train=False,
                                      epochs=cfg.epoch)

    with tf.Graph().as_default():
        num_batches_per_epoch_train = int(dataset_size_train / cfg.batch_size)
        num_batches_test = 2  # int(dataset_size_test / cfg.batch_size * 0.1)

        batch_x, batch_labels = create_inputs()
        batch_squash = tf.divide(batch_x, 255.)
        batch_x_norm = slim.batch_norm(batch_x,
                                       center=False,
                                       is_training=False,
                                       trainable=False)
        output, pose_out = net.build_arch(batch_x_norm,
                                          coord_add,
                                          is_train=False,
                                          num_classes=num_classes)
        tf.logging.debug(pose_out.get_shape())

        batch_acc = net.test_accuracy(output, batch_labels)
        m_op = tf.constant(0.9)
        loss, spread_loss, mse, recon_img_squash = net.spread_loss(
            output, pose_out, batch_squash, batch_labels, m_op)
        tf.summary.scalar('spread_loss', spread_loss)
        tf.summary.scalar('reconstruction_loss', mse)
        tf.summary.scalar('all_loss', loss)
        data_size = int(batch_x.get_shape()[1])
        recon_img = tf.multiply(
            tf.reshape(recon_img_squash,
                       shape=[cfg.batch_size, data_size, data_size, 1]), 255.)
        orig_img = tf.reshape(batch_x,
                              shape=[cfg.batch_size, data_size, data_size, 1])
        tf.summary.image('orig_image', orig_img)
        tf.summary.image('recon_image', recon_img)
        saver = tf.train.Saver()

        step = 0

        tf.summary.scalar('accuracy', batch_acc)
        summary_op = tf.summary.merge_all()

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)) as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            if not os.path.exists(cfg.test_logdir +
                                  '/{}/{}/'.format(model_name, dataset_name)):
                os.makedirs(cfg.test_logdir +
                            '/{}/{}/'.format(model_name, dataset_name))
            summary_writer = tf.summary.FileWriter(
                cfg.test_logdir + '/{}/{}/'.format(model_name, dataset_name),
                graph=sess.graph)  # graph=sess.graph, huge!

            files = os.listdir(cfg.logdir +
                               '/{}/{}/'.format(model_name, dataset_name))
            for epoch in range(45, 46):
                # requires a regex to adapt the loss value in the file name here
                ckpt_re = ".ckpt-%d" % (num_batches_per_epoch_train * epoch)
                for __file in files:
                    if __file.endswith(ckpt_re + ".index"):
                        ckpt = os.path.join(
                            cfg.logdir +
                            '/{}/{}/'.format(model_name, dataset_name),
                            __file[:-6])
                #ckpt = os.path.join(cfg.logdir, "model.ckpt-%d" % (num_batches_per_epoch_train * epoch))
                ############Comentar linea de abajo
                #ckpt = os.path.join(cfg.logdir, "caps/mnist/model-0.3764.ckpt-1718")
                saver.restore(sess, ckpt)

                accuracy_sum = 0
                for i in range(num_batches_test):
                    batch_acc_v, summary_str, orig_image, recon_image = sess.run(
                        [batch_acc, summary_op, orig_img, recon_img])
                    print('%d batches are tested.' % step)
                    summary_writer.add_summary(summary_str, step)

                    accuracy_sum += batch_acc_v

                    step += 1
                    # display original/reconstructed images in matplotlib
                    plot_imgs(orig_image, i, 'ori')
                    plot_imgs(recon_image, i, 'rec')

                ave_acc = accuracy_sum / num_batches_test
                print('the average accuracy is %f' % ave_acc)
Пример #15
0
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 2 and isinstance(args[1], str)
    dataset_name = args[1]
    logger.info('Using dataset: {}'.format(dataset_name))
    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    coord_add = get_coord_add(dataset_name)
    dataset_size = get_dataset_size_train(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(dataset_name,
                                      is_train=True,
                                      epochs=cfg.epoch)

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        """Get global_step."""
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        """Get batches per epoch."""
        num_batches_per_epoch = int(dataset_size / cfg.batch_size)
        """Use exponential decay leanring rate?"""
        lrn_rate = tf.maximum(
            tf.train.exponential_decay(1e-3, global_step,
                                       num_batches_per_epoch, 0.8), 1e-5)
        tf.summary.scalar('learning_rate', lrn_rate)
        opt = tf.train.AdamOptimizer()  # lrn_rate
        """Get batch from data queue."""
        batch_x, batch_labels = create_inputs()
        # batch_y = tf.one_hot(batch_labels, depth=10, axis=1, dtype=tf.float32)
        """Define the dataflow graph."""
        with tf.device('/gpu:0'):
            with slim.arg_scope([slim.variable], device='/cpu:0'):
                batch_x_squash = tf.divide(batch_x, 255.)
                batch_x = slim.batch_norm(batch_x,
                                          center=False,
                                          is_training=True,
                                          trainable=True)
                output = net.build_arch_baseline(batch_x,
                                                 is_train=True,
                                                 num_classes=num_classes)
                loss, recon_loss, _ = net.cross_ent_loss(
                    output, batch_x_squash, batch_labels)
                acc = net.test_accuracy(output, batch_labels)
                tf.summary.scalar('train_acc', acc)
                tf.summary.scalar('recon_loss', recon_loss)
                tf.summary.scalar('all_loss', loss)
            """Compute gradient."""
            grad = opt.compute_gradients(loss)
            # See: https://stackoverflow.com/questions/40701712/how-to-check-nan-in-gradients-in-tensorflow-when-updating
            grad_check = [
                tf.check_numerics(g, message='Gradient NaN Found!')
                for g, _ in grad if g is not None
            ] + [tf.check_numerics(loss, message='Loss NaN Found')]
        """Apply graident."""
        with tf.control_dependencies(grad_check):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = opt.apply_gradients(grad, global_step=global_step)
        """Set Session settings."""
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=False))
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        """Set Saver."""
        var_to_save = [
            v for v in tf.global_variables() if 'Adam' not in v.name
        ]  # Don't save redundant Adam beta/gamma
        saver = tf.train.Saver(var_list=var_to_save, max_to_keep=cfg.epoch)
        """Display parameters"""
        total_p = np.sum([
            np.prod(v.get_shape().as_list()) for v in var_to_save
        ]).astype(np.int32)
        train_p = np.sum([
            np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
        ]).astype(np.int32)
        logger.info('Total Parameters: {}'.format(total_p))
        logger.info('Trainable Parameters: {}'.format(train_p))

        # read snapshot
        # latest = os.path.join(cfg.logdir, 'model.ckpt-4680')
        # saver.restore(sess, latest)
        """Set summary op."""
        summary_op = tf.summary.merge_all()
        """Start coord & queue."""
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        """Set summary writer"""
        if not os.path.exists(
                cfg.logdir +
                '/cnn_baseline/{}/train_log/'.format(dataset_name)):
            os.makedirs(cfg.logdir +
                        '/cnn_baseline/{}/train_log/'.format(dataset_name))
        summary_writer = tf.summary.FileWriter(
            cfg.logdir + '/cnn_baseline/{}/train_log/'.format(dataset_name),
            graph=sess.graph)
        """Main loop."""
        for step in range(cfg.epoch * num_batches_per_epoch + 1):
            tic = time.time()
            """"TF queue would pop batch until no file"""
            try:
                _, loss_value, summary_str = sess.run(
                    [train_op, loss, summary_op])
                logger.info('%d iteration finishs in ' % step + '%f second' %
                            (time.time() - tic) + ' loss=%f' % loss_value)
            except KeyboardInterrupt:
                sess.close()
                sys.exit()
            except tf.errors.InvalidArgumentError:
                logger.warning(
                    '%d iteration contains NaN gradients. Discard.' % step)
                continue
            else:
                """Write to summary."""
                if step % 5 == 0:
                    summary_writer.add_summary(summary_str, step)
                """Epoch wise linear annealling."""
                if (step % num_batches_per_epoch) == 0:
                    """Save model periodically"""
                    ckpt_path = os.path.join(
                        cfg.logdir + '/cnn_baseline/{}'.format(dataset_name),
                        'model-{:.4f}.ckpt'.format(loss_value))
                    saver.save(sess, ckpt_path, global_step=step)
        """Join threads"""
        coord.request_stop()
        coord.join(threads)
Пример #16
0
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 2 and isinstance(args[1], str)
    dataset_name = args[1]
    logger.info('Using dataset: {}'.format(dataset_name))

    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    coord_add = get_coord_add(dataset_name)
    dataset_size = get_dataset_size_train(dataset_name)
    num_classes = get_num_classes(dataset_name)
    create_inputs = get_create_inputs(dataset_name, is_train=True, epochs=cfg.epoch)

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        """Get global_step."""
        global_step = tf.get_variable(
            'global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        """Get batches per epoch."""
        num_batches_per_epoch = int(dataset_size / cfg.batch_size)

        """Use exponential decay leanring rate?"""
        lrn_rate = tf.maximum(tf.train.exponential_decay(
            1e-3, global_step, num_batches_per_epoch, 0.8), 1e-5)
        tf.summary.scalar('learning_rate', lrn_rate)
        opt = tf.train.AdamOptimizer()  # lrn_rate

        """Get batch from data queue."""
        batch_x, batch_labels = create_inputs()
        # batch_y = tf.one_hot(batch_labels, depth=10, axis=1, dtype=tf.float32)

        """Define the dataflow graph."""
        with tf.device('/gpu:0'):
            with slim.arg_scope([slim.variable], device='/cpu:0'):
                batch_x_squash = tf.divide(batch_x, 255.)
                batch_x = slim.batch_norm(batch_x, center=False, is_training=True, trainable=True)
                output = net.build_arch_baseline(batch_x, is_train=True,
                                                 num_classes=num_classes)
                loss, recon_loss, _ = net.cross_ent_loss(output, batch_x_squash, batch_labels)
                acc = net.test_accuracy(output, batch_labels)
                tf.summary.scalar('train_acc', acc)
                tf.summary.scalar('recon_loss', recon_loss)
                tf.summary.scalar('all_loss', loss)

            """Compute gradient."""
            grad = opt.compute_gradients(loss)
            # See: https://stackoverflow.com/questions/40701712/how-to-check-nan-in-gradients-in-tensorflow-when-updating
            grad_check = [tf.check_numerics(g, message='Gradient NaN Found!')
                          for g, _ in grad if g is not None] + [tf.check_numerics(loss, message='Loss NaN Found')]

        """Apply graident."""
        with tf.control_dependencies(grad_check):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = opt.apply_gradients(grad, global_step=global_step)

        """Set Session settings."""
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False))
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())

        """Set Saver."""
        var_to_save = [v for v in tf.global_variables(
        ) if 'Adam' not in v.name]  # Don't save redundant Adam beta/gamma
        saver = tf.train.Saver(var_list=var_to_save, max_to_keep=cfg.epoch)

        """Display parameters"""
        total_p = np.sum([np.prod(v.get_shape().as_list()) for v in var_to_save]).astype(np.int32)
        train_p = np.sum([np.prod(v.get_shape().as_list())
                          for v in tf.trainable_variables()]).astype(np.int32)
        logger.info('Total Parameters: {}'.format(total_p))
        logger.info('Trainable Parameters: {}'.format(train_p))

        # read snapshot
        # latest = os.path.join(cfg.logdir, 'model.ckpt-4680')
        # saver.restore(sess, latest)
        """Set summary op."""
        summary_op = tf.summary.merge_all()

        """Start coord & queue."""
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        """Set summary writer"""
        if not os.path.exists(cfg.logdir + '/cnn_baseline/{}/train_log/'.format(dataset_name)):
            os.makedirs(cfg.logdir + '/cnn_baseline/{}/train_log/'.format(dataset_name))
        summary_writer = tf.summary.FileWriter(
            cfg.logdir + '/cnn_baseline/{}/train_log/'.format(dataset_name), graph=sess.graph)

        """Main loop."""
        for step in range(cfg.epoch * num_batches_per_epoch + 1):
            tic = time.time()
            """"TF queue would pop batch until no file"""
            try:
                _, loss_value, summary_str = sess.run(
                    [train_op, loss, summary_op])
                logger.info('%d iteration finishs in ' % step + '%f second' %
                            (time.time() - tic) + ' loss=%f' % loss_value)
            except KeyboardInterrupt:
                sess.close()
                sys.exit()
            except tf.errors.InvalidArgumentError:
                logger.warning('%d iteration contains NaN gradients. Discard.' % step)
                continue
            else:
                """Write to summary."""
                if step % 5 == 0:
                    summary_writer.add_summary(summary_str, step)

                """Epoch wise linear annealling."""
                if (step % num_batches_per_epoch) == 0:

                    """Save model periodically"""
                    ckpt_path = os.path.join(
                        cfg.logdir + '/cnn_baseline/{}'.format(dataset_name), 'model-{:.4f}.ckpt'.format(loss_value))
                    saver.save(sess, ckpt_path, global_step=step)

        """Join threads"""
        coord.join(threads)
Пример #17
0
def main(args):
    """Get dataset hyperparameters."""
    assert len(args) == 2 and isinstance(args[1], str)
    dataset_name = args[1]
    logger.info('Using dataset: {}'.format(dataset_name))
    """Set reproduciable random seed"""
    tf.set_random_seed(1234)

    coord_add = get_coord_add(dataset_name)
    dataset_size = get_dataset_size_train(dataset_name)
    num_classes = get_num_classes(dataset_name)

    # Prepare Training Data
    (x_train, y_train), (x_test, y_test) = utils.load_mnist_excluded()

    with tf.Graph().as_default():  #, tf.device('/cpu:0'):

        # Placeholders for input data and the targets
        x_input = tf.placeholder(tf.float32, (None, *IMG_DIM), name='Input')
        y_target = tf.placeholder(tf.int32, [
            None,
        ], name='Target')
        """Get global_step."""
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        """Get batches per epoch."""
        num_batches_per_epoch = int(dataset_size / cfg.batch_size)
        """Use exponential decay leanring rate?"""
        lrn_rate = tf.maximum(
            tf.train.exponential_decay(1e-3, global_step,
                                       num_batches_per_epoch, 0.8), 1e-5)
        tf.summary.scalar('learning_rate', lrn_rate)
        opt = tf.train.AdamOptimizer()  # lrn_rate
        """Define the dataflow graph."""
        m_op = tf.placeholder(dtype=tf.float32, shape=())
        with tf.device('/gpu:0'):
            with slim.arg_scope([slim.variable]):  #, device='/cpu:0'):
                sample_batch = tf.identity(x_input)
                batch_labels = tf.identity(y_target)
                batch_squash = tf.divide(sample_batch, 255.)
                batch_x = slim.batch_norm(sample_batch,
                                          center=False,
                                          is_training=True,
                                          trainable=True)
                output, pose_out = net.build_arch(batch_x,
                                                  coord_add,
                                                  is_train=True,
                                                  num_classes=num_classes)

                tf.logging.debug(pose_out.get_shape())
                loss, spread_loss, mse, reconstruction = net.spread_loss(
                    output, pose_out, batch_squash, batch_labels, m_op)
                sample_batch = tf.squeeze(sample_batch)
                decode_res_op = tf.concat([
                    sample_batch,
                    255 * tf.reshape(reconstruction,
                                     [cfg.batch_size, IMAGE_SIZE, IMAGE_SIZE])
                ],
                                          axis=0)
                acc = net.test_accuracy(output, batch_labels)
                tf.summary.scalar('spread_loss', spread_loss)
                tf.summary.scalar('reconstruction_loss', mse)
                tf.summary.scalar('all_loss', loss)
                tf.summary.scalar('train__batch_acc', acc)
            """Compute gradient."""
            grad = opt.compute_gradients(loss)
            # See: https://stackoverflow.com/questions/40701712/how-to-check-nan-in-gradients-in-tensorflow-when-updating
            grad_check = [
                tf.check_numerics(g, message='Gradient NaN Found!')
                for g, _ in grad if g is not None
            ] + [tf.check_numerics(loss, message='Loss NaN Found')]
        """Apply graident."""
        with tf.control_dependencies(grad_check):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = opt.apply_gradients(grad, global_step=global_step)
        """Set Session settings."""
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=False))
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        """Set Saver."""
        var_to_save = [
            v for v in tf.global_variables() if 'Adam' not in v.name
        ]  # Don't save redundant Adam beta/gamma
        saver = tf.train.Saver(var_list=var_to_save, max_to_keep=cfg.epoch)
        """Display parameters"""
        total_p = np.sum([
            np.prod(v.get_shape().as_list()) for v in var_to_save
        ]).astype(np.int32)
        train_p = np.sum([
            np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()
        ]).astype(np.int32)
        logger.info('Total Parameters: {}'.format(total_p))
        logger.info('Trainable Parameters: {}'.format(train_p))

        # read snapshot
        # latest = os.path.join(cfg.logdir, 'model.ckpt-4680')
        # saver.restore(sess, latest)
        """Set summary op."""
        summary_op = tf.summary.merge_all()
        """Start coord & queue."""
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        """Set summary writer"""
        if not os.path.exists(cfg.logdir +
                              '/caps/{}/train_log/'.format(dataset_name)):
            os.makedirs(cfg.logdir +
                        '/caps/{}/train_log/'.format(dataset_name))
        summary_writer = tf.summary.FileWriter(
            cfg.logdir + '/caps/{}/train_log/'.format(dataset_name),
            graph=sess.graph)  # graph = sess.graph, huge!

        if not os.path.exists(cfg.logdir +
                              '/caps/{}/images/'.format(dataset_name)):
            os.makedirs(cfg.logdir + '/caps/{}/images/'.format(dataset_name))
        """Main loop."""
        m_min = 0.2
        m_max = 0.9
        m = m_min
        max_iter = cfg.epoch * num_batches_per_epoch + 1

        for step in range(max_iter):
            tic = time.time()
            """"TF queue would pop batch until no file"""

            batch_x, batch_y = utils.get_random_mnist_batch(
                x_train, y_train, cfg.batch_size)

            try:
                _, loss_value, train_acc_val, summary_str, mse_value = sess.run(
                    [train_op, loss, acc, summary_op, mse],
                    feed_dict={
                        m_op: m,
                        x_input: batch_x,
                        y_target: batch_y
                    })

                sys.stdout.write(ERASE_LINE)
                sys.stdout.write('\r\r%d/%d iteration finishes in ' %
                                 (step, max_iter) + '%f second' %
                                 (time.time() - tic) +
                                 ' training accuracy = %f' % train_acc_val +
                                 ' loss=%f' % loss_value +
                                 '\treconstruction_loss=%f' % mse_value)
                sys.stdout.flush()
                time.sleep(0.001)

            except KeyboardInterrupt:
                sess.close()
                sys.exit()
            except tf.errors.InvalidArgumentError:
                logger.warning(
                    '%d iteration contains NaN gradients. Discard.' % step)
                continue
            else:
                """Write to summary."""
                if step % 10 == 0:
                    summary_writer.add_summary(summary_str, step)

                if step % 200 == 0:
                    images = sess.run(decode_res_op,
                                      feed_dict={
                                          m_op: m,
                                          x_input: batch_x,
                                          y_target: batch_y
                                      })
                    image = combine_images(images)
                    img_name = cfg.logdir + '/caps/{}/images/'.format(
                        dataset_name) + "/step_{}.png".format(str(step))
                    Image.fromarray(image.astype(np.uint8)).save(img_name)
                """Epoch wise linear annealling."""
                if (step % num_batches_per_epoch) == 0:
                    if step > 0:
                        m += (m_max - m_min) / (cfg.epoch * cfg.m_schedule)
                        if m > m_max:
                            m = m_max
                    """Save model periodically """
                    ckpt_path = os.path.join(
                        cfg.logdir + '/caps/{}/'.format(dataset_name),
                        'model-{:.4f}.ckpt'.format(loss_value))
                    saver.save(sess, ckpt_path, global_step=step)

        ckpt_path = os.path.join(cfg.logdir + '/caps/{}/'.format(dataset_name),
                                 'finall-model-{:.4f}.ckpt'.format(loss_value))
        saver.save(sess, ckpt_path, global_step=step)

        print('Training is finished!')