示例#1
0
 def __init__(self, obj_threshold, nms_threshold, classes_file,
              anchors_file):
     """
     Introduction
     ------------
         初始化函数
     Parameters
     ----------
         obj_threshold: 目标检测为物体的阈值
         nms_threshold: nms阈值
     """
     self.obj_threshold = obj_threshold
     self.nms_threshold = nms_threshold
     self.classes_path = classes_file
     self.anchors_path = anchors_file
     self.class_names = self._get_class()
     self.anchors = self._get_anchors()
     hsv_tuples = [(x / len(self.class_names), 1., 1.)
                   for x in range(len(self.class_names))]
     self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
     self.colors = list(
         map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
             self.colors))
     random.seed(10101)
     random.shuffle(self.colors)
     random.seed(None)
     self.model = yolo(config.norm_epsilon,
                       config.norm_decay,
                       self.anchors_path,
                       self.classes_path,
                       pre_train=False)
 def predict(self, inputs, image_shape):
     """
     Introduction
     ------------
         构建预测模型
     Parameters
     ----------
         inputs: 处理之后的输入图片
         image_shape: 图像原始大小
     Returns
     -------
         boxes: 物体框坐标
         scores: 物体概率值
         classes: 物体类别
     """
     model = yolo(config.norm_epsilon,
                  config.norm_decay,
                  self.anchors_path,
                  self.classes_path,
                  pre_train=False)
     output = model.yolo_inference(inputs,
                                   config.num_anchors // 3,
                                   config.num_classes,
                                   training=False)
     boxes, scores, classes = self.eval(output, image_shape, max_boxes=20)
     return boxes, scores, classes
 def predict(self, inputs, image_shape):
     """
     Introduction
     ------------
         detect the boxes and their classes
     Parameters
     ----------
         inputs: the image 
         image_shape: 
     Returns
     -------
         boxes: All of the boxes. 
         scores: scores for each box.
         classes: for each box.
     """
     a
     model = yolo(config.norm_epsilon,
                  config.norm_decay,
                  self.anchors_path,
                  self.classes_path,
                  pre_train=False)
     output = model.yolo_inference(inputs,
                                   config.num_anchors // 3,
                                   config.num_classes,
                                   training=False)
     boxes, scores, classes = self.eval(output, image_shape, max_boxes=20)
     return boxes, scores, classes
示例#4
0
    def predict(self, inputs, image_shape):

        model = yolo(config.norm_epsilon,
                     config.norm_decay,
                     self.anchors_path,
                     self.classes_path,
                     pre_train=False)
        output = model.yolo_inference(inputs,
                                      config.num_anchors // 3,
                                      config.num_classes,
                                      training=False)
        boxes, scores, classes = self.eval(output, image_shape, max_boxes=20)
        return boxes, scores, classes
def train():
    """
    Introduction
    ------------
        This function will train the yolo3 (and you can just load the darknet weights)
    """
    train_reader = Reader('train',
                          config.data_dir,
                          config.anchors_path,
                          config.num_classes,
                          input_shape=config.input_shape,
                          max_boxes=config.max_boxes)
    train_data = train_reader.build_dataset(config.train_batch_size)
    is_training = tf.placeholder(tf.bool, shape=[])
    iterator = train_data.make_one_shot_iterator()
    images, bbox, bbox_true_13, bbox_true_26, bbox_true_52 = iterator.get_next(
    )
    images.set_shape([None, config.input_shape, config.input_shape, 3])
    bbox.set_shape([None, config.max_boxes, 5])
    grid_shapes = [
        config.input_shape // 32, config.input_shape // 16,
        config.input_shape // 8
    ]
    bbox_true_13.set_shape(
        [None, grid_shapes[0], grid_shapes[0], 3, 5 + config.num_classes])
    bbox_true_26.set_shape(
        [None, grid_shapes[1], grid_shapes[1], 3, 5 + config.num_classes])
    bbox_true_52.set_shape(
        [None, grid_shapes[2], grid_shapes[2], 3, 5 + config.num_classes])
    # draw_box(images, bbox)
    model = yolo(config.norm_epsilon, config.norm_decay, config.anchors_path,
                 config.classes_path, config.pre_train)
    bbox_true = [bbox_true_13]
    output = model.yolo_inference(images, config.num_anchors / 3,
                                  config.num_classes, is_training)
    loss, loss_xy, loss_wh, loss_conf, loss_class = model.yolo_loss(
        output, bbox_true, model.anchors, config.num_classes,
        config.ignore_thresh)
    l2_loss = tf.losses.get_regularization_loss()
    loss += l2_loss
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('loss_xy', loss_xy)
    tf.summary.scalar('loss_wh', loss_wh)
    tf.summary.scalar('loss_conf', loss_conf)
    # tf.summary.scalar('loss_class',loss_class)
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(config.learning_rate,
                                    global_step,
                                    decay_steps=config.decay_step,
                                    decay_rate=0.96)
    tf.summary.scalar('learning rate', lr)
    merged_summary = tf.summary.merge_all()
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    # 如果读取预训练权重,则冻结darknet53网络的变量
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        if config.pre_train:
            train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='yolo')
            train_op = optimizer.minimize(loss=loss,
                                          global_step=global_step,
                                          var_list=train_var)
        else:
            train_op = optimizer.minimize(loss=loss, global_step=global_step)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
        ckpt = tf.train.get_checkpoint_state(config.model_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('restore model', ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(init)
        if config.pre_train is True:
            load_ops = load_weights(tf.global_variables(scope='darknet53'),
                                    config.darknet53_weights_path)
            sess.run(load_ops)
        summary_writer = tf.summary.FileWriter(config.log_dir, sess.graph)
        loss_value = 0
        for epoch in range(config.Epoch):
            for step in range(int(config.train_num / config.train_batch_size)):
                try:
                    start_time = time.time()

                    summary, train_loss, train_loss_xy, train_loss_wh, train_loss_conf, train_loss_class, global_step_value, _ =\
                        sess.run([merged_summary, loss,loss_xy, loss_wh, loss_conf, loss_class,
                                global_step, train_op], feed_dict={is_training: True})
                    loss_value += train_loss
                    duration = time.time() - start_time
                    examples_per_sec = float(
                        duration) / config.train_batch_size
                    format_str = (
                        'Epoch {} step {}, avg los: {:.3f}, train loss = {:.3f}, gs: {}, xy:{:.3f}, wh:{:.3f}, conf:{:.3f}, class:{:.3f} ( {:.3f} examples/sec; {:.3f} '
                        'sec/batch)')
                    # format_str = (
                    # 'Epoch {} step {}, avg los: {:.3f}, train loss = {:.3f}, gs: {}  ( {:.3f} examples/sec; {:.3f} ''sec/batch)')
                    # print('.')
                    # print(format_str.format(epoch, step, train_loss, global_step_value, examples_per_sec, duration))
                    print(
                        format_str.format(epoch, step,
                                          loss_value / global_step_value,
                                          train_loss, global_step_value,
                                          train_loss_xy, train_loss_wh,
                                          train_loss_conf, train_loss_class,
                                          examples_per_sec, duration))
                    # print(format_str.format(epoch, step, loss_value / global_step_value,  train_loss, global_step_value,
                    # examples_per_sec, duration))
                    summary_writer.add_summary(summary=tf.Summary(value=[
                        tf.Summary.Value(tag="train loss",
                                         simple_value=train_loss)
                    ]),
                                               global_step=step)
                    summary_writer.add_summary(summary, global_step_value)
                    if step % 100:
                        summary_writer.flush()
                except Exception as ex:
                    print(ex)
            # 每3个epoch保存一次模型
            # if epoch % 1 == 0:
            checkpoint_path = os.path.join(config.model_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=global_step_value)
            print('saved')
示例#6
0
def train():
    """
    Introduction
    ------------
        训练模型
    """
    with tf.Graph().as_default(), tf.device("/cpu:0"):
        train_reader = Reader('train',
                              config.data_dir,
                              config.anchors_path,
                              config.num_classes,
                              input_shape=config.input_shape,
                              max_boxes=config.max_boxes,
                              shuffle_size=config.shuffle_size)
        train_data = train_reader.build_dataset(config.train_batch_size)
        is_training = tf.placeholder(tf.bool, shape=[])
        iterator = train_data.make_one_shot_iterator()
        images, bbox, bbox_true_13, bbox_true_26, bbox_true_52 = iterator.get_next(
        )
        images.set_shape([None, config.input_shape, config.input_shape, 3])
        bbox.set_shape([None, config.max_boxes, 5])
        grid_shapes = [
            config.input_shape // 32, config.input_shape // 16,
            config.input_shape // 8
        ]
        bbox_true_13.set_shape(
            [None, grid_shapes[0], grid_shapes[0], 3, 5 + config.num_classes])
        bbox_true_26.set_shape(
            [None, grid_shapes[1], grid_shapes[1], 3, 5 + config.num_classes])
        bbox_true_52.set_shape(
            [None, grid_shapes[2], grid_shapes[2], 3, 5 + config.num_classes])
        #draw_box(images, bbox)
        #split data for training
        images_list = tf.split(images, gpu_num)
        #bbox_list = tf.split(bbox, gpu_num)
        bbox_true_13_list = tf.split(bbox_true_13, gpu_num)
        bbox_true_26_list = tf.split(bbox_true_26, gpu_num)
        bbox_true_52_list = tf.split(bbox_true_52, gpu_num)

        global_step = tf.Variable(0, trainable=False)
        lr = tf.train.exponential_decay(
            config.learning_rate,
            global_step,
            decay_steps=1000,
            decay_rate=0.8
        )  #decay_steps 约等于一个epoch  = total_sample/batchsize int(config.train_num/config.train_batch_size)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        tower_grads = []
        tower_loss = []
        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_id in range(gpu_num):
                with tf.device('/gpu:%d' % gpu_id):
                    with tf.name_scope('%s_%d' % ('tower', gpu_id)):
                        model = yolo(config.norm_epsilon, config.norm_decay,
                                     config.anchors_path, config.classes_path,
                                     config.pre_train)
                        bbox_true = [
                            bbox_true_13_list[gpu_id],
                            bbox_true_26_list[gpu_id],
                            bbox_true_52_list[gpu_id]
                        ]
                        output = model.yolo_inference(images_list[gpu_id],
                                                      config.num_anchors / 3,
                                                      config.num_classes,
                                                      is_training)
                        loss = model.yolo_loss(output, bbox_true,
                                               model.anchors,
                                               config.num_classes,
                                               config.ignore_thresh)
                        l2_loss = tf.losses.get_regularization_loss()
                        loss += l2_loss

                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(loss)
                        tower_grads.append(grads)
                        tower_loss.append(loss)
        loss = average_loss(tower_loss)
        grads = average_gradients(tower_grads)
        #train_op = optimizer.apply_gradients(grads, global_step=global_step)

        tf.summary.scalar('loss', loss)
        merged_summary = tf.summary.merge_all()

        # 如果读取预训练权重,则冻结darknet53网络的变量
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            if config.pre_train:
                train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                              scope='yolo')
                #train_op = optimizer.minimize(loss = loss, global_step = global_step, var_list = train_var)
                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)
            else:
                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session(
                config=tf.ConfigProto(log_device_placement=False,
                                      allow_soft_placement=True)) as sess:
            ckpt = tf.train.get_checkpoint_state(config.model_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print('restore model', ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(init)
            if config.pre_train is True:
                load_ops = load_weights(tf.global_variables(scope='darknet53'),
                                        config.darknet53_weights_path)
                sess.run(load_ops)
                print("pretrained the model")
            summary_writer = tf.summary.FileWriter(config.log_dir, sess.graph)
            loss_value = 0
            for epoch in range(config.Epoch):
                for step in range(
                        int(config.train_num / config.train_batch_size)):
                    start_time = time.time()
                    train_loss, summary, global_step_value, lr_step, _ = sess.run(
                        [loss, merged_summary, global_step, lr, train_op],
                        {is_training: True})
                    loss_value += train_loss
                    duration = time.time() - start_time
                    examples_per_sec = float(
                        duration) / config.train_batch_size
                    format_str = (
                        'Epoch {} step {} lr = {}, loss_step = {} train loss = {} ( {} examples/sec; {} '
                        'sec/batch)')
                    print(
                        format_str.format(epoch, step, lr_step, train_loss,
                                          loss_value / global_step_value,
                                          examples_per_sec, duration))
                    summary_writer.add_summary(
                        summary=tf.Summary(value=[
                            tf.Summary.Value(tag="train loss",
                                             simple_value=train_loss)
                        ]),
                        global_step=step + epoch *
                        int(config.train_num / config.train_batch_size))
                    summary_writer.add_summary(
                        summary,
                        global_step=step + epoch *
                        int(config.train_num / config.train_batch_size))
                    summary_writer.flush()
                    # 每3个epoch保存一次模型
                if epoch % 2 == 0:
                    checkpoint_path = os.path.join(config.model_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)
示例#7
0
def dstributed_train(ps_hosts, worker_hosts, job_name, task_index):
    """
    Introduction
    ------------
        分布式训练
    Parameters
    ----------
        ps_hosts: sever的host
        worker_hosts: worker的host
        job_name: 判断是作为ps还是worker
        task_index: 任务index
    """
    ps_hosts = ps_hosts.split(',')
    worker_hosts = worker_hosts.split(',')
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster, job_name = job_name, task_index = task_index)
    if job_name == 'ps':
        server.join()
    else:
        with tf.device(tf.train.replica_device_setter(worker_device = "/job:worker/task:%d" % task_index, cluster = cluster)):
            train_data = Reader('train', config.data_dir, config.anchors_path, config.num_classes, input_shape=config.input_shape, max_boxes=config.max_boxes, jitter=config.jitter, hue=config.hue, sat=config.sat, cont=config.cont, bri=config.bri)
            val_data = Reader('val', config.data_dir, config.anchors_path, config.num_classes, input_shape=config.input_shape, max_boxes=config.max_boxes)
            images_train, bbox_true_13_train, bbox_true_26_train, bbox_true_52_train = train_data.provide(config.train_batch_size)
            images_val, bbox_true_13_val, bbox_true_26_val, bbox_true_52_val = val_data.provide(config.val_batch_size)
            model = yolo(config.norm_epsilon, config.norm_decay, config.anchors_path, config.classes_path, config.pre_train)
            is_training = tf.placeholder(dtype=tf.bool, shape=[])
            images = tf.placeholder(shape=[None, 416, 416, 3], dtype=tf.float32)
            bbox_true_13 = tf.placeholder(shape=[None, 13, 13, 3, 85], dtype=tf.float32)
            bbox_true_26 = tf.placeholder(shape=[None, 26, 26, 3, 85], dtype=tf.float32)
            bbox_true_52 = tf.placeholder(shape=[None, 52, 52, 3, 85], dtype=tf.float32)
            bbox_true = [bbox_true_13, bbox_true_26, bbox_true_52]
            output = model.yolo_inference(images, config.num_anchors / 3, config.num_classes, is_training)
            loss = model.yolo_loss(output, bbox_true, model.anchors, config.num_classes, config.ignore_thresh)
            tf.summary.scalar('loss', loss)
            global_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(config.learning_rate, global_step, 20000, 0.1, staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            # 如果读取预训练权重,则冻结darknet53网络的变量
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                if config.pre_train:
                    train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='yolo')
                    train_op = optimizer.minimize(loss=loss, global_step=global_step, var_list=train_var)
                else:
                    train_op = optimizer.minimize(loss=loss, global_step=global_step)
            init = tf.global_variables_initializer()
            saver = tf.train.Saver()
            with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
                ckpt = tf.train.get_checkpoint_state(config.model_dir)
                if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                    print('restore model', ckpt.model_checkpoint_path)
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    sess.run(init)
                if config.pre_train is True:
                    load_ops = model.load_weights(tf.global_variables(scope='darknet53'), config.darknet53_weights_path)
                    sess.run(load_ops)
                summary_writer = tf.summary.FileWriter('./logs', sess.graph)
                tf.train.start_queue_runners(sess=sess)
                for epoch in range(config.Epoch):
                    for step in range(int(config.train_num / config.train_batch_size)):
                        start_time = time.time()
                        images_value, bbox_true_13_value, bbox_true_26_value, bbox_true_52_value = sess.run(
                            [images_train, bbox_true_13_train, bbox_true_26_train, bbox_true_52_train])
                        train_loss, _ = sess.run([loss, train_op], {images: images_value, bbox_true_13: bbox_true_13_value, bbox_true_26: bbox_true_26_value, bbox_true_52: bbox_true_52_value, is_training: True})
                        duration = time.time() - start_time
                        examples_per_sec = float(duration) / config.train_batch_size
                        format_str = ('Epoch {} step {},  train loss = {} ( {} examples/sec; {} ''sec/batch)')
                        print(format_str.format(epoch, step, train_loss, examples_per_sec, duration))
                        summary_writer.add_summary(summary=tf.Summary(value=[tf.Summary.Value(tag="train loss", simple_value=train_loss)]), global_step=step)
                        summary_writer.flush()
                    for step in range(int(config.val_num / config.val_batch_size)):
                        start_time = time.time()
                        images_value, bbox_true_13_value, bbox_true_26_value, bbox_true_52_value = sess.run([images_val, bbox_true_13_val, bbox_true_26_val, bbox_true_52_val])
                        val_loss = sess.run(loss, {images: images_value, bbox_true_13: bbox_true_13_value, bbox_true_26: bbox_true_26_value, bbox_true_52: bbox_true_52_value, is_training: False})
                        duration = time.time() - start_time
                        examples_per_sec = float(duration) / config.val_batch_size
                        format_str = ('Epoch {} step {}, val loss = {} ({} examples/sec; {} ''sec/batch)')
                        print(format_str.format(epoch, step, val_loss, examples_per_sec, duration))
                        summary_writer.add_summary(summary = tf.Summary(value=[tf.Summary.Value(tag = "val loss", simple_value = val_loss)]), global_step = step)
                        summary_writer.flush()
                    # 每3个epoch保存一次模型
                    if epoch % 3 == 0:
                        checkpoint_path = os.path.join(config.model_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=epoch)
示例#8
0
def train():
    """
    Introduction
    ------------
        训练模型
    """
    # 指定使用GPU的Index
    os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_index
    train_data = Reader('train', config.data_dir, config.anchors_path, config.num_classes, input_shape = config.input_shape, max_boxes = config.max_boxes, jitter = config.jitter, hue = config.hue, sat = config.sat, cont = config.cont, bri = config.bri)
    val_data = Reader('val', config.data_dir, config.anchors_path, config.num_classes, input_shape = config.input_shape, max_boxes = config.max_boxes)
    images_train, bbox_true_13_train, bbox_true_26_train, bbox_true_52_train = train_data.provide(config.train_batch_size)
    images_val, bbox_true_13_val, bbox_true_26_val, bbox_true_52_val = val_data.provide(config.val_batch_size)

    model = yolo(config.norm_epsilon, config.norm_decay, config.anchors_path, config.classes_path, config.pre_train)
    is_training = tf.placeholder(dtype = tf.bool, shape = [])
    images = tf.placeholder(shape = [None, 416, 416, 3], dtype = tf.float32)
    bbox_true_13 = tf.placeholder(shape = [None, 13, 13, 3, 85], dtype = tf.float32)
    bbox_true_26 = tf.placeholder(shape = [None, 26, 26, 3, 85], dtype = tf.float32)
    bbox_true_52 = tf.placeholder(shape = [None, 52, 52, 3, 85], dtype = tf.float32)
    bbox_true = [bbox_true_13, bbox_true_26, bbox_true_52]
    output = model.yolo_inference(images, config.num_anchors / 3, config.num_classes, is_training)
    loss = model.yolo_loss(output, bbox_true, model.anchors, config.num_classes, config.ignore_thresh)
    tf.summary.scalar('loss', loss)
    global_step = tf.Variable(0, trainable = False)
    learning_rate = tf.train.exponential_decay(config.learning_rate, global_step, 3000, 0.1)
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    # 如果读取预训练权重,则冻结darknet53网络的变量
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        if config.pre_train:
            train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='yolo')
            train_op = optimizer.minimize(loss = loss, global_step = global_step, var_list = train_var)
        else:
            train_op = optimizer.minimize(loss = loss, global_step = global_step)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session(config = tf.ConfigProto(log_device_placement = False)) as sess:
        ckpt = tf.train.get_checkpoint_state(config.model_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('restore model', ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(init)
        if config.pre_train is True:
            load_ops = model.load_weights(tf.global_variables(scope = 'darknet53'), config.darknet53_weights_path)
            sess.run(load_ops)
        summary_writer = tf.summary.FileWriter('./logs', sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess = sess, coord = coord)
        for epoch in range(config.Epoch):
            for step in range(int(config.train_num / config.train_batch_size)):
                start_time = time.time()
                images_value, bbox_true_13_value, bbox_true_26_value, bbox_true_52_value = sess.run([images_train, bbox_true_13_train, bbox_true_26_train, bbox_true_52_train])
                train_loss, _ = sess.run([loss, train_op], {images : images_value, bbox_true_13 : bbox_true_13_value, bbox_true_26 : bbox_true_26_value, bbox_true_52 : bbox_true_52_value, is_training : True})
                duration = time.time() - start_time
                examples_per_sec = float(duration) / config.train_batch_size
                format_str = ('Epoch {} step {},  train loss = {} ( {} examples/sec; {} ''sec/batch)')
                print(format_str.format(epoch, step, train_loss, examples_per_sec, duration))
                summary_writer.add_summary(summary = tf.Summary(value = [tf.Summary.Value(tag = "train loss", simple_value = train_loss)]), global_step = step)
                summary_writer.flush()
            for step in range(int(config.val_num / config.val_batch_size)):
                start_time = time.time()
                images_value, bbox_true_13_value, bbox_true_26_value, bbox_true_52_value = sess.run([images_val, bbox_true_13_val, bbox_true_26_val, bbox_true_52_val])
                val_loss = sess.run(loss, {images: images_value, bbox_true_13: bbox_true_13_value, bbox_true_26: bbox_true_26_value, bbox_true_52: bbox_true_52_value , is_training: False})
                duration = time.time() - start_time
                examples_per_sec = float(duration) / config.val_batch_size
                format_str = ('Epoch {} step {}, val loss = {} ({} examples/sec; {} ''sec/batch)')
                print(format_str.format(epoch, step, val_loss, examples_per_sec, duration))
                summary_writer.add_summary(summary = tf.Summary(value = [tf.Summary.Value(tag = "val loss", simple_value = val_loss)]), global_step = step)
                summary_writer.flush()
            # 每3个epoch保存一次模型
            if epoch % 3 == 0:
                checkpoint_path = os.path.join(config.model_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step = global_step)
        coord.request_stop()
        coord.join(threads)
示例#9
0
def train():
    """
    Introduction
    ------------

    """
    # train_reader = Reader('train', config.data_dir, config.anchors_path, config.num_classes, input_shape = config.input_shape, max_boxes = config.max_boxes)

    train_reader = Reader('train',
                          config.record_file,
                          config.anchors_path,
                          config.num_classes,
                          input_shape=config.input_shape,
                          max_boxes=config.max_boxes)
    train_data = train_reader.build_dataset(config.train_batch_size)
    is_training = tf.placeholder(tf.bool, shape=[])
    iterator = train_data.make_one_shot_iterator()
    images, bbox, bbox_true_13, bbox_true_26, bbox_true_52 = iterator.get_next(
    )
    images.set_shape([None, config.input_shape, config.input_shape, 3])
    bbox.set_shape([None, config.max_boxes, 5])
    grid_shapes = [
        config.input_shape // 32, config.input_shape // 16,
        config.input_shape // 8
    ]
    bbox_true_13.set_shape(
        [None, grid_shapes[0], grid_shapes[0], 3, 5 + config.num_classes])
    bbox_true_26.set_shape(
        [None, grid_shapes[1], grid_shapes[1], 3, 5 + config.num_classes])
    bbox_true_52.set_shape(
        [None, grid_shapes[2], grid_shapes[2], 3, 5 + config.num_classes])
    draw_box(images, bbox)
    model = yolo(config.norm_epsilon, config.norm_decay, config.anchors_path,
                 config.classes_path, config.pre_train)
    bbox_true = [bbox_true_13, bbox_true_26, bbox_true_52]
    output = model.yolo_inference(images, config.num_anchors / 3,
                                  config.num_classes, is_training)
    loss = model.yolo_loss(output, bbox_true, model.anchors,
                           config.num_classes, config.ignore_thresh)
    l2_loss = tf.losses.get_regularization_loss()
    loss += l2_loss
    tf.summary.scalar('loss', loss)
    merged_summary = tf.summary.merge_all()
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(config.learning_rate,
                                    global_step,
                                    decay_steps=2000,
                                    decay_rate=0.8)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    # darknet53
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        if config.pre_train:
            train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope='yolo')
            train_op = optimizer.minimize(loss=loss,
                                          global_step=global_step,
                                          var_list=train_var)
        else:
            train_op = optimizer.minimize(loss=loss, global_step=global_step)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
        with tf.device("/gpu:0"):
            ckpt = tf.train.get_checkpoint_state(config.model_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print('restore model', ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(init)
            if config.pre_train is True:
                load_ops = load_weights(tf.global_variables(scope='darknet53'),
                                        config.darknet53_weights_path)
                sess.run(load_ops)
            summary_writer = tf.summary.FileWriter(config.log_dir, sess.graph)
            loss_value = 0
            max_iter = config.train_num / config.train_batch_size
            for epoch in range(config.Epoch):
                for step in range(
                        int(config.train_num / config.train_batch_size)):
                    start_time = time.time()
                    train_loss, summary, global_step_value, _ = sess.run(
                        [loss, merged_summary, global_step, train_op],
                        {is_training: True})
                    loss_value += train_loss
                    step_count = epoch * max_iter + step
                    if step_count % 20 == 0 and step_count != 0:
                        duration = time.time() - start_time
                        examples_per_sec = float(
                            duration) / config.train_batch_size
                        format_str = (
                            'Epoch {} step {},  train loss = {} ( {} examples/sec; {} '
                            'sec/batch)')
                        print(
                            format_str.format(epoch, step, loss_value / 20,
                                              examples_per_sec, duration))
                        loss_value = 0
                        summary_writer.add_summary(summary=tf.Summary(value=[
                            tf.Summary.Value(tag="train loss",
                                             simple_value=train_loss)
                        ]),
                                                   global_step=step)
                        summary_writer.add_summary(summary, step)
                        summary_writer.flush()
                        if step_count % 500 == 0:
                            checkpoint_path = os.path.join(
                                config.model_dir, 'model.ckpt')
                            saver.save(sess,
                                       checkpoint_path,
                                       global_step=global_step)
                checkpoint_path = os.path.join(config.model_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=global_step)
示例#10
0
def train():
    """
    Introduction
    ------------
        训练模型
    """
    # gpu_num = check_available_gpus()
    #
    # for gpu_id in range(int(gpu_num)):
    # with tf.device(tf.DeviceSpec(device_type="GPU", device_index=gpu_id)):

    # with tf.variable_scope(tf.get_variable_scope(), reuse=(gpu_id > 0)):
    # with tf.variable_scope(tf.get_variable_scope(), reuse=False):

    #-----------------------train_data-------------------------
    train_reader = Reader('train',
                          config.data_dir,
                          config.anchors_path2,
                          config.num_classes,
                          input_shape=config.input_shape,
                          max_boxes=config.max_boxes)
    train_data = train_reader.build_dataset(config.train_batch_size)
    is_training = tf.placeholder(tf.bool, shape=[])
    iterator = train_data.make_one_shot_iterator()
    images, bbox, bbox_true_13, bbox_true_26, bbox_true_52 = iterator.get_next(
    )

    #-----------------------  definition-------------------------
    images.set_shape([None, config.input_shape, config.input_shape, 3])
    bbox.set_shape([None, config.max_boxes, 5])
    grid_shapes = [
        config.input_shape // 32, config.input_shape // 16,
        config.input_shape // 8
    ]
    lr_images = tf.image.resize_images(
        images,
        size=[config.input_shape // 4, config.input_shape // 4],
        method=0,
        align_corners=False)
    lr_images.set_shape(
        [None, config.input_shape // 4, config.input_shape // 4, 3])
    bbox_true_13.set_shape(
        [None, grid_shapes[0], grid_shapes[0], 3, 5 + config.num_classes])
    bbox_true_26.set_shape(
        [None, grid_shapes[1], grid_shapes[1], 3, 5 + config.num_classes])
    bbox_true_52.set_shape(
        [None, grid_shapes[2], grid_shapes[2], 3, 5 + config.num_classes])
    bbox_true = [bbox_true_13, bbox_true_26, bbox_true_52]

    #------------------------summary + draw-----------------------------------
    tf.summary.image('input1', images, max_outputs=3)
    draw_box(images, bbox)

    #------------------------------model---------------------------------
    model = yolo(config.norm_epsilon, config.norm_decay, config.anchors_path2,
                 config.classes_path, config.pre_train)
    # with tf.variable_scope("train_var"):
    # g_img1 = model.GAN_g1(lr_images)
    # print(g_img1.outputs)
    # tf.summary.image('img', g_img1.outputs, 3)
    # g_img2 = model.GAN_g2(g_img1)
    # print(model.g_variables)
    # net_g1 = model.GAN_g1(lr_images, is_train=True)
    with tf.variable_scope("model_gd"):
        net_g1 = model.GAN_g(lr_images, is_train=True, mask=False)
        net_g = model.GAN_g(lr_images, is_train=True, reuse=True, mask=True)

        d_real = model.yolo_inference(images,
                                      config.num_anchors / 3,
                                      config.num_classes,
                                      training=True)
        tf.get_variable_scope().reuse_variables()
        d_fake = model.yolo_inference(net_g.outputs,
                                      config.num_anchors / 3,
                                      config.num_classes,
                                      training=True)

    #---------------------------d_loss---------------------------------
    d_loss1 = model.yolo_loss(d_real, bbox_true, model.anchors,
                              config.num_classes, 1, config.ignore_thresh)
    d_loss2 = model.yolo_loss(d_fake, bbox_true, model.anchors,
                              config.num_classes, 0, config.ignore_thresh)
    d_loss = d_loss1 + d_loss2
    l2_loss = tf.losses.get_regularization_loss()
    d_loss += l2_loss

    #--------------------------g_loss------------------------------------
    adv_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(
        d_fake[3]),
                                                       logits=d_fake[3])
    # adv_loss = 1e-3 * tf.reduce_sum(adv_loss) / tf.cast(tf.shape(d_fake[3])[0], tf.float32)
    adv_loss = tf.reduce_sum(adv_loss) / tf.cast(
        tf.shape(d_fake[3])[0], tf.float32)
    mse_loss1 = tl.cost.mean_squared_error(net_g1.outputs,
                                           images,
                                           is_mean=True)
    mse_loss1 = tf.reduce_sum(mse_loss1) / tf.cast(
        tf.shape(net_g1.outputs)[0], tf.float32)
    mse_loss2 = tl.cost.mean_squared_error(net_g.outputs, images, is_mean=True)
    mse_loss2 = tf.reduce_sum(mse_loss2) / tf.cast(
        tf.shape(net_g.outputs)[0], tf.float32)
    mse_loss = mse_loss1 + mse_loss2
    # clc_loss = 2e-6 * d_loss2
    clc_loss = model.yolo_loss(d_fake, bbox_true, model.anchors,
                               config.num_classes, 1, config.ignore_thresh)
    g_loss = mse_loss + adv_loss + clc_loss
    l2_loss = tf.losses.get_regularization_loss()
    g_loss += l2_loss

    #----------------summary loss-------------------------
    # tf.summary.image('img', images, 3)
    tf.summary.scalar('d_loss', d_loss)
    tf.summary.scalar('g_loss', g_loss)
    merged_summary = tf.summary.merge_all()

    #----------------------optimizer---------------------------
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(config.learning_rate,
                                    global_step,
                                    decay_steps=2000,
                                    decay_rate=0.8)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    # 如果读取预训练权重,则冻结darknet53网络的变量
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # print(tf.all_variables())
    with tf.control_dependencies(update_ops):
        if config.pre_train:
            # aaa = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
            train_varg1 = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope='model_gd/generator/generator1')
            train_varg2 = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope='model_gd/generator/generator2')
            train_varg = train_varg1 + train_varg2
            # print(train_varg)
            train_vard = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope='model_gd/yolo_inference/discriminator')
            # print(train_vard)

            train_opg = optimizer.minimize(loss=g_loss,
                                           global_step=global_step,
                                           var_list=train_varg)
            train_opd = optimizer.minimize(loss=d_loss,
                                           global_step=global_step,
                                           var_list=train_vard)
        else:
            train_opd = optimizer.minimize(loss=d_loss,
                                           global_step=global_step)
            train_opg = optimizer.minimize(loss=g_loss,
                                           global_step=global_step)

    #-------------------------session-----------------------------------
    init = tf.global_variables_initializer()
    # tl.layers.print_all_variables()
    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=True)) as sess:
        # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        ckpt = tf.train.get_checkpoint_state(config.model_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('restore model', ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(init)
        if config.pre_train is True:
            load_ops = load_weights(tf.global_variables(scope='darknet53'),
                                    config.darknet53_weights_path)
            sess.run(load_ops)
        summary_writer = tf.summary.FileWriter(config.log_dir, sess.graph)
        dloss_value = 0
        gloss_value = 0
        for epoch in range(config.Epoch):
            for step in range(int(config.train_num / config.train_batch_size)):
                start_time = time.time()
                train_dloss, summary, global_step_value, _ = sess.run(
                    [d_loss, merged_summary, global_step, train_opd],
                    {is_training: True})
                train_gloss, summary, global_step_value, _ = sess.run(
                    [g_loss, merged_summary, global_step, train_opg],
                    {is_training: True})
                dloss_value += train_dloss
                gloss_value += train_gloss
                duration = time.time() - start_time
                examples_per_sec = float(duration) / config.train_batch_size
                print(global_step_value)

                #------------------------print(epoch)--------------------------
                format_str1 = (
                    'Epoch {} step {},  train dloss = {} train gloss = {} ( {} examples/sec; {} '
                    'sec/batch)')
                print(
                    format_str1.format(epoch, step,
                                       dloss_value / global_step_value,
                                       gloss_value / global_step_value,
                                       examples_per_sec, duration))
                # print(format_str1.format(epoch, step, train_dloss, train_gloss, examples_per_sec, duration))

                #----------------------------summary loss------------------------
                summary_writer.add_summary(summary=tf.Summary(value=[
                    tf.Summary.Value(tag="train dloss",
                                     simple_value=train_dloss)
                ]),
                                           global_step=step)
                summary_writer.add_summary(summary=tf.Summary(value=[
                    tf.Summary.Value(tag="train gloss",
                                     simple_value=train_gloss)
                ]),
                                           global_step=step)
                summary_writer.add_summary(summary, step)
                summary_writer.flush()

    #--------------------------save model------------------------------
    # 每3个epoch保存一次模型
            if epoch % 3 == 0:
                checkpoint_path = os.path.join(config.model_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=global_step)