Exemplo n.º 1
0
def train_step(images, target):
    """Run one optimisation step and log the resulting losses.

    Performs a forward pass of the module-level ``model``, sums the three
    loss components returned by ``loss.yolov3_loss`` over three output
    scales, applies the gradients through ``optimizer``, advances
    ``global_steps``, recomputes the learning rate and writes scalar
    summaries through ``writer``.

    :param images: input batch handed to ``model`` with ``training=True``.
    :param target: indexable per scale; ``target[i]`` is unpacked into the
        extra positional arguments of ``loss.yolov3_loss``.
    :return: None
    """
    with tf.GradientTape() as tape:
        outputs = model(images, training=True)

        # Keep every raw output together with its decoded counterpart.
        conv_pred_pairs = [(raw, loss.decode(raw, scale))
                           for scale, raw in enumerate(outputs)]

        # Loss components for the first three scales.
        per_scale = []
        for scale in range(3):
            raw, decoded = conv_pred_pairs[scale]
            # A leading * spreads target[scale] into separate positional
            # arguments of the call.
            per_scale.append(
                loss.yolov3_loss(decoded, raw, *target[scale], scale))

        giou_loss = sum(parts[0] for parts in per_scale)
        conf_loss = sum(parts[1] for parts in per_scale)
        prob_loss = sum(parts[2] for parts in per_scale)
        total_loss = giou_loss + conf_loss + prob_loss

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        report = ("=> STEP %4d  lr: %.6f  giou_loss: %4.2f  conf_loss: %4.2f  "
                  "prob_loss: %4.2f  total_loss: %4.2f")
        tf.print(report % (global_steps, optimizer.lr.numpy(), giou_loss,
                           conf_loss, prob_loss, total_loss))

        # Advance the step counter, then derive the next learning rate:
        # proportional to the step count while below warmup_steps, a cosine
        # expression between LR_INIT and LR_END afterwards.
        global_steps.assign_add(1)
        if global_steps < warmup_steps:
            next_lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
        else:
            progress = ((global_steps - warmup_steps) /
                        (total_steps - warmup_steps))
            next_lr = cfg.TRAIN.LR_END + 0.5 * (
                cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
                    1 + tf.cos(progress * np.pi))
        optimizer.lr.assign(next_lr.numpy())

        # Record the learning rate and every loss term for this step.
        with writer.as_default():
            for tag, value in (
                    ("lr", optimizer.lr),
                    ("loss/total_loss", total_loss),
                    ("loss/giou_loss", giou_loss),
                    ("loss/conf_loss", conf_loss),
                    ("loss/prob_loss", prob_loss),
            ):
                tf.summary.scalar(tag, value, step=global_steps)
        writer.flush()
Exemplo n.º 2
0
def get_boxes_and_scores(feats, anchors, image_shape):
    """
    Convert the predicted box coordinates to coordinates in the original
    image, then compute a score for every box.
    :param feats: feature map output by YOLO
    :param anchors: the prior boxes of one of the sizes (three in total)
    :param image_shape: shape of the original image
    :return: boxes (actual coordinates), box_scores (per-box scores)
    """
    xy, wh, confidence, class_probs = decode(feats, anchors, calc_loss=False)

    # Flatten to one row per box: 4 coordinates each.
    flat_boxes = tf.reshape(correct_boxes(xy, wh, image_shape), [-1, 4])

    # Score = confidence * class probability, one column per class.
    flat_scores = tf.reshape(confidence * class_probs,
                             [-1, cfg.num_classes])

    return flat_boxes, flat_scores
Exemplo n.º 3
0
def train_step(img, target, epoch):
    """Run one optimisation step and log the resulting losses.

    Performs a forward pass of the module-level ``model``, decodes the
    result with ``decode``, sums the three loss components returned by
    ``yolov4_loss`` over three output scales, applies the gradients through
    ``optimizer``, advances ``global_steps``, recomputes the learning rate
    and writes scalar summaries through ``writer``.

    :param img: input batch handed to ``model`` with ``training=True``.
    :param target: indexable per scale; ``target[i][0]`` and
        ``target[i][1]`` are forwarded to ``yolov4_loss``.
    :param epoch: value shown in the EPOCH field of the progress line.
    :return: None
    """
    with tf.GradientTape() as tape:
        decoded = decode(model(img, training=True))

        # Loss components for each of the three scales.
        per_scale = [
            yolov4_loss(decoded[scale],
                        target[scale][0],
                        target[scale][1],
                        i=scale)
            for scale in range(3)
        ]
        ciou_loss = sum(parts[0] for parts in per_scale)
        conf_loss = sum(parts[1] for parts in per_scale)
        prob_loss = sum(parts[2] for parts in per_scale)
        total_loss = ciou_loss + conf_loss + prob_loss

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        report = (
            "=>EPOCH %3d  STEP %4d   lr: %.6f   ciou_loss: %4.2f   conf_loss: %4.2f   "
            "prob_loss: %4.2f   total_loss: %4.2f")
        tf.print(report % (epoch, global_steps, optimizer.lr.numpy(),
                           ciou_loss, conf_loss, prob_loss, total_loss))

        # Advance the step counter, then derive the next learning rate:
        # proportional to the step count while below warmup_steps, a cosine
        # expression between LR_INIT and LR_END afterwards.
        global_steps.assign_add(1)
        if global_steps < warmup_steps:
            next_lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
        else:
            progress = ((global_steps - warmup_steps) /
                        (total_steps - warmup_steps))
            next_lr = cfg.TRAIN.LR_END + 0.5 * (
                cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
                    1 + tf.cos(progress * np.pi))
        optimizer.lr.assign(next_lr.numpy())

        # Record the learning rate and every loss term for this step.
        with writer.as_default():
            for tag, value in (
                    ("lr", optimizer.lr),
                    ("loss/total_loss", total_loss),
                    ("loss/ciou_loss", ciou_loss),
                    ("loss/conf_loss", conf_loss),
                    ("loss/prob_loss", prob_loss),
            ):
                tf.summary.scalar(tag, value, step=global_steps)
        writer.flush()
Exemplo n.º 4
0
# Single-image inference demo: load an image, run the YOLOv3 model on it,
# post-process the predictions and display the annotated result.
image_path   = "./data/kite.jpg"
check_dir    = "./saved_model/"  # not referenced in the lines shown here

# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than with an opaque
# error inside cv2.cvtColor.
original_image      = cv2.imread(image_path)
if original_image is None:
    raise FileNotFoundError("Could not read image: %s" % image_path)
# OpenCV loads pixels in BGR order; convert to RGB.
original_image      = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
# First two dimensions of the image array.
original_image_size = original_image.shape[:2]

# Preprocess a copy so the original stays untouched for drawing, then add a
# leading batch axis and cast to float32.
image_data = tools.preprocess_data(np.copy(original_image), [input_size, input_size])
image_data = image_data[np.newaxis, ...].astype(np.float32)

model = YOLOv3()
model.load_weights(filepath=cfg.YOLO.SAVE_MODEL_DIR + "saved_model")

# Decode every feature map returned by the model.
feature_maps = model.predict(image_data)
decoded_tensor = []
for i, conv_tensor in enumerate(feature_maps):
    pred_tensor = loss.decode(conv_tensor, i)
    decoded_tensor.append(pred_tensor)

# Flatten each decoded tensor to 2-D (keeping its last axis) and stack them
# into a single tensor of predictions.
pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in decoded_tensor]
pred_bbox = tf.concat(pred_bbox, axis=0)

# NOTE(review): 0.3 and 0.45 are presumably the score and IoU thresholds;
# confirm against tools.postprocess_boxes and nms.
bboxes = tools.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
bboxes = nms(bboxes, 0.45, method='nms')

image = tools.draw_bbox(original_image, bboxes, show_label=True)
image = Image.fromarray(image)
image.show()


Exemplo n.º 5
0
vid = cv2.VideoCapture(video_path)
while True:
    return_value, frame = vid.read()
    if return_value:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    else:
        raise ValueError("No image!")
    frame_size = frame.shape[:2]
    image_data = image_preporcess(np.copy(frame), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    prev_time = time.time()

    feature_maps = model.predict(image_data)
    decoded_tensor = decode(feature_maps)

    curr_time = time.time()
    exec_time = curr_time - prev_time

    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in decoded_tensor]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    bboxes = postprocess_boxes(pred_bbox, frame_size, input_size, 0.7)
    bboxes = nms(bboxes, 0.213, method='nms')
    image = draw_bbox(frame, bboxes, show_label=True)
    result = np.asarray(image)
    info = "time: %.2f ms" % (1000 * exec_time)
    cv2.putText(result,
                text=info,
                org=(50, 70),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,