def train_step(images, target):
    """Run a single YOLOv3 optimisation step and log it.

    The step runs the model in training mode, decodes every output, sums
    the GIoU / confidence / class-probability losses over three outputs,
    applies the gradients, prints a progress line, advances the learning
    rate schedule and writes the scalars to the summary writer.

    :param images: batch passed to ``model``
    :param target: indexable per-output targets; ``target[i]`` is unpacked
        into the positional arguments of ``loss.yolov3_loss``
    """
    with tf.GradientTape() as tape:
        raw_outputs = model(images, training=True)

        # Keep every raw conv output next to its decoded prediction.
        scale_pairs = [
            (raw, loss.decode(raw, idx)) for idx, raw in enumerate(raw_outputs)
        ]

        # Accumulate the three loss terms over the three outputs.
        giou_loss = conf_loss = prob_loss = 0
        for scale in range(3):
            raw, decoded = scale_pairs[scale]
            # ``*target[scale]`` spreads that output's targets into
            # separate positional arguments.
            parts = loss.yolov3_loss(decoded, raw, *target[scale], scale)
            giou_loss += parts[0]
            conf_loss += parts[1]
            prob_loss += parts[2]

        total_loss = giou_loss + conf_loss + prob_loss

    # The forward pass is recorded; differentiate and update the weights.
    trainable = model.trainable_variables
    gradients = tape.gradient(total_loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))

    progress_line = ("=> STEP %4d lr: %.6f giou_loss: %4.2f conf_loss: %4.2f "
                     "prob_loss: %4.2f total_loss: %4.2f")
    tf.print(progress_line % (global_steps, optimizer.lr.numpy(),
                              giou_loss, conf_loss, prob_loss, total_loss))

    # Learning-rate schedule: linear ramp during warm-up, cosine decay from
    # LR_INIT towards LR_END afterwards.
    global_steps.assign_add(1)
    if global_steps < warmup_steps:
        lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
    else:
        decay_progress = (global_steps - warmup_steps) / (total_steps - warmup_steps)
        half_range = 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END)
        lr = cfg.TRAIN.LR_END + half_range * (1 + tf.cos(decay_progress * np.pi))
    optimizer.lr.assign(lr.numpy())

    # Summary scalars, all tagged with the freshly incremented step.
    scalars = (
        ("lr", optimizer.lr),
        ("loss/total_loss", total_loss),
        ("loss/giou_loss", giou_loss),
        ("loss/conf_loss", conf_loss),
        ("loss/prob_loss", prob_loss),
    )
    with writer.as_default():
        for tag, value in scalars:
            tf.summary.scalar(tag, value, step=global_steps)
    writer.flush()
def get_boxes_and_scores(feats, anchors, image_shape):
    """Map predicted boxes to original-image coordinates and score them.

    :param feats: YOLO output feature map
    :param anchors: the prior (anchor) boxes of one size group (there are
        three groups in total)
    :param image_shape: shape of the original image
    :return: ``(boxes, box_scores)`` where ``boxes`` is reshaped to
        ``[-1, 4]`` and ``box_scores`` to ``[-1, cfg.num_classes]``
    """
    xy, wh, confidence, class_probs = decode(feats, anchors, calc_loss=False)

    # Box coordinates expressed relative to the original image, one row per box.
    flat_boxes = tf.reshape(correct_boxes(xy, wh, image_shape), [-1, 4])

    # Per-class score = objectness confidence * class probability.
    flat_scores = tf.reshape(confidence * class_probs, [-1, cfg.num_classes])

    return flat_boxes, flat_scores
def train_step(img, target, epoch):
    """Run a single YOLOv4 optimisation step and log it.

    The step runs the model in training mode, decodes its output, sums the
    CIoU / confidence / class-probability losses over three outputs,
    applies the gradients, prints a progress line, advances the learning
    rate schedule and writes the scalars to the summary writer.

    :param img: batch passed to ``model``
    :param target: indexable per-output targets; ``target[i][0]`` and
        ``target[i][1]`` are forwarded to ``yolov4_loss``
    :param epoch: epoch number, used only in the printed progress line
    """
    with tf.GradientTape() as tape:
        decoded_outputs = decode(model(img, training=True))

        # Accumulate the three loss terms over the three outputs.
        ciou_loss = conf_loss = prob_loss = 0
        for scale in range(3):
            scale_target = target[scale]
            parts = yolov4_loss(decoded_outputs[scale], scale_target[0],
                                scale_target[1], i=scale)
            ciou_loss += parts[0]
            conf_loss += parts[1]
            prob_loss += parts[2]

        total_loss = ciou_loss + conf_loss + prob_loss

    # The forward pass is recorded; differentiate and update the weights.
    trainable = model.trainable_variables
    gradients = tape.gradient(total_loss, trainable)
    optimizer.apply_gradients(zip(gradients, trainable))

    progress_line = ("=>EPOCH %3d STEP %4d lr: %.6f ciou_loss: %4.2f conf_loss: %4.2f "
                     "prob_loss: %4.2f total_loss: %4.2f")
    tf.print(progress_line % (epoch, global_steps, optimizer.lr.numpy(),
                              ciou_loss, conf_loss, prob_loss, total_loss))

    # Learning-rate schedule: linear ramp during warm-up, cosine decay from
    # LR_INIT towards LR_END afterwards.
    global_steps.assign_add(1)
    if global_steps < warmup_steps:
        lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
    else:
        decay_progress = (global_steps - warmup_steps) / (total_steps - warmup_steps)
        half_range = 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END)
        lr = cfg.TRAIN.LR_END + half_range * (1 + tf.cos(decay_progress * np.pi))
    optimizer.lr.assign(lr.numpy())

    # Summary scalars, all tagged with the freshly incremented step.
    scalars = (
        ("lr", optimizer.lr),
        ("loss/total_loss", total_loss),
        ("loss/ciou_loss", ciou_loss),
        ("loss/conf_loss", conf_loss),
        ("loss/prob_loss", prob_loss),
    )
    with writer.as_default():
        for tag, value in scalars:
            tf.summary.scalar(tag, value, step=global_steps)
    writer.flush()
# Single-image inference demo: load an image, run YOLOv3 on it, post-process
# the predictions and display the annotated result.
image_path = "./data/kite.jpg"
check_dir = "./saved_model/"

# cv2.imread reports an unreadable or missing file by returning None instead
# of raising, so fail here with the offending path rather than letting
# cvtColor abort with an opaque OpenCV assertion.
original_image = cv2.imread(image_path)
if original_image is None:
    raise FileNotFoundError("Could not read image: %s" % image_path)

# OpenCV loads BGR; convert to RGB before preprocessing and drawing.
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
original_image_size = original_image.shape[:2]

# Preprocess a copy so the original stays untouched for drawing, then add a
# leading batch axis and cast to float32 for the model.
image_data = tools.preprocess_data(np.copy(original_image), [input_size, input_size])
image_data = image_data[np.newaxis, ...].astype(np.float32)

model = YOLOv3()
model.load_weights(filepath=cfg.YOLO.SAVE_MODEL_DIR + "saved_model")

feature_maps = model.predict(image_data)

# Decode every output feature map, passing its index to the decoder.
decoded_tensor = [
    loss.decode(conv_tensor, i) for i, conv_tensor in enumerate(feature_maps)
]

# Flatten each decoded output to 2-D (keeping the last axis) and stack them.
pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in decoded_tensor]
pred_bbox = tf.concat(pred_bbox, axis=0)

# 0.3 and 0.45 are the thresholds handed to postprocess_boxes and nms.
bboxes = tools.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
bboxes = nms(bboxes, 0.45, method='nms')

image = tools.draw_bbox(original_image, bboxes, show_label=True)
image = Image.fromarray(image)
image.show()
vid = cv2.VideoCapture(video_path) while True: return_value, frame = vid.read() if return_value: frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) else: raise ValueError("No image!") frame_size = frame.shape[:2] image_data = image_preporcess(np.copy(frame), [input_size, input_size]) image_data = image_data[np.newaxis, ...].astype(np.float32) prev_time = time.time() feature_maps = model.predict(image_data) decoded_tensor = decode(feature_maps) curr_time = time.time() exec_time = curr_time - prev_time pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in decoded_tensor] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = postprocess_boxes(pred_bbox, frame_size, input_size, 0.7) bboxes = nms(bboxes, 0.213, method='nms') image = draw_bbox(frame, bboxes, show_label=True) result = np.asarray(image) info = "time: %.2f ms" % (1000 * exec_time) cv2.putText(result, text=info, org=(50, 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX,