def finish_episode():
    R = 0
    policy_loss = []
    value_loss = []
    returns = []
    # Compute discounted returns, walking the rewards from last to first.
    for r in policy.rewards[::-1]:
        R = r + gamma * R
        returns.insert(0, R)
    returns = paddle.to_tensor(returns)
    # Normalize returns for more stable training.
    returns = (returns - returns.mean()) / (returns.std() + eps)
    for (log_prob, value), R in zip(policy.saved_log_probs, returns):
        advantage = R - value
        # Actor loss: REINFORCE with the critic's value as a baseline.
        policy_loss.append(-log_prob * advantage)
        # Critic loss: smooth L1 between the predicted value and the return.
        value_loss.append(F.smooth_l1_loss(value.reshape([-1]), R.reshape([-1])))
    optimizer.clear_grad()
    policy_loss = paddle.concat(policy_loss).sum()
    value_loss = paddle.concat(value_loss).sum()
    loss = policy_loss + value_loss
    loss.backward()
    optimizer.step()
    del policy.rewards[:]
    del policy.saved_log_probs[:]
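# How the buffers consumed by finish_episode() are assumed to be filled during a
# rollout. This is only a minimal sketch: `policy` (a model with .rewards and
# .saved_log_probs lists and an actor head plus a critic head), `env`, and the
# (log_prob, value) tuple layout are assumptions mirroring the classic
# actor-critic example this helper follows, not part of the snippet above.
import paddle


def select_action(state):
    state = paddle.to_tensor(state, dtype='float32').unsqueeze(0)
    probs, value = policy(state)  # actor probabilities and critic value
    m = paddle.distribution.Categorical(probs)
    action = m.sample([1])
    # Store (log_prob, value) so finish_episode() can compute both losses later.
    policy.saved_log_probs.append((m.log_prob(action), value))
    return int(action.numpy().flatten()[0])


# One episode of interaction (gym-style env assumed).
state, done = env.reset(), False
while not done:
    action = select_action(state)
    state, reward, done, _ = env.step(action)
    policy.rewards.append(reward)
finish_episode()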
def forward(self, features, im_info, boxes=None):
    # prediction
    pred_cls_score_list = []
    pred_bbox_offsets_list = []
    for x in features:
        t = F.relu(self.rpn_conv(x))
        pred_cls_score_list.append(self.rpn_cls_score(t))
        pred_bbox_offsets_list.append(self.rpn_bbox_offsets(t))
    # get anchors
    all_anchors_list = []
    # stride: 64, 32, 16, 8, 4 for p6 -> p2
    base_stride = 4
    off_stride = 2**(len(features) - 1)  # 16
    for fm in features:
        layer_anchors = self.anchors_generator(fm, base_stride, off_stride)
        off_stride = off_stride // 2
        all_anchors_list.append(layer_anchors)
    # sample from the predictions
    rpn_rois = find_top_rpn_proposals(self.training, pred_bbox_offsets_list,
                                      pred_cls_score_list, all_anchors_list,
                                      im_info)
    rpn_rois = rpn_rois.cast('float32')
    if self.training:
        rpn_labels, rpn_bbox_targets = fpn_anchor_target(
            boxes, im_info, all_anchors_list)
        # rpn_labels = rpn_labels.astype(np.int32)
        pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
            pred_cls_score_list, pred_bbox_offsets_list)

        # rpn loss
        valid_masks = rpn_labels >= 0
        # objectness_loss = softmax_loss(
        #     gather(pred_cls_score, nonzero(valid_masks)),
        #     gather(rpn_labels, nonzero(valid_masks)))
        # Objectness loss: binary cross-entropy over anchors with valid labels.
        objectness_loss = F.binary_cross_entropy(
            F.softmax(
                paddle.gather(pred_cls_score, paddle.nonzero(valid_masks))),
            paddle.gather(
                paddle.eye(2),
                paddle.gather(rpn_labels, paddle.nonzero(valid_masks))))

        pos_masks = rpn_labels > 0
        # localization_loss = smooth_l1_loss(
        #     pred_bbox_offsets[pos_masks],
        #     rpn_bbox_targets[pos_masks],
        #     config.rpn_smooth_l1_beta)
        # Localization loss: smooth L1 over the positive anchors only.
        localization_loss = F.smooth_l1_loss(
            paddle.gather(pred_bbox_offsets, paddle.nonzero(pos_masks)),
            paddle.gather(rpn_bbox_targets, paddle.nonzero(pos_masks)),
            delta=config.rcnn_smooth_l1_beta)

        normalizer = 1 / valid_masks.cast('float32').sum()
        loss_rpn_cls = objectness_loss.sum() * normalizer
        loss_rpn_loc = localization_loss.sum() * normalizer
        loss_dict = {}
        loss_dict['loss_rpn_cls'] = loss_rpn_cls
        loss_dict['loss_rpn_loc'] = loss_rpn_loc
        return rpn_rois, loss_dict
    else:
        return rpn_rois
def forward(self, student, teacher):
    bs = student.shape[0]
    student = student.reshape([bs, -1])
    teacher = teacher.reshape([bs, -1])

    t_d = pdist(teacher, squared=False, eps=self.eps)
    mean_td = t_d.mean()
    t_d = t_d / (mean_td + self.eps)

    d = pdist(student, squared=False, eps=self.eps)
    mean_d = d.mean()
    d = d / (mean_d + self.eps)

    loss = F.smooth_l1_loss(d, t_d, reduction="mean")
    return loss
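# `pdist` is the pairwise-distance helper that the distance-wise distillation
# losses in this collection rely on but do not define. A minimal sketch of one
# possible Paddle implementation, under the assumption that it returns the
# [N, N] matrix of Euclidean distances with a zeroed diagonal:
import paddle


def pdist(e, squared=False, eps=1e-12):
    # e: [N, D] feature matrix.
    e_square = (e * e).sum(axis=1)
    prod = paddle.mm(e, e.t())
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, clipped for numerical safety.
    res = (e_square.unsqueeze(1) + e_square.unsqueeze(0) - 2 * prod).clip(min=eps)
    if not squared:
        res = res.sqrt()
    # Zero out self-distances on the diagonal.
    res = res * (1.0 - paddle.eye(e.shape[0]))
    return res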
def forward(self, confidence, predicted_locations, labels, gt_locations):
    """Compute classification loss and smooth L1 loss.

    Args:
        confidence (batch_size, num_priors, num_classes): class predictions.
        predicted_locations (batch_size, num_priors, 4): predicted locations.
        labels (batch_size, num_priors): real labels of all the priors.
        gt_locations (batch_size, num_priors, 4): real boxes corresponding to all the priors.
    """
    num_classes = confidence.shape[2]
    with paddle.no_grad():
        # derived from cross_entropy = sum(log(p))
        loss = -F.log_softmax(confidence, 2)[:, :, 0]
        mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)

    confidence = paddle.concat([
        confidence[:, :, 0].masked_select(mask).reshape([-1, 1]),
        confidence[:, :, 1].masked_select(mask).reshape([-1, 1])
    ], axis=1)
    classification_loss = F.cross_entropy(
        confidence.reshape([-1, num_classes]),
        labels.masked_select(mask),
        reduction='sum')

    pos_mask = labels > 0
    predicted_locations = predicted_locations.masked_select(
        paddle.concat([
            pos_mask.reshape(pos_mask.shape + [1]),
            pos_mask.reshape(pos_mask.shape + [1]),
            pos_mask.reshape(pos_mask.shape + [1]),
            pos_mask.reshape(pos_mask.shape + [1])
        ], axis=2)).reshape([-1, 4])
    gt_locations = gt_locations.masked_select(
        paddle.concat([
            pos_mask.reshape(pos_mask.shape + [1]),
            pos_mask.reshape(pos_mask.shape + [1]),
            pos_mask.reshape(pos_mask.shape + [1]),
            pos_mask.reshape(pos_mask.shape + [1])
        ], axis=2)).reshape([-1, 4])
    smooth_l1_loss = F.smooth_l1_loss(
        predicted_locations, gt_locations.cast('float32'), reduction='sum')
    # Alternative L2 regression loss:
    # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum')
    num_pos = gt_locations.shape[0]
    return smooth_l1_loss / num_pos, classification_loss / num_pos
def train(model, data_loader, optimizer, lr_scheduler, epoch, LOG):
    stages = 4
    losses = [AverageMeter() for _ in range(stages)]
    length_loader = len(data_loader)

    model.train()
    for batch_id, data in enumerate(data_loader()):
        left_img, right_img, gt = data
        mask = paddle.to_tensor(gt.numpy() > 0)
        gt_mask = paddle.masked_select(gt, mask)

        outputs = model(left_img, right_img)
        outputs = [paddle.squeeze(output) for output in outputs]

        tem_stage_loss = []
        for index in range(stages):
            temp_loss = args.loss_weights[index] * F.smooth_l1_loss(
                paddle.masked_select(outputs[index], mask),
                gt_mask,
                reduction='mean')
            tem_stage_loss.append(temp_loss)
            losses[index].update(
                float(temp_loss.numpy() / args.loss_weights[index]))

        sum_loss = paddle.add_n(tem_stage_loss)
        sum_loss.backward()
        optimizer.step()
        optimizer.clear_grad()

        if batch_id % 5 == 0:
            info_str = [
                'Stage {} = {:.2f}({:.2f})'.format(x, losses[x].val,
                                                   losses[x].avg)
                for x in range(stages)
            ]
            info_str = '\t'.join(info_str)
            info_str = 'Train Epoch{} [{}/{}] lr:{:.5f}\t{}'.format(
                epoch, batch_id, length_loader, optimizer.get_lr(), info_str)
            LOG.info(info_str)

    lr_scheduler.step()
    info_str = '\t'.join(
        ['Stage {} = {:.2f}'.format(x, losses[x].avg) for x in range(stages)])
    LOG.info('Average train loss: ' + info_str)
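# `AverageMeter` is used by the training loop above but not defined in this
# snippet. A minimal sketch of the usual running-average helper it appears to
# expect (the .val / .avg / .update interface is inferred from the calls above):
class AverageMeter:
    def __init__(self):
        self.val = 0.0    # most recent value
        self.sum = 0.0    # running sum
        self.count = 0    # number of updates
        self.avg = 0.0    # running mean

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count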
def forward(self, student, teacher):
    # reshape for feature map distillation
    bs = student.shape[0]
    student = student.reshape([bs, -1])
    teacher = teacher.reshape([bs, -1])

    td = (teacher.unsqueeze(0) - teacher.unsqueeze(1))
    norm_td = F.normalize(td, p=2, axis=2)
    t_angle = paddle.bmm(norm_td, norm_td.transpose([0, 2, 1])).reshape([-1, 1])

    sd = (student.unsqueeze(0) - student.unsqueeze(1))
    norm_sd = F.normalize(sd, p=2, axis=2)
    s_angle = paddle.bmm(norm_sd, norm_sd.transpose([0, 2, 1])).reshape([-1, 1])

    loss = F.smooth_l1_loss(s_angle, t_angle, reduction='mean')
    return loss
def det_loss(self, p_det, anchor, t_conf, t_box):
    pshape = paddle.shape(p_det)
    pshape.stop_gradient = True
    nB, nGh, nGw = pshape[0], pshape[-2], pshape[-1]
    nA = len(anchor)
    p_det = paddle.reshape(
        p_det, [nB, nA, self.num_classes + 5, nGh, nGw]).transpose(
            (0, 1, 3, 4, 2))

    # 1. loss_conf: cross_entropy
    p_conf = p_det[:, :, :, :, 4:6]
    p_conf_flatten = paddle.reshape(p_conf, [-1, 2])
    t_conf_flatten = t_conf.flatten()
    t_conf_flatten = paddle.cast(t_conf_flatten, dtype="int64")
    t_conf_flatten.stop_gradient = True
    loss_conf = F.cross_entropy(
        p_conf_flatten, t_conf_flatten, ignore_index=-1, reduction='mean')
    loss_conf.stop_gradient = False

    # 2. loss_box: smooth_l1_loss
    p_box = p_det[:, :, :, :, :4]
    p_box_flatten = paddle.reshape(p_box, [-1, 4])
    t_box_flatten = paddle.reshape(t_box, [-1, 4])
    fg_inds = paddle.nonzero(t_conf_flatten > 0).flatten()
    if fg_inds.numel() > 0:
        reg_delta = paddle.gather(p_box_flatten, fg_inds)
        reg_target = paddle.gather(t_box_flatten, fg_inds)
    else:
        reg_delta = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
        reg_delta.stop_gradient = False
        reg_target = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
        reg_target.stop_gradient = True
    loss_box = F.smooth_l1_loss(
        reg_delta, reg_target, reduction='mean', delta=1.0)
    loss_box.stop_gradient = False

    return loss_conf, loss_box
def forward(self, student, teacher):
    # GAP to reduce memory
    if self.avgpool is not None:
        # NxC1xH1xW1 -> NxC1x1x1
        student = self.avgpool(student)
        # NxC2xH2xW2 -> NxC2x1x1
        teacher = self.avgpool(teacher)

    bs = student.shape[0]
    student = student.reshape([bs, -1])
    teacher = teacher.reshape([bs, -1])

    t_d = pdist(teacher, squared=False)
    mean_td = t_d.mean()
    t_d = t_d / (mean_td + self.eps)

    d = pdist(student, squared=False)
    mean_d = d.mean()
    d = d / (mean_d + self.eps)

    loss = F.smooth_l1_loss(d, t_d, reduction="mean")
    return loss
def forward(self, input, label, conf):
    x_emb = self.embedding(input)
    fc = self.lin_a(x_emb)

    mask = conf > 0
    mask = paddle.cast(mask, dtype="int64")
    mask.stop_gradient = True
    emb_mask = mask.max(1).flatten()
    emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
    emb_mask_inds.stop_gradient = True

    if emb_mask_inds.numel() == 0:
        # No positive samples: return a zero loss that still stays connected
        # to the graph through the phony parameter.
        loss_box = self.phony * 0
    else:
        projection = self.lin_b(fc)
        projection = paddle.reshape(projection, shape=[-1, 1])
        output = paddle.gather(projection, emb_mask_inds)
        target = paddle.gather(label, emb_mask_inds)
        loss_box = F.smooth_l1_loss(
            output, target, reduction='sum', delta=1.0)
        loss_box = loss_box / len(conf)
    return loss_box
def forward(self, student, teacher):
    # GAP to reduce memory
    if self.avgpool is not None:
        # NxC1xH1xW1 -> NxC1x1x1
        student = self.avgpool(student)
        # NxC2xH2xW2 -> NxC2x1x1
        teacher = self.avgpool(teacher)

    # reshape for feature map distillation
    bs = student.shape[0]
    student = student.reshape([bs, -1])
    teacher = teacher.reshape([bs, -1])

    td = (teacher.unsqueeze(0) - teacher.unsqueeze(1))
    norm_td = F.normalize(td, p=2, axis=2)
    t_angle = paddle.bmm(norm_td, norm_td.transpose([0, 2, 1])).reshape([-1, 1])

    sd = (student.unsqueeze(0) - student.unsqueeze(1))
    norm_sd = F.normalize(sd, p=2, axis=2)
    s_angle = paddle.bmm(norm_sd, norm_sd.transpose([0, 2, 1])).reshape([-1, 1])

    loss = F.smooth_l1_loss(s_angle, t_angle, reduction='mean')
    return loss
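# A hedged end-to-end sketch of how the distance-wise and angle-wise relation
# losses above might be combined for distillation. The class names RkdDistance
# and RkdAngle, their constructors, and the loss weights are assumptions for
# illustration only; substitute whatever the surrounding module actually defines.
import paddle

student_feat = paddle.randn([8, 64, 7, 7])    # student feature map, NxC1xH1xW1
teacher_feat = paddle.randn([8, 256, 7, 7])   # teacher feature map, NxC2xH2xW2

dist_criterion = RkdDistance()    # pdist-based distance relation loss
angle_criterion = RkdAngle()      # bmm-based angle relation loss

# Weights are illustrative; tune them for the task at hand.
loss = 25.0 * dist_criterion(student_feat, teacher_feat) \
     + 50.0 * angle_criterion(student_feat, teacher_feat)
loss.backward()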
def forward(self, boxes, scores, gt_bbox, gt_label, prior_boxes):
    boxes = paddle.concat(boxes, axis=1)
    scores = paddle.concat(scores, axis=1)
    gt_label = gt_label.unsqueeze(-1).astype('int64')
    prior_boxes = paddle.concat(prior_boxes, axis=0)
    bg_index = scores.shape[-1] - 1

    # Match bbox and get targets.
    targets_bbox, targets_label = \
        self._bipartite_match_for_batch(gt_bbox, gt_label, prior_boxes,
                                        bg_index)
    targets_bbox.stop_gradient = True
    targets_label.stop_gradient = True

    # Compute regression loss.
    # Select positive samples.
    bbox_mask = (targets_label != bg_index).astype(boxes.dtype)
    loc_loss = bbox_mask * F.smooth_l1_loss(
        boxes, targets_bbox, reduction='none')
    loc_loss = loc_loss.sum() * self.loc_loss_weight

    # Compute confidence loss.
    conf_loss = F.softmax_with_cross_entropy(scores, targets_label)
    # Mining hard examples.
    label_mask = self._mine_hard_example(
        conf_loss.squeeze(-1), targets_label.squeeze(-1), bg_index)
    conf_loss = conf_loss * label_mask.unsqueeze(-1).astype(conf_loss.dtype)
    conf_loss = conf_loss.sum() * self.conf_loss_weight

    # Compute overall weighted loss.
    normalizer = (targets_label != bg_index).astype('float32').sum().clip(
        min=1)
    loss = (conf_loss + loc_loss) / (normalizer + 1e-9)

    return loss
def train(args):
    # Train on GPU if available
    if paddle.is_compiled_with_cuda():
        paddle.set_device("gpu:0")
    # Create the multi-process game environments
    envs = MultipleEnvironments(args.game, args.num_processes)
    # Fix the random seed for reproducible initialization
    paddle.seed(123)
    # Create the model
    model = Model(envs.num_states, envs.num_actions)
    # Load a pretrained model if one is given
    if args.trained_model is not None:
        model.load_dict(paddle.load(args.trained_model))
    # Create the directory for saving models
    if not os.path.isdir(args.saved_path):
        os.makedirs(args.saved_path)
    paddle.save(model.state_dict(),
                "{}/model_{}.pdparams".format(args.saved_path, args.game))
    # Run game evaluation in a separate process
    mp = _mp.get_context("spawn")
    process = mp.Process(target=eval,
                         args=(args, envs.num_states, envs.num_actions))
    process.start()
    # Create the optimizer with gradient clipping
    clip_grad = paddle.nn.ClipGradByNorm(clip_norm=0.5)
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.lr,
                                      grad_clip=clip_grad)
    # Reset every environment process at the start
    [agent_conn.send(("reset", None)) for agent_conn in envs.agent_conns]
    # Get the initial game frames
    curr_states = [agent_conn.recv() for agent_conn in envs.agent_conns]
    curr_states = paddle.to_tensor(np.concatenate(curr_states, 0),
                                   dtype='float32')
    curr_episode = 0
    while True:
        curr_episode += 1
        old_log_policies, actions, values, states, rewards, dones = [], [], [], [], [], []
        for _ in range(args.num_local_steps):
            states.append(curr_states)
            # Run the model
            logits, value = model(curr_states)
            # Convert logits to per-action probabilities
            policy = F.softmax(logits)
            # Sample actions according to those probabilities
            old_m = Categorical(policy)
            action = old_m.sample([1]).squeeze()
            # Record the rollout data
            actions.append(action)
            values.append(value.squeeze())
            # Log-probability of the sampled actions
            old_log_policy = old_m.log_prob(paddle.unsqueeze(action, axis=1))
            old_log_policy = paddle.squeeze(old_log_policy)
            old_log_policies.append(old_log_policy)
            # Send the actions to the environment processes
            [
                agent_conn.send(("step", int(act[0])))
                for agent_conn, act in zip(envs.agent_conns, action)
            ]
            # Collect the results from all processes
            state, reward, done, info = zip(
                *[agent_conn.recv() for agent_conn in envs.agent_conns])
            # Convert the results to tensors
            state = paddle.to_tensor(np.concatenate(state, 0), dtype='float32')
            reward = paddle.to_tensor(reward, dtype='float32')
            done = paddle.to_tensor(done, dtype='float32')
            # Record rewards and done flags
            rewards.append(reward)
            dones.append(done)
            curr_states = state
        # Bootstrap the value of the last observed state
        _, next_value, = model(curr_states)
        next_value = next_value.squeeze()
        old_log_policies = paddle.concat(old_log_policies).detach().squeeze()
        actions = paddle.concat(actions).squeeze()
        values = paddle.concat(values).squeeze().detach()
        states = paddle.concat(states).squeeze()

        # Generalized advantage estimation, iterating backwards in time
        gae = 0.0
        R = []
        for value, reward, done in list(zip(values, rewards, dones))[::-1]:
            gae = gae * args.gamma * args.tau
            gae = gae + reward + args.gamma * next_value.detach() * (
                1.0 - done) - value.detach()
            next_value = value
            R.append(gae + value)
        R = R[::-1]
        R = paddle.concat(R).detach()
        advantages = R - values

        for i in range(args.num_epochs):
            indice = paddle.randperm(args.num_local_steps *
                                     args.num_processes)
            for j in range(args.batch_size):
                batch_indices = indice[
                    int(j * (args.num_local_steps * args.num_processes /
                             args.batch_size)):
                    int((j + 1) * (args.num_local_steps * args.num_processes /
                                   args.batch_size))]
                # Predict with the sampled states
                logits, value = model(paddle.gather(states, batch_indices))
                # Per-action probabilities
                new_policy = F.softmax(logits)
                # Log-probability of the originally sampled actions
                new_m = Categorical(new_policy)
                new_log_policy = new_m.log_prob(
                    paddle.unsqueeze(paddle.gather(actions, batch_indices),
                                     axis=1))
                new_log_policy = paddle.squeeze(new_log_policy)
                # Actor loss: clipped PPO surrogate objective
                ratio = paddle.exp(
                    new_log_policy -
                    paddle.gather(old_log_policies, batch_indices))
                advantage = paddle.gather(advantages, batch_indices)
                actor_loss = paddle.clip(ratio, 1.0 - args.epsilon,
                                         1.0 + args.epsilon) * advantage
                actor_loss = paddle.concat([
                    paddle.unsqueeze(ratio * advantage, axis=0),
                    paddle.unsqueeze(actor_loss, axis=0)
                ])
                actor_loss = -paddle.mean(paddle.min(actor_loss, axis=0))
                # Critic loss
                critic_loss = F.smooth_l1_loss(
                    paddle.gather(R, batch_indices), value.squeeze())
                entropy_loss = paddle.mean(new_m.entropy())
                # Total loss
                total_loss = actor_loss + critic_loss - args.beta * entropy_loss
                # Compute gradients and update
                total_loss.backward()
                optimizer.step()
                optimizer.clear_grad()
        paddle.save(
            model.state_dict(),
            "{}/model_{}.pdparams".format(args.saved_path, args.game))
        print("Episode: {}. Total loss: {:.4f}".format(
            curr_episode, total_loss.numpy()[0]))
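# Isolated sketch of the clipped PPO surrogate used in the training loop above,
# on toy values. The function above builds the same element-wise minimum by
# concatenating the two terms and taking min over axis 0; here the equivalent
# paddle.minimum form is used for clarity. The epsilon value is illustrative.
import paddle

ratio = paddle.to_tensor([0.5, 1.0, 1.5])        # new_prob / old_prob
advantage = paddle.to_tensor([1.0, -2.0, 0.5])
epsilon = 0.2

unclipped = ratio * advantage
clipped = paddle.clip(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantage
# Element-wise minimum of the two surrogates, negated to get a loss to minimize.
actor_loss = -paddle.mean(paddle.minimum(unclipped, clipped))
print(actor_loss)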
def forward(self, boxes, scores, gt_box, gt_class, anchors):
    boxes = paddle.concat(boxes, axis=1)
    scores = paddle.concat(scores, axis=1)
    prior_boxes = paddle.concat(anchors, axis=0)
    gt_label = gt_class.unsqueeze(-1)
    batch_size, num_priors, num_classes = scores.shape

    def _reshape_to_2d(x):
        return paddle.flatten(x, start_axis=2)

    # 1. Find matched bounding box by prior box.
    #    1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
    #    1.2 Compute matched bounding box by bipartite matching algorithm.
    matched_indices = []
    matched_dist = []
    for i in range(gt_box.shape[0]):
        iou = iou_similarity(gt_box[i], prior_boxes)
        matched_indice, matched_d = bipartite_match(iou, self.match_type,
                                                    self.overlap_threshold)
        matched_indices.append(matched_indice)
        matched_dist.append(matched_d)
    matched_indices = paddle.concat(matched_indices, axis=0)
    matched_indices.stop_gradient = True
    matched_dist = paddle.concat(matched_dist, axis=0)
    matched_dist.stop_gradient = True

    # 2. Compute confidence for mining hard examples.
    #    2.1 Get the target label based on matched indices.
    target_label, _ = self._label_target_assign(gt_label, matched_indices)
    confidence = _reshape_to_2d(scores)
    #    2.2 Compute confidence loss. Reshape confidence to 2D tensor.
    target_label = _reshape_to_2d(target_label).astype('int64')
    conf_loss = F.softmax_with_cross_entropy(confidence, target_label)
    conf_loss = paddle.reshape(conf_loss, [batch_size, num_priors])

    # 3. Mining hard examples.
    neg_mask = self._mine_hard_example(conf_loss,
                                       matched_indices,
                                       matched_dist,
                                       neg_pos_ratio=self.neg_pos_ratio,
                                       neg_overlap=self.neg_overlap)

    # 4. Assign classification and regression targets.
    #    4.1 Encode bbox according to the prior boxes.
    prior_box_var = paddle.to_tensor(
        np.array([0.1, 0.1, 0.2, 0.2],
                 dtype='float32')).reshape([1, 4]).expand_as(prior_boxes)
    encoded_bbox = []
    for i in range(gt_box.shape[0]):
        encoded_bbox.append(
            box_coder(prior_box=prior_boxes,
                      prior_box_var=prior_box_var,
                      target_box=gt_box[i],
                      code_type='encode_center_size'))
    encoded_bbox = paddle.stack(encoded_bbox, axis=0)
    #    4.2 Assign regression targets.
    target_bbox, target_loc_weight = self._bbox_target_assign(
        encoded_bbox, matched_indices)
    #    4.3 Assign classification targets.
    target_label, target_conf_weight = self._label_target_assign(
        gt_label, matched_indices, neg_mask=neg_mask)

    # 5. Compute loss.
    #    5.1 Compute confidence loss.
    target_label = _reshape_to_2d(target_label).astype('int64')
    conf_loss = F.softmax_with_cross_entropy(confidence, target_label)
    target_conf_weight = _reshape_to_2d(target_conf_weight)
    conf_loss = conf_loss * target_conf_weight * self.conf_loss_weight

    #    5.2 Compute regression loss.
    location = _reshape_to_2d(boxes)
    target_bbox = _reshape_to_2d(target_bbox)
    loc_loss = F.smooth_l1_loss(location, target_bbox, reduction='none')
    loc_loss = paddle.sum(loc_loss, axis=-1, keepdim=True)
    target_loc_weight = _reshape_to_2d(target_loc_weight)
    loc_loss = loc_loss * target_loc_weight * self.loc_loss_weight

    #    5.3 Compute overall weighted loss.
    loss = conf_loss + loc_loss
    loss = paddle.reshape(loss, [batch_size, num_priors])
    loss = paddle.sum(loss, axis=1, keepdim=True)
    normalizer = paddle.sum(target_loc_weight)
    loss = paddle.sum(loss / normalizer)

    return loss
def compute_res_loss(output, target):
    # return F.smooth_l1_loss(output, target, reduction='elementwise_mean')
    return F.smooth_l1_loss(output, target, reduction='mean')
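# Standalone sketch of paddle.nn.functional.smooth_l1_loss, the common piece in
# all of the snippets above. The loss is quadratic where |input - label| < delta
# and linear beyond that, which makes it less sensitive to outliers than MSE.
# Tensor values here are illustrative only.
import paddle
import paddle.nn.functional as F

output = paddle.rand([4, 3], dtype='float32')
target = paddle.rand([4, 3], dtype='float32')
loss = F.smooth_l1_loss(output, target, reduction='mean', delta=1.0)
print(loss)  # scalar tensor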