import math
import sys
import time

import torch
import torch.nn as nn


def save_conv(fp, conv_model):
    # Dump a plain convolution in darknet order: bias first, then weights.
    if conv_model.bias.is_cuda:
        convert2cpu(conv_model.bias.data).numpy().tofile(fp)
        convert2cpu(conv_model.weight.data).numpy().tofile(fp)
    else:
        conv_model.bias.data.numpy().tofile(fp)
        conv_model.weight.data.numpy().tofile(fp)
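# The savers here rely on a convert2cpu helper defined elsewhere in this
# repo; a minimal sketch of what it is assumed to do (copy a possibly-CUDA
# tensor into a fresh CPU FloatTensor so that .numpy() is legal):
def convert2cpu(gpu_matrix):
    return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)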
def save_conv_bn(fp, conv_model, bn_model):
    # Dump a conv+batchnorm pair in darknet order: BN bias, BN weight,
    # running mean, running variance, then the (bias-free) conv weights.
    if bn_model.bias.is_cuda:
        convert2cpu(bn_model.bias.data).numpy().tofile(fp)
        convert2cpu(bn_model.weight.data).numpy().tofile(fp)
        convert2cpu(bn_model.running_mean).numpy().tofile(fp)
        convert2cpu(bn_model.running_var).numpy().tofile(fp)
        convert2cpu(conv_model.weight.data).numpy().tofile(fp)
    else:
        bn_model.bias.data.numpy().tofile(fp)
        bn_model.weight.data.numpy().tofile(fp)
        bn_model.running_mean.numpy().tofile(fp)
        bn_model.running_var.numpy().tofile(fp)
        conv_model.weight.data.numpy().tofile(fp)
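# Hypothetical usage sketch (the layer sizes and file path are illustrative,
# not from this repo): serialize one conv+batchnorm pair in darknet order.
def _demo_save_conv_bn():
    model = nn.Sequential(
        nn.Conv2d(3, 16, 3, padding=1, bias=False),  # convs followed by BN carry no bias
        nn.BatchNorm2d(16),
    )
    with open('layer0.weights', 'wb') as fp:
        save_conv_bn(fp, model[0], model[1])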
def forward(self, output, target):
    # output : nB x (nA*(4+1+num_classes)) x nH x nW
    mask_tuple = self.get_mask_boxes(output)
    t0 = time.time()
    nB = output.data.size(0)     # batch size
    nA = mask_tuple['n'].item()  # num_anchors
    nC = self.num_classes
    nH = output.data.size(2)
    nW = output.data.size(3)
    anchor_step = mask_tuple['a'].size(0) // nA
    anchors = mask_tuple['a'].view(nA, anchor_step).to(self.device)
    cls_anchor_dim = nB * nA * nH * nW

    output = output.view(nB, nA, (5 + nC), nH, nW)
    cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device)
    ix = torch.LongTensor(range(0, 5)).to(self.device)
    pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device)

    # Split the raw output into coordinates, objectness and class scores.
    coord = output.index_select(2, ix[0:4]).view(
        nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(
        -1, cls_anchor_dim)  # x, y, w, h
    coord[0:2] = coord[0:2].sigmoid()  # x, y offsets squashed to (0, 1)
    conf = output.index_select(2, ix[4:5]).view(nB, nA, nH, nW).sigmoid()
    cls = output.index_select(2, cls_grid)
    cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(
        cls_anchor_dim, nC)
    t1 = time.time()

    # Decode predictions: bx = sigmoid(tx) + cx, bw = pw * exp(tw), etc.
    grid_x = torch.linspace(0, nW - 1, nW).repeat(
        nB * nA, nH, 1).view(cls_anchor_dim).to(self.device)
    grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(
        nB * nA, 1, 1).view(cls_anchor_dim).to(self.device)
    anchor_w = anchors.index_select(1, ix[0:1]).repeat(
        1, nB * nH * nW).view(cls_anchor_dim)
    anchor_h = anchors.index_select(1, ix[1:2]).repeat(
        1, nB * nH * nW).view(cls_anchor_dim)

    pred_boxes[0] = coord[0] + grid_x
    pred_boxes[1] = coord[1] + grid_y
    pred_boxes[2] = coord[2].exp() * anchor_w
    pred_boxes[3] = coord[3].exp() * anchor_h
    # for build_targets: it works faster on CPU than on GPU
    pred_boxes = convert2cpu(
        pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach()
    t2 = time.time()

    nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \
        self.build_targets(pred_boxes, target.detach(), anchors.detach(), nA, nH, nW)

    cls_mask = (cls_mask == 1)
    tcls = tcls[cls_mask].long().view(-1)
    cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(self.device)
    cls = cls[cls_mask].view(-1, nC)

    nProposals = int((conf > 0.25).sum())

    tcoord = tcoord.view(4, cls_anchor_dim).to(self.device)
    tconf, tcls = tconf.to(self.device), tcls.to(self.device)
    coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(
        self.device), conf_mask.to(self.device)
    t3 = time.time()

    # size_average=False is deprecated; reduction='sum' is the equivalent.
    loss_coord = nn.MSELoss(reduction='sum')(coord * coord_mask,
                                             tcoord * coord_mask) / 2
    loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask,
                                            tconf * conf_mask)
    loss_cls = nn.CrossEntropyLoss(
        reduction='sum')(cls, tcls) if cls.size(0) > 0 else 0
    loss = loss_coord + loss_conf + loss_cls
    t4 = time.time()

    if False:  # flip to True to profile the stages above
        print('-' * 30)
        print('        activation : %f' % (t1 - t0))
        print(' create pred_boxes : %f' % (t2 - t1))
        print('     build targets : %f' % (t3 - t2))
        print('       create loss : %f' % (t4 - t3))
        print('             total : %f' % (t4 - t0))

    print(
        '%d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f'
        % (self.seen, self.nth_layer, nGT, nRecall, nRecall75, nProposals,
           loss_coord, loss_conf, loss_cls, loss))
    if math.isnan(loss.item()):
        print(conf, tconf)
        sys.exit(0)
    return loss
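# Toy check of the box decoding used in forward() above, for one cell/anchor:
#   bx = sigmoid(tx) + grid_x, by = sigmoid(ty) + grid_y,
#   bw = anchor_w * exp(tw),   bh = anchor_h * exp(th).
# All numbers below are made-up illustrative values, not from this repo.
def _demo_decode_single_box():
    tx, ty, tw, th = torch.tensor([0.2, -0.1, 0.5, 0.3])
    grid_x, grid_y = 7.0, 4.0      # cell offsets within the feature map
    anchor_w, anchor_h = 3.6, 2.8  # anchor prior, in grid units
    bx = torch.sigmoid(tx) + grid_x
    by = torch.sigmoid(ty) + grid_y
    bw = anchor_w * torch.exp(tw)
    bh = anchor_h * torch.exp(th)
    print(bx.item(), by.item(), bw.item(), bh.item())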