def make_predict(model_path, data_path, res_file):
    """Run SSD inference on every image in ``data_path`` and write detections.

    Each accepted detection is appended to ``res_file`` as one line:
    "<image> <score> <x1> <y1> <x2> <y2>".

    Relies on module-level names: build_ssd, pedestrian, os, cv2, np,
    torch, Variable.
    """
    # Build the detector in inference mode and restore trained weights.
    net = build_ssd('test', 300, pedestrian['num_classes'])
    net.load_weights(model_path)
    net = net.cuda()
    images = os.listdir(data_path)
    with open(res_file, "w") as f:
        for image in images:
            img = cv2.imread(os.path.join(data_path, image), cv2.IMREAD_COLOR)
            # RGB copy is used only for its shape when rescaling boxes below.
            rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            x = cv2.resize(img, (300, 300)).astype(np.float32)
            # Subtract per-channel means (BGR order) — presumably matches the
            # training-time normalisation; TODO confirm against the train transform.
            x -= (104.0, 117.0, 123.0)
            x = x.astype(np.float32)
            # BGR -> RGB flip; .copy() makes the array contiguous for torch.from_numpy.
            x = x[:, :, ::-1].copy()
            x = torch.from_numpy(x).permute(2, 0, 1)  # HWC -> CHW
            xx = Variable(x.unsqueeze(0))  # wrap tensor in Variable
            if torch.cuda.is_available():
                xx = xx.cuda()
            y = net(xx)
            from data import DATA_CLASSES as labels
            top_k = 10  # NOTE(review): assigned but never used below
            # Assumed layout: detections[0, class, rank, 0] = score,
            # [.., 1:] = normalised box — TODO confirm from build_ssd('test', ...).
            detections = y.data
            # (w, h, w, h) factors to map normalised boxes back to image pixels.
            scale = torch.Tensor(rgb_img.shape[1::-1]).repeat(2)
            for i in range(detections.size(1)):
                j = 0
                # Walk ranked detections until the score drops below 0.01.
                while detections[0, i, j, 0] >= 0.01:
                    score = detections[0, i, j, 0]
                    label_name = labels[i - 1]  # NOTE(review): unused below
                    pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
                    # NOTE(review): coords is computed but never used.
                    coords = (pt[0], pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
                    j += 1
                    f.write("{} {} {} {} {} {}\n".format(
                        image, score, pt[0], pt[1], pt[2], pt[3]))
def build_model(self):
    """Construct the backbone and the SSD detection network.

    Side effects: populates the image/label dictionary and sets
    ``input_shape``, ``backbone``, ``n_anchors``, ``feature_shapes``
    and ``ssd`` on the instance.
    """
    # Populate the image-file / label dictionary first.
    self.build_dictionary()
    # Input shape, e.g. (480, 640, 3).
    self.input_shape = (self.args.height, self.args.width, self.args.channels)
    # Backbone (e.g. ResNet50); its n_layers feature maps feed the SSD
    # heads that predict classes and box offsets.
    self.backbone = self.args.backbone(self.input_shape,
                                       n_layers=self.args.layers)
    # build_ssd returns: anchors per feature point (e.g. 4), the list of
    # per-layer feature-map shapes (used to size anchor boxes), and the
    # SSD network itself.
    self.n_anchors, self.feature_shapes, self.ssd = build_ssd(
        self.input_shape,
        self.backbone,
        n_layers=self.args.layers,
        n_classes=self.n_classes,
    )
def test_voc():
    """Evaluate a trained SSD300 detector on the VOC2007 test split."""
    # Inference-mode network, restored from the trained checkpoint.
    model = build_ssd('test', 300, voc)
    model.load_state_dict(torch.load(args.trained_model))
    model.eval()
    print('Finished loading model!')
    # Test set with the standard mean/std normalisation transform.
    dataset = VOCDetection(args.voc_root, [('2007', 'test')],
                           BaseTransform(300, voc['mean'], voc['std']))
    if args.cuda:
        model = model.cuda()
        torch.backends.cudnn.benchmark = True
    # Run detection over the whole test set.
    test_net(args.save_folder, model, args.cuda, dataset,
             BaseTransform(300, voc['mean'], voc['std']),
             thresh=args.visual_threshold)
def test_voc():
    """Evaluate a trained SSD300 detector on the LAJI test split (GPU 1)."""
    torch.cuda.set_device(1)
    model = build_ssd('test', 300, laji)
    # Map checkpoint tensors straight onto cuda:1.
    model.load_state_dict(torch.load(args.trained_model,
                                     map_location="cuda:1"))
    model.eval()
    print('Finished loading model!')
    dataset = LAJIDetection(args.laji_root, [('2007', 'test')],
                            BaseTransform(300, laji['mean'], laji['std']))
    if args.cuda:
        model = model.cuda()
        # torch.backends.cudnn.benchmark = True
    # Run detection over the whole test set.
    test_net(args.save_folder, model, args.cuda, dataset,
             BaseTransform(300, laji['mean'], laji['std']),
             thresh=args.visual_threshold)
def build_model(self, build_basenet):
    """Build the base network and the SSD model stacked on top of it.

    Args:
        build_basenet: factory taking (input_shape, n_layers) and
            returning the feature-extraction network.
    """
    self.build_dictionary()
    # Probe the first dataset image to discover the input shape.
    first_path = os.path.join(config.params['data_path'], self.keys[0])
    sample = skimage.img_as_float(imread(first_path))
    self.input_shape = sample.shape
    # Base (feature-extraction) network.
    self.basenetwork = build_basenet(self.input_shape,
                                     n_layers=self.n_layers)
    self.basenetwork.summary()
    # plot_model(self.basenetwork,
    #            to_file="basenetwork.png",
    #            show_shapes=True)
    # build_ssd returns: anchors per feature point (e.g. 4), the
    # feature-map shape backing the class/offset predictions, and the
    # SSD network itself.
    self.n_anchors, self.feature_shapes, self.ssd = build_ssd(
        self.input_shape,
        self.basenetwork,
        n_layers=self.n_layers,
        n_classes=self.n_classes)
    self.ssd.summary()
def train():
    """Train an SSD detector on COCO / VOC / LAJI.

    Reads the dataset choice and hyper-parameters from the module-level
    ``args``/``parser``; writes TensorBoard logs and periodic checkpoints
    under ``<args.save_path>/<net_name>_<config>``.
    """
    # ---- dataset and dataloader -------------------------------------
    print(args.dataset)
    if args.dataset == 'COCO':
        if not os.path.exists(COCO_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = coco
        dataset = COCODetection(root=COCO_ROOT,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS),
                                filename='train.txt')
    elif args.dataset == 'VOC':
        if not os.path.exists(VOC_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = voc
        dataset = VOCDetection(root=VOC_ROOT,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         mean=cfg['mean'],
                                                         std=cfg['std']))
        print(len(dataset))
    elif args.dataset == 'LAJI':
        if not os.path.exists(LAJI_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = laji_se_resnext101_32x4d
        dataset = LAJIDetection(root=LAJI_ROOT,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          mean=cfg['mean'],
                                                          std=cfg['std']))
        print(len(dataset))
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    # ---- build / restore the network --------------------------------
    ssd_net = build_ssd('train', size=cfg['min_dim'], cfg=cfg)
    start_iter = args.start_iter
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        data_tmp = torch.load(args.resume)
        # Strip a leading "module." left behind by DataParallel checkpoints.
        # BUGFIX: the original used k.lstrip("module."), which strips any
        # leading run of the CHARACTERS {m,o,d,u,l,e,.} — not the prefix —
        # and so mangles real parameter names (e.g. "extras.0.weight"
        # becomes "xtras.0.weight"), breaking load_state_dict.
        prefix = "module."
        data_tmp = {
            (k[len(prefix):] if k.startswith(prefix) else k): v
            for k, v in data_tmp.items()
        }
        ssd_net.load_state_dict(data_tmp)
        # Checkpoints are named <net_name>_<epoch>_<step>.pth, so the
        # second-to-last "_"-field is the epoch to resume from.
        start_iter = int(args.resume.split("/")[-1].split("_")[-2])
        print("start_iter is {}".format(start_iter))
    if args.cuda:
        net = ssd_net.cuda()
        # net = torch.nn.DataParallel(net)
    net.train()

    # ---- optimizer and loss -----------------------------------------
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # loss: SmoothL1 \ Iou \ Giou \ Diou \ Ciou
    print(cfg['losstype'])
    criterion = MultiBoxLoss(cfg=cfg, overlap_thresh=0.5,
                             prior_for_matching=True, bkg_label=0,
                             neg_mining=True, neg_pos=3, neg_overlap=0.5,
                             encode_target=False, use_gpu=args.cuda,
                             loss_name=cfg['losstype'])

    # ---- logging / checkpoint directories ---------------------------
    project_name = "_".join([args.net_name, args.config])
    pth_path = os.path.join(args.save_path, project_name)
    log_path = os.path.join(pth_path, 'tensorboard')
    os.makedirs(pth_path, exist_ok=True)
    os.makedirs(log_path, exist_ok=True)
    writer = SummaryWriter(
        log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name, epoch_size)
    iteration = args.start_iter
    step_index = 0
    loc_loss = 0
    conf_loss = 0
    step = 0
    num_iter_per_epoch = len(data_loader)

    # ---- main training loop -----------------------------------------
    # NOTE(review): the original resumes by using the restored epoch as the
    # range start; kept as-is for compatibility.
    for epoch in range(start_iter, args.max_epoch):
        progress_bar = tqdm(data_loader)
        for ii, batch_iterator in enumerate(progress_bar):
            iteration += 1
            if step in cfg['lr_steps']:
                step_index += 1
            # Called every iteration: the extended signature (epoch,
            # iteration, num_iter_per_epoch) indicates per-iteration
            # (warm-up style) scheduling.
            adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                 iteration, num_iter_per_epoch)
            images, targets = batch_iterator
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]
            t0 = time.time()
            out = net(images, 'train')
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            # `weight` is a module-level localisation-loss weighting factor.
            loss = weight * loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()
            progress_bar.set_description(
                'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                .format(step, epoch, args.max_epoch, ii + 1,
                        num_iter_per_epoch, loss_c.item(), loss_l.item(),
                        loss.item()))
            writer.add_scalars('Loss', {'train': loss}, step)
            writer.add_scalars('Regression_loss',
                               {'train': loss_l.item()}, step)
            writer.add_scalars('Classfication_loss',
                               {'train': loss_c.item()}, step)
            current_lr = optimizer.param_groups[0]['lr']
            writer.add_scalar('learning_rate', current_lr, step)
            step += 1
            # Periodic checkpoint every 4000 optimisation steps; the
            # accumulated loss counters reset alongside the save.
            if step != 0 and step % 4000 == 0:
                torch.save(
                    net.state_dict(),
                    os.path.join(pth_path,
                                 f'{args.net_name}_{epoch}_{step}.pth'))
                loc_loss = 0
                conf_loss = 0
    # Final checkpoint after the last epoch.
    torch.save(net.state_dict(),
               os.path.join(pth_path,
                            f'{args.net_name}_{epoch}_{step}.pth'))
def train():
    """Train an SSD detector on COCO / VOC / CRACK with optional visdom plots.

    Uses module-level ``args``/``parser`` for configuration; saves
    checkpoints under ``args.work_dir + cfg['work_name']``.
    """
    # ---- get the dataset and dataloader ----
    print(args.dataset)
    if args.dataset == 'COCO':
        if not os.path.exists(COCO_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = coco
        dataset = COCODetection(root=COCO_ROOT,
                                transform=SSDAugmentation(
                                    cfg['min_dim'], MEANS),
                                filename='train.txt')
    elif args.dataset == 'VOC':
        if not os.path.exists(VOC_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = voc
        dataset = VOCDetection(root=VOC_ROOT,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         mean=cfg['mean'],
                                                         std=cfg['std']))
        print(len(dataset))
    elif args.dataset == 'CRACK':
        if not os.path.exists(CRACK_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = crack
        dataset = CRACKDetection(root=CRACK_ROOT,
                                 transform=SSDAugmentation(cfg['min_dim'],
                                                           mean=cfg['mean'],
                                                           std=cfg['std']))
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # build, load, the net
    ssd_net = build_ssd('train', size=cfg['min_dim'], cfg=cfg)
    '''
    for name,param in ssd_net.named_parameters():
        if param.requires_grad:
            print(name)
    '''
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_state_dict(torch.load(args.resume))
    if args.cuda:
        # NOTE(review): `net` is only bound when args.cuda is true; the
        # CPU path would hit a NameError below.
        net = ssd_net.cuda()
    net.train()
    # optimizer
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # loss: SmoothL1\Iou\Giou\Diou\Ciou
    print(cfg['losstype'])
    criterion = MultiBoxLoss(cfg=cfg, overlap_thresh=0.5,
                             prior_for_matching=True, bkg_label=0,
                             neg_mining=True, neg_pos=3, neg_overlap=0.5,
                             encode_target=False, use_gpu=args.cuda,
                             loss_name=cfg['losstype'])
    # Optional live plotting via visdom (iteration- and epoch-level plots).
    if args.visdom:
        import visdom
        viz = visdom.Visdom(env=cfg['work_name'])
        vis_title = 'SSD on ' + args.dataset
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot(viz, 'Iteration', 'Loss', vis_title,
                                    vis_legend)
        epoch_plot = create_vis_plot(viz, 'Epoch', 'Loss',
                                     vis_title + " epoch loss", vis_legend)
        #epoch_acc = create_acc_plot(viz,'Epoch', 'acc', args.dataset+" Acc",["Acc"])
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name, epoch_size)
    iteration = args.start_iter
    step_index = 0
    loc_loss = 0   # accumulated localisation loss (reset per epoch when plotting)
    conf_loss = 0  # accumulated classification loss
    for epoch in range(args.max_epoch):
        for ii, batch_iterator in tqdm(enumerate(data_loader)):
            iteration += 1
            # Step the LR schedule at the configured iteration milestones.
            if iteration in cfg['lr_steps']:
                step_index += 1
                adjust_learning_rate(optimizer, args.gamma, step_index)
            # load train data
            images, targets = batch_iterator
            #print(images,targets)
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]
            else:
                images = images
                targets = [ann for ann in targets]
            t0 = time.time()
            out = net(images, 'train')
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            # `weight` is a module-level localisation-loss weighting factor.
            loss = weight * loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()
            #print(iteration)
            if iteration % 10 == 0:
                print('timer: %.4f sec.' % (t1 - t0))
                print('iter ' + repr(iteration) +
                      ' || Loss: %.4f ||' % (loss.item()), end=' ')
            if args.visdom:
                # Skip the first 20 iterations so the warm-up spike does
                # not dominate the plot scale.
                if iteration > 20 and iteration % 10 == 0:
                    update_vis_plot(viz, iteration, loss_l.item(),
                                    loss_c.item(), iter_plot, epoch_plot,
                                    'append')
        # Checkpoint every 10th epoch once past epoch 60.
        if epoch % 10 == 0 and epoch > 60:
            #epoch>1000 and epoch % 50 == 0:
            print('Saving state, iter:', iteration)
            #print('loss_l:'+weight * loss_l+', loss_c:'+'loss_c')
            save_folder = args.work_dir + cfg['work_name']
            if not os.path.exists(save_folder):
                os.mkdir(save_folder)
            torch.save(
                net.state_dict(),
                args.work_dir + cfg['work_name'] + '/ssd' + repr(epoch) +
                '_.pth')
        if args.visdom:
            update_vis_plot(viz, epoch, loc_loss, conf_loss, epoch_plot,
                            epoch_plot, 'append', epoch_size)
            loc_loss = 0
            conf_loss = 0
    # Final checkpoint after the last epoch.
    torch.save(
        net.state_dict(),
        args.work_dir + cfg['work_name'] + '/ssd' + repr(epoch) +
        str(args.weight) + '_.pth')
def train():
    """Train an SSD detector on the Damage dataset with periodic validation.

    Reads configuration from module-level ``config`` and ``args``; saves
    checkpoints under ``args.save_folder + 'model/'`` and per-checkpoint
    validation losses as JSON under ``args.save_folder + 'eval/'``.
    """
    cfg = config.Damage
    train_dataset = Damage_Dataset(name='train',
                                   label_root=args.train_label,
                                   transform=SSDAugmentation(mean=config.MEANS))
    val_dataset = Damage_Dataset(name='validation',
                                 label_root=args.val_label,
                                 transform=BaseTransform(mean=config.MEANS))
    ssd_net = build_ssd('train', cfg['min_dim'], config.num_classes)
    net = ssd_net
    #cycle_cos_lr = cycle_lr(500, cfg['peak_lr'], cfg['T_init'], cfg['T_warmup'])
    if args.cuda:
        # Wrap for multi-GPU; ssd_net keeps a handle on the raw module so
        # checkpoints below are saved without the DataParallel wrapper.
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        # Fresh run: start from ImageNet-pretrained VGG16 base weights.
        vgg_weights = torch.load('../../pretrained/vgg16_reducedfc.pth')
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)
    if args.cuda:
        net = net.cuda()
    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)
    #optimizer = optim.SGD(net.parameters(), lr=config.lr, momentum=config.momentum,
    #                      weight_decay=config.weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=config.lr,
                           weight_decay=config.weight_decay)
    criterion = MultiBoxLoss(config.num_classes, overlap_thresh=0.5,
                             prior_for_matching=True, bkg_label=0,
                             neg_mining=True, neg_pos=3, neg_overlap=0.5,
                             encode_target=False, use_gpu=args.cuda)
    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the train dataset...')
    epoch_size = len(train_dataset) // config.batch_size
    print('Training SSD on:', train_dataset.name)
    print('Using the specified args:')
    print(args)
    step_index = 0
    if args.visdom:
        vis_title = os.getcwd().split('/')[-1]
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title,
                                    vis_legend)
        #epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)
        iter_val_plot = create_vis_plot('Iteration', 'Val Loss', vis_title,
                                        vis_legend)
    data_loader = data.DataLoader(train_dataset, config.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    val_data_loader = data.DataLoader(val_dataset, config.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=True,
                                      collate_fn=detection_collate,
                                      pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    val_batch_iterator = iter(val_data_loader)
    # Running per-class ground-truth label counts (assumes 3 classes).
    num = [0, 0, 0]
    for iteration in range(args.start_iter, config.max_iter):
        #if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
        #    update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
        #                    'append', epoch_size)
        #    # reset epoch loss counters
        #    loc_loss = 0
        #    conf_loss = 0
        #    epoch += 1
        if iteration in config.lr_steps:
            step_index += 1
            adjust_learning_rate(optimizer, config.gamma, step_index)
        # cycle lr
        #for param_group in optimizer.param_groups:
        #    param_group['lr'] = cycle_cos_lr.get_lr(iteration)
        # load train data; restart the iterator when the epoch is exhausted
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)
        # calculate the frequency of every class
        for i in targets:
            # Column 4 of each annotation row is assumed to be the class
            # label — TODO confirm against detection_collate's output.
            labels = np.array(i)[:, 4]
            for j in labels:
                num[int(j)] += 1
        if args.cuda:
            images = Variable(images.cuda())
            # NOTE(review): volatile=True is a deprecated pre-0.4 idiom;
            # modern code would use torch.no_grad() — kept as-is here.
            targets = [Variable(ann.cuda(), volatile=True)
                       for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) +
                  ' || Loss: %.4f ||' % (loss.item()), end=' ')
            print(num)
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * iteration,
                    Y=torch.Tensor([loss_l, loss_c,
                                    loss]).unsqueeze(0).cpu(),
                    win=iter_plot,
                    update='True' if iteration == 10 else 'append'
                )
        # Periodic validation pass every 100 iterations.
        if iteration % 100 == 0 and iteration != 0:
            val_loss_l, val_loss_c, val_loss = val(net, val_data_loader,
                                                   criterion)
            print('Val_Loss: %.4f ||' % (val_loss.item()), end=' ')
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * iteration,
                    Y=torch.Tensor([val_loss_l, val_loss_c,
                                    val_loss]).unsqueeze(0).cpu(),
                    win=iter_val_plot,
                    update='True' if iteration == 100 else 'append'
                )
        #if args.visdom:
        #update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
        #    update_vis_plot(iteration, loss_l.item(), loss_c.item(),
        #                    iter_plot, epoch_plot, 'append')
        # Checkpoint (and dump the latest validation losses) every 1000 its.
        # NOTE(review): val_loss_* are only defined after the first %100
        # validation — a resume with start_iter on a 1000-boundary but not
        # past a 100-boundary would raise NameError.
        if iteration != 0 and iteration % 1000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       args.save_folder + 'model/' + repr(iteration) +
                       '.pth')
            loss_file = {'loss': val_loss.item(),
                         'loc_loss': val_loss_l.item(),
                         'conf_loss': val_loss_c.item()}
            with open(os.path.join(args.save_folder, 'eval',
                                   repr(iteration) + '.json'), 'w') as f:
                json.dump(loss_file, f)
    # Final checkpoint.
    torch.save(ssd_net.state_dict(),
               args.save_folder + 'model/' + 'leaves' + '.pth')
return do_python_eval(data_dir, eval_type) if __name__ == '__main__': if torch.cuda.is_available(): if args.cuda: torch.set_default_tensor_type('torch.cuda.FloatTensor') if not args.cuda: print( "WARNING: It looks like you have a CUDA device, but aren't using \ CUDA. Run with --cuda for optimal eval speed.") torch.set_default_tensor_type('torch.FloatTensor') else: torch.set_default_tensor_type('torch.FloatTensor') num_classes = len(labelmap) + 1 # +1 for background net = build_ssd('test', size=300, cfg=voc) # initialize SSD net.load_state_dict(torch.load(args.trained_model)) print('Finished loading model!') # load data dataset = VOCDetection(args.voc_root, [('2007', 'test')], BaseTransform(300, voc['mean'], voc['std'])) if args.cuda: net = net.cuda() #torch.backends.cudnn.benchmark = True net.eval() # evaluation devkit_path = VOC_ROOT + 'VOC2007/' all_boxes = test_net(args.save_folder,
def test_img(net_filepath, img_folder, tiles, batch_size, skip=300):
    """Tile large images, run SSD on each tile, and merge detections.

    Args:
        net_filepath: path to the trained SSD checkpoint (loaded on CPU).
        img_folder: path to a JSON file whose keys are image file paths.
        tiles: iterable of tile sizes (pixels) to slide over each image.
        batch_size: tiles per inference batch.
        skip: pixels cropped from the top and bottom of each image before
            tiling (restored as an offset when mapping boxes back).

    Returns:
        dict mapping image file path -> NMS-filtered detections, where each
        detection is [xs, ys, xe, ye, score, class_index] in original image
        coordinates.
    """
    # load net
    num_classes = config.num_classes
    net = build_ssd('test', 300, num_classes)
    # initialize SSD
    net.load_state_dict(
        torch.load(net_filepath, map_location=torch.device('cpu')))
    net.eval()
    print('Finished loading model!')
    # load data
    transform = BaseTransform()
    with open(img_folder) as f:
        labels = json.load(f)
    img_names = list(labels.keys())
    data = {}
    for k in tqdm(range(len(img_names))):
        dets = []
        for tile in tiles:
            print(len(dets))
            # Adjacent tiles overlap by 15% so objects on tile borders are
            # still fully contained in at least one tile.
            overlap = int(0.15 * tile)
            img_file = img_names[k]
            img = cv2.imread(img_file)
            img = img[:, :, ::-1]  # BGR -> RGB
            # skip the image boundary (crop `skip` px top and bottom)
            h, w, c = img.shape
            img = img[skip:h - skip, :, :]
            h, w, c = img.shape
            imgs = []
            stride = tile - overlap
            h_num = (h - tile) // stride + 1
            w_num = (w - tile) // stride + 1
            for i in range(h_num):
                for j in range(w_num):
                    # split the image into tiles
                    x = img[i * stride:(i * stride + tile),
                            j * stride:(j * stride + tile), :]
                    imgs.append(x)
            # stack tiles
            input = np.stack(imgs, axis=0)
            bbox = []
            # Batched inference over the tile stack; `predict` is assumed
            # to tag each detection with the index of its source tile.
            for i in range((input.shape[0] - 1) // batch_size + 1):
                bbox += predict(input[batch_size * i:batch_size * (i + 1)],
                                transform, net, i * batch_size)
            # TODO:
            # Map per-tile box coordinates back into the original image.
            for i in range(len(bbox)):
                xs, ys, xe, ye = bbox[i]['bbox'][:]
                tile_ind = bbox[i]['tile']
                class_index = bbox[i]['index']
                xdiff = xe - xs
                ydiff = ye - ys
                # Tiles were appended row-major, so recover (row, col).
                row_num = tile_ind // w_num
                col_num = tile_ind % w_num
                # compute offset (re-add the `skip` crop vertically)
                ys += row_num * stride + skip
                xs += col_num * stride
                xe = xs + xdiff
                ye = ys + ydiff
                score = bbox[i]['score']
                dets.append([xs, ys, xe, ye, score, class_index])
        #print(len(dets))
        # NMS across all tile sizes removes duplicates from overlaps.
        keep = nms(dets, 0.5)
        print(len(keep))
        data[img_file] = keep
    return data
torch.cuda.manual_seed(42) else: torch.manual_seed(42) torch.cuda.set_device(2) if torch.cuda.is_available(): if args.cuda: torch.set_default_tensor_type('torch.cuda.FloatTensor') if not args.cuda: print("WARNING: It looks like you have a CUDA device, but aren't using \ CUDA. Run with --cuda for optimal eval speed.") torch.set_default_tensor_type('torch.FloatTensor') else: torch.set_default_tensor_type('torch.FloatTensor') num_classes = len(labelmap) + 1 # +1 for background net = build_ssd('test', size=300, cfg=laji_se_resnext101_32x4d) # initialize SSD data_tmp = torch.load(args.trained_model, map_location="cuda:2") data_tmp = {k.lstrip("module."): v for k, v in data_tmp.items()} net.load_state_dict(data_tmp) # net.load_state_dict(torch.load(args.trained_model, map_location="cuda:2")) print('Finished loading model!') # load data dataset = LAJIDetection(args.laji_root, [('2007', 'test')], BaseTransform(300, laji_se_resnext101_32x4d['mean'], laji_se_resnext101_32x4d['std'])) if args.cuda: net = net.cuda() torch.backends.cudnn.benchmark = True net.eval() # evaluation
def main():
    """Run a (T)SSD detector over a video, drawing and optionally recording
    per-frame detections.

    Uses module-level configuration: model_dir, ssd_dim, num_classes, tssd,
    top_k, confidence_threshold, nms_threshold, attention, prior, tub,
    tub_thresh, tub_generate_score, video_name, save_dir, dataset_name.
    Interactive keys: SPACE pauses/resumes; while paused, 's' saves the
    current frame and its detections.
    """
    mean = (104, 117, 123)  # per-channel means used by base_transform
    trained_model = model_dir
    print('loading model!')
    net = build_ssd('test', ssd_dim, num_classes, tssd=tssd,
                    top_k=top_k, thresh=confidence_threshold,
                    nms_thresh=nms_threshold, attention=attention,
                    prior=prior, tub=tub, tub_thresh=tub_thresh,
                    tub_generate_score=tub_generate_score)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', model_dir)
    net = net.cuda()
    cudnn.benchmark = True
    # Start decoding at frame 9900 of the input video.
    frame_num = 9900
    cap = cv2.VideoCapture(video_name)
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(w, h)
    size = (640, 480)  # display/recording resolution
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        record = cv2.VideoWriter(
            os.path.join(save_dir,
                         video_name.split('/')[-1].split('.')[0] +
                         '_OTA.avi'), fourcc, cap.get(cv2.CAP_PROP_FPS),
            size)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    att_criterion = AttentionLoss((h, w))
    # Recurrent variants carry hidden state across frames.
    state = [None] * 6 if tssd in ['lstm', 'tblstm'] else None
    while (cap.isOpened()):
        ret, frame = cap.read()
        if not ret:
            break
        frame_draw = frame.copy()
        frame_num += 1
        im_trans = base_transform(frame, ssd_dim, mean)
        with torch.no_grad():
            # HWC -> NCHW on the GPU.
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                0, 3, 1, 2).cuda()
            if tssd == 'ssd':
                detections, att_map = net(x)
            else:
                detections, state, att_map = net(x, state)
        # Collect per-class detections above the confidence threshold.
        out = list()
        for j in range(1, detections.size(1)):
            if detections[0, j, :, :].sum() == 0:
                continue
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.sum() == 0:
                    continue
                # 6-wide rows carry a trailing tracking identity;
                # 5-wide rows are plain [score, box].
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)
                score = dets[0]
                if score > confidence_threshold:
                    out.append([
                        x_min, y_min, x_max, y_max, j - 1,
                        score.cpu().numpy(), identity
                    ])
        if attention:
            _, up_attmap = att_criterion(
                att_map)  # scale, batch, tensor(1,h,w)
            att_target = up_attmap[0][0].cpu().data.numpy().transpose(
                1, 2, 0)
        # Draw every detection with a dataset-specific colour/label scheme.
        for object in out:
            x_min, y_min, x_max, y_max, cls, score, identity = object
            if dataset_name in ['MOT15']:
                put_str = str(int(identity))
                # Hand-picked colours for a few tracked identities.
                if identity in [34]:
                    color = (0, 0, 255)
                elif identity in [35]:
                    color = (0, 200, 0)
                elif identity in [58]:
                    color = (255, 0, 255)
                # elif identity in [3]:
                #     color = (255, 0, 255)
                # elif identity in [4]:
                #     color = (0, 128, 255)
                # elif identity in [5]:
                #     color = (255, 128, 128)
                else:
                    color = (255, 0, 0)
            elif dataset_name in ['VID2017']:
                # NOTE(review): this branch sets put_str but not color —
                # color then carries over from a previous loop pass or
                # raises NameError on the first detection.
                put_str = str(
                    int(identity)) + ':' + VID_CLASSES_name[cls] + ':' + str(
                        np.around(score, decimals=2))
            elif dataset_name in ['UW']:
                put_str = str(int(identity))
                if cls == 0:
                    color = (min(int(identity) + 1, 255), 0, 255)
                elif cls == 1:
                    color = (255, min(int(identity) + 1, 255), 0)
                elif cls == 2:
                    color = (min(int(identity) + 1, 255), 128, 0)
            cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max),
                          color, thickness=2)
            # Filled label background strip above the box.
            cv2.fillConvexPoly(
                frame_draw,
                np.array([[x_min - 1, y_min], [x_min - 1, y_min - 50],
                          [x_max + 1, y_min - 50], [x_max + 1, y_min]],
                         np.int32), color)
            cv2.putText(frame_draw, put_str, (x_min + 10, y_min - 10),
                        cv2.FONT_HERSHEY_DUPLEX, 0.8,
                        color=(255, 255, 255), thickness=1)
            print(
                str(frame_num) + ':' + str(np.around(score, decimals=2)) +
                ',' + VID_CLASSES_name[cls])
        if not out:
            print(str(frame_num))
        frame_show = cv2.resize(frame_draw, size)
        cv2.imshow('frame', frame_show)
        # cv2.imshow('att', cv2.resize(att_target, size))
        if save_dir:
            record.write(frame_show)
        ch = cv2.waitKey(1)
        if ch == 32:  # SPACE: pause until SPACE again
            # if frame_num in [44]:
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's': dump detections + raw frame
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(
                            out,
                            os.path.join(save_dir,
                                         tssd + '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(
                            os.path.join(save_dir,
                                         '%s.jpg' % str(frame_num)), frame)
                elif in_ch == 32:
                    break
    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()
import torch from model import build_ssd, build_ssd_resnet from collections import OrderedDict ssdbn_net = build_ssd('train', 300, 31, bn=False) ssd_net = build_ssd('train', 300, 31, bn=False) # net = torch.nn.DataParallel(ssd_net) # torch.save(net.state_dict(),'test.pth') vgg_weights = torch.load('../../weights/vgg16_reducedfc_512.pth') vggbn_weights = torch.load('../../weights/vgg16_bn-6c64b313.pth') # ssd_weights = torch.load('../weights/ssd300_VID2017/ssd300_VID2017_290000.pth') vgg16bn_reducedfc_512_weights = OrderedDict() # ssd_extras_weights = OrderedDict() for key, weight in vggbn_weights.items(): key_class = key.split('.')[0] if key_class == 'features': key = key.split('.')[1] + '.' + key.split('.')[2] vgg16bn_reducedfc_512_weights[key] = weight vgg16bn_reducedfc_512_weights['44.weight'] = vgg_weights['31.weight'] vgg16bn_reducedfc_512_weights['44.bias'] = vgg_weights['31.bias'] vgg16bn_reducedfc_512_weights['45.weight'] = vgg16bn_reducedfc_512_weights[ '41.weight'].repeat(2) vgg16bn_reducedfc_512_weights['45.bias'] = vgg16bn_reducedfc_512_weights[ '41.bias'].repeat(2) vgg16bn_reducedfc_512_weights[ '45.running_mean'] = vgg16bn_reducedfc_512_weights[ '41.running_mean'].repeat(2) vgg16bn_reducedfc_512_weights[
def train():
    """Train an SSD pedestrian detector, logging to TensorBoard (module-level
    ``writer``) and optionally to visdom.

    Uses module-level ``args``, ``train_params``, ``pedestrian`` config;
    saves checkpoints to ``train_params['save_folder']``.
    """
    dataset = PedestrainDataset(root=DATA_ROOT,
                                transform=SSDAugmentation(
                                    pedestrian['min_dim'], MEANS))
    if args.visdom:
        import visdom
        viz = visdom.Visdom()
    ssd_net = build_ssd('train', pedestrian['min_dim'],
                        pedestrian['num_classes'])
    net = ssd_net
    if train_params['cuda']:
        # `ssd_net` keeps the raw module handle so layer-wise init below
        # bypasses the DataParallel wrapper.
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        # Fresh run: start from ImageNet-pretrained VGG16 base weights.
        vgg_weights = torch.load("./weights/vgg16_reducedfc.pth")
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)
    if train_params['cuda']:
        net = net.cuda()
    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)
    optimizer = optim.SGD(net.parameters(),
                          lr=train_params['learning_rate'],
                          momentum=train_params['momentum'],
                          weight_decay=train_params['weight_decay'])
    # Positional args: (num_classes, overlap_thresh, prior_for_matching,
    # bkg_label, neg_mining, neg_pos, neg_overlap, encode_target, use_gpu).
    criterion = MultiBoxLoss(pedestrian['num_classes'], 0.5, True, 0, True,
                             3, 0.5, False, train_params['cuda'])
    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')
    epoch_size = len(dataset) // train_params['batch_size']
    print(epoch_size)
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)
    step_index = 0
    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title,
                                    vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title,
                                     vis_legend)
    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    for iteration in range(args.start_iter, int(pedestrian['max_iter'])):
        # Restart the (shuffled) iterator at each epoch boundary.
        if iteration % epoch_size == 0:
            batch_iterator = iter(data_loader)
        if args.visdom and iteration != 0 and (iteration %
                                               epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1
        # Step LR schedule at the configured iteration milestones.
        if iteration in pedestrian['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, train_params['gamma'],
                                 step_index)
        # load train data
        images, targets = next(batch_iterator)
        if train_params['cuda']:
            images = Variable(images.cuda())
            # NOTE(review): volatile=True is a deprecated pre-0.4 idiom;
            # modern code would use torch.no_grad() — kept as-is here.
            targets = [Variable(ann.cuda(), volatile=True)
                       for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        print("forward ok")
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        print("loss ok")
        loss = loss_l + loss_c
        loss.backward()
        print("backward ok")
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        # `writer` is a module-level SummaryWriter.
        writer.add_scalar('Loss', loss, iteration)
        writer.add_scalar('Loss_c', loss_c, iteration)
        writer.add_scalar('Loss_l', loss_l, iteration)
        # Logged every iteration (modulus 1).
        if iteration % 1 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) +
                  ' || Loss: %.4f ||' % (loss.item()), end=' ')
        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')
        # Checkpoint every 2000 iterations.
        if iteration != 0 and iteration % 2000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(
                net.state_dict(),
                train_params['save_folder'] + 'iter_' + repr(iteration) +
                '.pth')
    # Final checkpoint.
    torch.save(net.state_dict(),
               train_params['save_folder'] + 'PedestrainDetection.pth')
if __name__ == '__main__': os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ['CUDA_VISIBLE_DEVICES'] = "1" device = torch.device("cuda" if torch.cuda.is_available() else "cpu") img_dir = 'input/' gt_dir = 'input/' # load data dataset = PillDataset(img_dir, gt_dir) pill_dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1, collate_fn=make_batch) # load model ssd_net = build_ssd(opt, size=300, num_classes=opt['num_classes'] + 1) ssd_net = load_model(checkpoint_dir='./weights/ssd_epoch_0400_loss_1.9637260580.pth', net=ssd_net) ssd_net = ssd_net.to(device) ssd_net.eval() print('Finished loading SSD model!') east_net = EAST(False).to(device) # east_net = load_model(checkpoint_dir='./weights/east_epoch_200.pth', net=east_net) east_net.load_state_dict(torch.load('./weights/east_epoch_200.pth')) east_net.eval() print('Finished loading EAST model!') # crnn_net = CRNN(crnn_opt).to(device) # crnn_net.load_state_dict(torch.load('./weights/crnn_best_accuracy.pth', map_location=device)) # print('Finished loading CRNN model!') for i, (img, labels) in enumerate(pill_dataloader): #print("여기이이이이")