def eval(net):  # NOTE(review): shadows the builtin `eval`; renaming would break existing callers.
    """Evaluate `net` on the VOC07 test split and return the AP dict from `voc_eval`.

    Runs single-image batches through the network, decodes SSD predictions,
    and scores them with the VOC 2007 11-point metric (use_07_metric=True).
    The net is put back into train mode before returning.
    """
    net.eval()

    def transform(img, boxes, labels):
        # Resize to the square network input, then normalize with ImageNet statistics.
        # NOTE(review): `img_size` is not defined in this function — presumably a
        # module-level constant; confirm against the rest of the file.
        img, boxes = resize(img, boxes, size=(img_size, img_size))
        img = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])(img)
        return img, boxes, labels

    dataset = ListDataset(root=args.data_root,
                          list_file=args.voc07_test, transform=transform)
    # batch_size=1: predictions are decoded per image below.
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=8)
    box_coder = SSDBoxCoder(net)

    pred_boxes = []
    pred_labels = []
    pred_scores = []
    gt_boxes = []
    gt_labels = []

    # Per-image "difficult" flags; each file line is "<name> <0/1> <0/1> ...".
    with open('torchcv/datasets/voc/voc07_test_difficult.txt') as f:
        gt_difficults = []
        for line in f.readlines():
            line = line.strip().split()
            d = np.array([int(x) for x in line[1:]])
            gt_difficults.append(d)

    for i, (inputs, box_targets, label_targets) in enumerate(dataloader):
        print('%d/%d' % (i, len(dataloader)))
        gt_boxes.append(box_targets.squeeze(0))
        gt_labels.append(label_targets.squeeze(0))
        # volatile=True: pre-PyTorch-0.4 way of disabling autograd for inference.
        loc_preds, cls_preds = net(Variable(inputs.cuda(), volatile=True))
        box_preds, label_preds, score_preds = box_coder.decode(
            loc_preds.cpu().data.squeeze(),
            F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
            score_thresh=0.01)
        pred_boxes.append(box_preds)
        pred_labels.append(label_preds)
        pred_scores.append(score_preds)

    aps = (voc_eval(pred_boxes, pred_labels, pred_scores,
                    gt_boxes, gt_labels, gt_difficults,
                    iou_thresh=0.5, use_07_metric=True))
    net.train()
    return aps
def get_pred_boxes(img, img_size, net, cls_id=0):  # 0: void
    """Run `net` on one PIL image and return only the boxes predicted as `cls_id`.

    The image is resized to (img_size, img_size), preprocessed by the
    module-level `transform`, decoded with SSDBoxCoder, and filtered by label.
    """
    inp = transform(img.resize((img_size, img_size)))
    # volatile=True disables autograd (pre-0.4 PyTorch inference idiom).
    inp = Variable(inp, volatile=True).cuda()
    loc_preds, cls_preds = net(inp.unsqueeze(0))
    coder = SSDBoxCoder(net)
    boxes, labels, scores = coder.decode(
        loc_preds.data.squeeze().cpu(),
        F.softmax(cls_preds.squeeze(), dim=1).data.cpu())
    kept = []
    for idx in range(len(boxes)):
        if labels[idx] == cls_id:
            kept.append(boxes[idx])
    return kept
def evaluate(net, img_dir, list_file, img_size, test_code):
    """Evaluate `net` on the dataset described by `list_file` and return voc_eval's AP/mAP dict.

    Images are fed one at a time; predictions are decoded with SSDBoxCoder and
    scored at IoU 0.5 with the interpolated (non-07) VOC metric. When
    `test_code` is truthy the dataset is shrunk to a single image for smoke tests.
    """
    net.cuda()
    net.eval()

    def transform(img, boxes, labels):
        # Square resize followed by ImageNet-statistics normalization.
        img, boxes = resize(img, boxes, size=(img_size, img_size))
        img = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])(img)
        return img, boxes, labels

    print('Loading dataset..')
    dataset = ListDataset(root=img_dir, list_file=list_file, transform=transform)
    if test_code:
        dataset.num_imgs = 1

    loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=2)
    coder = SSDBoxCoder(net)

    predictions = {'boxes': [], 'labels': [], 'scores': []}
    gt_boxes = []
    gt_labels = []

    for inputs, box_targets, label_targets in tqdm(loader, desc="Evaluate", ncols=0):
        gt_boxes.append(box_targets.squeeze(0))
        gt_labels.append(label_targets.squeeze(0))
        # volatile=True: inference-only forward pass (pre-0.4 PyTorch).
        loc_preds, cls_preds = net(Variable(inputs.cuda(), volatile=True))
        decoded_boxes, decoded_labels, decoded_scores = coder.decode(
            loc_preds.cpu().data.squeeze(),
            F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
            score_thresh=0.01)
        predictions['boxes'].append(decoded_boxes)
        predictions['labels'].append(decoded_labels)
        predictions['scores'].append(decoded_scores)

    ap_map_dict = voc_eval(
        predictions['boxes'], predictions['labels'], predictions['scores'],
        gt_boxes, gt_labels,
        iou_thresh=0.5, use_07_metric=False)
    return ap_map_dict
print('==> Building model..') # net = SSD512(num_classes=21) net = FPNSSD512(num_classes=81) # net.load_state_dict(torch.load(args.model)) best_loss = float('inf') # best test loss start_epoch = 0 # start from epoch 0 or last epoch if args.resume: print('==> Resuming from checkpoint..') checkpoint = torch.load(args.checkpoint) net.load_state_dict(checkpoint['net']) best_loss = checkpoint['loss'] start_epoch = checkpoint['epoch'] # Dataset print('==> Preparing dataset..') box_coder = SSDBoxCoder(net) img_size = 512 def transform_train(img, boxes, labels): img = random_distort(img) if random.random() < 0.5: img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103)) img, boxes, labels = random_crop(img, boxes, labels) img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True)
# Single-image prediction + visualization.
# NOTE(review): `image_path` and `net` are defined earlier in the file (outside this view).
img = Image.open(image_path)
ow = oh = 512  # network input size
img = img.resize((ow,oh))

print('Predicting..')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225))  # ImageNet stats
])
x = transform(img)
x = Variable(x, volatile=True)  # pre-0.4 PyTorch inference mode
loc_preds, cls_preds = net(x.unsqueeze(0))

print('Decoding..')
box_coder = SSDBoxCoder(net)
boxes, labels, scores = box_coder.decode(
    loc_preds.data.squeeze(),
    F.softmax(cls_preds.squeeze(), dim=1).data)
print(labels)
print(scores)
print(boxes)

# Draw each detection as a red rectangle with a class/score caption.
draw = ImageDraw.Draw(img)
L = ["pants","shirt","shorts","tshirt"]  # class-id -> name lookup
for box,label,score in zip(boxes,labels,scores):
    x1,y1,x2,y2 = list(box)
    draw.rectangle(list(box), outline='red')
    # Black caption background strip just inside the box's top-left corner.
    draw.rectangle((x1+1,y1+1,x1+150,y1+10), fill='black')
    draw.text((x1+3,y1+1),"{} {:0.2f}".format(L[label],score), fill='green')
# VisDA-18 detection: build a single SSD300 model and prepare the data loaders.
num_classes = 12
net_single = SSD300(num_classes=num_classes)
net_single.load_state_dict(torch.load(args.model))
best_coco_loss = float('inf')  # best test loss
start_epoch = 0  # start from epoch 0 or last epoch
if args.resume:
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load(args.checkpoint)
    net_single.load_state_dict(checkpoint['net'])
    best_coco_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']

# Dataset
print('==> Preparing dataset..')
box_coder = SSDBoxCoder(net_single)
batch_size = 64
# NOTE(review): img_size is taken from net_single.steps[-1] — presumably the
# network input resolution; confirm against the SSD300 definition.
visda_data = prepare_data(
    box_coder,
    batch_size=batch_size,
    n_workers=0,
    img_size=net_single.steps[-1],
    data_paths={
        'vda_root': '/scratch2/mytmp/render_detection_result/png_json',
        'vda_list_train': '/scratch2/mytmp/render_detection_result/listdataset/visda18-detection-train.txt',
        'vda_list_test': '/scratch2/mytmp/render_detection_result/listdataset/visda18-detection-test.txt',
        'coco_root': '/scratch2/data/coco/train2014',
        # NOTE(review): chunk truncated here — the remaining data_paths entries
        # and the closing braces are outside this view.
# Mean-teacher style setup: the teacher starts as an exact copy of the student.
# NOTE(review): `net_student` and `num_classes` are defined earlier in the file
# (outside this view).
net_teacher = SSD300(num_classes=num_classes)
net_teacher.load_state_dict(net_student.state_dict())
best_loss = float('inf')  # best test loss
start_epoch = 0  # start from epoch 0 or last epoch
if args.resume:
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load(args.checkpoint)
    net_student.load_state_dict(checkpoint['net'])  # only the student resumes from disk
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']

# Dataset
print('==> Preparing dataset..')
box_coder = SSDBoxCoder(net_student)
batch_size = 16
visda_data = prepare_data(
    box_coder,
    batch_size=batch_size,
    n_workers=0,
    img_size=net_student.steps[-1],
    # data_paths={
    #     'vda_root': '/scratch2/ben/png_json',
    #     'vda_list_train': '/scratch2/ben/visda18-detection-train.txt',
    #     'vda_list_test': '/scratch2/ben/visda18-detection-test.txt',
    #     'coco_root': '/scratch2/ben/coco',
    #     'coco_list_train': '/scratch2/ben/coco-train-8000.txt',
    #     'coco_list_test': '/scratch2/ben/coco-train-2240.txt'
    # }
    data_paths={
        'vda_root': '/scratch2/mytmp/render_detection_result//png_json',
        'vda_list_train': '/scratch2/mytmp/render_detection_result/listdataset/visda18-detection-train.txt',
        # NOTE(review): chunk truncated here — the remaining data_paths entries
        # and closing braces are outside this view.
import matplotlib
matplotlib.use('Agg')  # headless backend — must be set before importing pyplot
import matplotlib.pyplot as plt
from torchcv.datasets import UnNormalize, Compose, ToTensor, ToPILImage, Normalize, Resize, RandomHorizontalFlip, RandomResizedCrop, ColorJitter
from torchcv.models.ssd import SSD300, SSD512, SSDBoxCoder

img_size = 512
net = SSD512(num_classes=11)

# Color jitter is applied to the image alone; the co-transform below also
# handles the boxes and finally encodes targets via SSDBoxCoder.
preprocess = Compose([ColorJitter(0.5, 0.5, 0.3)])
transforms = Compose([
    RandomHorizontalFlip(),
    RandomResizedCrop((img_size,img_size), scale=(0.5, 2.0), ratio=(0.8, 1.2)),
    ToTensor(),
    # NOTE(review): these look like BDD100k channel statistics — confirm.
    Normalize((0.3465,0.3219,0.2842), (0.2358,0.2265,0.2274)),
    SSDBoxCoder(net)
])
trainset = BDD100kDataset('train', img_transform=preprocess, co_transform=transforms)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True, num_workers=32)

ori_size = (720, 1280)  # original BDD100k frame size (H, W)
# Inverse pipeline for visualization: undo normalization, back to PIL.
tensor2image = Compose([
    UnNormalize((0.3465, 0.3219, 0.2842), (0.2358, 0.2265, 0.2274)),
    ToPILImage('RGB'),
    # NOTE(review): chunk truncated here — the rest of this Compose is outside this view.
def transform(img, boxes, labels):
    # Square resize then ImageNet-statistics normalization.
    # NOTE(review): `img_size` comes from module scope — confirm it is defined
    # earlier in the file.
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])(img)
    return img, boxes, labels

dataset = ListDataset(root='/search/odin/liukuang/data/voc_all_images/',
                      list_file='torchcv/datasets/voc/voc07_test.txt', transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)
# NOTE(review): zero-argument SSDBoxCoder() — other snippets in this file pass
# the net (SSDBoxCoder(net)); confirm which API version this script targets.
box_coder = SSDBoxCoder()

pred_boxes = []
pred_labels = []
pred_scores = []
gt_boxes = []
gt_labels = []

# Per-image "difficult" flags; each line is "<name> <0/1> <0/1> ...".
# NOTE(review): here the flags are kept as plain lists, while the eval()
# elsewhere in this file wraps them in np.array — confirm voc_eval accepts both.
with open('torchcv/datasets/voc/voc07_test_difficult.txt') as f:
    gt_difficults = []
    for line in f.readlines():
        line = line.strip().split()
        d = [int(x) for x in line[1:]]
        gt_difficults.append(d)
# Training setup: optimizer, loss, and the train-time augmentation pipeline.
# NOTE(review): `net` and `IMG_SIZE` are defined earlier in the file (outside this view).
net.cuda()
cudnn.benchmark = True  # WARNING: Don't use if using images w/ diff shapes
# TODO: Check for this condition automatically
best_loss = float('inf')  # best test loss
start_epoch = 0  # start from epoch 0 or last epoch
criterion = SSDLoss()
lr = 1e-3
momentum = 0.9
weight_decay = 1e-4
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum,
                      weight_decay=weight_decay)

# Dataset
print('==> Preparing dataset..')
box_coder = SSDBoxCoder(net)

def trn_transform(img, boxes, labels):
    # SSD-style augmentation: photometric distortion, random expand-paste,
    # random crop, square resize, random horizontal flip.
    img = random_distort(img)
    if random.random() < 0.5:
        img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img, boxes, size=(IMG_SIZE, IMG_SIZE),
                        random_interpolation=True)
    img, boxes = random_flip(img, boxes)
    img = transforms.Compose([
    # NOTE(review): chunk truncated here — the rest of the Compose and the
    # function's return are outside this view.
def main(**kwargs):
    """Train DSOD on VOC07+12, plot progress to Visdom, checkpoint on new best mAP."""
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)  # 20 VOC classes + background
    start_epoch = 0  # start from epoch 0 or last epoch
    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        net.load_state_dict(checkpoint['net'])

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)
    trainset = ListDataset(root=opt.data_root,
                           list_file=[opt.voc07_trainval, opt.voc12_trainval],
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True, num_workers=8)

    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9, weight_decay=5e-4)

    best_map_ = 0
    for epoch in range(start_epoch, start_epoch + 200):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            optimizer.zero_grad()
            loc_preds, cls_preds = net(inputs)
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            optimizer.step()

            # .data[0]: pre-0.4 PyTorch scalar extraction (now .item()).
            train_loss += loss.data[0]
            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', train_loss / (batch_idx + 1))
                # Visualize predictions on the image at this batch index.
                img = predict(
                    net, box_coder,
                    os.path.join(opt.data_root, trainset.fnames[batch_idx]))
                vis.img('predict', np.array(img).transpose(2, 0, 1))

                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        aps = eval(net.module, test_num=epoch * 100 + 100)
        map_ = aps['map']
        if map_ > best_map_:
            print('Saving..')
            # NOTE(review): saves the DataParallel-wrapped state_dict
            # ('module.'-prefixed keys), yet the resume path above loads into
            # the bare net — confirm these are compatible.
            # NOTE(review): 'map' records the *previous* best since best_map_
            # is updated only after this dict is built — confirm intended.
            state = {
                'net': net.state_dict(),
                'map': best_map_,
                'epoch': epoch,
            }
            best_map_ = map_
            if not os.path.isdir(os.path.dirname(opt.checkpoint)):
                os.mkdir(os.path.dirname(opt.checkpoint))
            torch.save(state,
                       opt.checkpoint + '/%s.pth' % best_map_)
def main(**kwargs):
    """Train DSOD with gradient accumulation; checkpoint on best loss/mAP, decay LR on regression."""
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)  # 20 VOC classes + background
    start_epoch = 0  # start from epoch 0 or last epoch

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)
    trainset = ListDataset(root=opt.data_root,
                           list_file=[opt.voc07_trainval, opt.voc12_trainval],
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True, num_workers=8,
                                              pin_memory=True)

    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        net.module.load_state_dict(checkpoint['net'])

    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9, weight_decay=5e-4)

    best_map_ = 0
    best_loss = 1e100
    for epoch in range(start_epoch, start_epoch + 200):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0
        optimizer.zero_grad()
        ix = 0  # batches accumulated since the last optimizer step
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            loc_preds, cls_preds = net(inputs)
            ix += 1
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            # .data[0]: pre-0.4 PyTorch scalar extraction (now .item()).
            train_loss += loss.data[0]

            # Gradient accumulation: step every opt.iter_size batches,
            # averaging the accumulated gradients first.
            if (batch_idx + 1) % (opt.iter_size) == 0:
                # if True:
                for name, p in net.named_parameters():
                    p.grad.data.div_(ix)
                ix = 0
                optimizer.step()
                optimizer.zero_grad()

            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', train_loss / (batch_idx + 1))
                img = predict(
                    net, box_coder,
                    os.path.join(opt.data_root, trainset.fnames[batch_idx]))
                vis.img('predict', np.array(img).transpose(2, 0, 1))

                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # if (epoch+1)%10 == 0 :
        #     state = {
        #         'net': net.module.state_dict(),
        #         # 'map': best_map_,
        #         'epoch': epoch,
        #     }
        #     torch.save(state, opt.checkpoint + '/%s.pth' % epoch)
        # if (epoch+1) % 30 == 0:
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] *= 0.1

        # Keep the lowest-loss weights in /tmp; they are reloaded before eval.
        current_loss = train_loss / (1 + batch_idx)
        if current_loss < best_loss:
            best_loss = current_loss
            torch.save(net.module.state_dict(), '/tmp/dsod.pth')

        if (epoch + 1) % opt.eval_every == 0:
            net.module.load_state_dict(torch.load('/tmp/dsod.pth'))
            aps = eval(net.module)
            map_ = aps['map']
            if map_ > best_map_:
                print('Saving..')
                # NOTE(review): 'map' records the *previous* best since
                # best_map_ is updated only after this dict is built — confirm.
                state = {
                    'net': net.module.state_dict(),
                    'map': best_map_,
                    'epoch': epoch,
                }
                best_map_ = map_
                if not os.path.isdir(os.path.dirname(opt.checkpoint)):
                    os.mkdir(os.path.dirname(opt.checkpoint))
                best_path = opt.checkpoint + '/%s.pth' % best_map_
                torch.save(state, best_path)
            else:
                # mAP regressed: roll back to the best checkpoint and decay LR.
                # NOTE(review): `best_path` is unbound if this branch is hit
                # before any successful save — would raise NameError; confirm.
                net.module.load_state_dict(torch.load(best_path)['net'])
                for param_group in optimizer.param_groups:
                    param_group['lr'] *= 0.1
            vis.log(
                dict(epoch=(epoch + 1), map=map_, loss=train_loss / (batch_idx + 1)))