def main():
    parser = argparse.ArgumentParser(description='manual to this script')
    parser.add_argument('--gpu', type=str, default='0')
    parser.add_argument('--weights', type=str, default='R50-pami.pth')
    parser.add_argument('--resized', type=int, default=448)  # resize the input image to this size
    parser.add_argument('--voc-root', type=str, default='/mnt/4Tvolume/wyh/dataset/VOCdevkit/VOC2012/')
    parser.add_argument('--mat-save', type=str, default='feats_labels.mat')
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    num_classes = 21

    # Load the pretrained model weights.
    print("loading pretrained model: {}!".format(args.weights))
    model = resnet50(pretrained=False, num_classes=num_classes)
    model.load_state_dict(torch.load(args.weights), strict=False)
    print("loading complete")

    dataset = VOCDataset(args.voc_root, args.resized)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,
                                              num_workers=8, pin_memory=True)

    # Run inference to collect per-image scores, feature vectors, and ground-truth labels.
    print("generating scores and labels")
    scores, vecs, gts = inference(data_loader, dataset, model, gtcls_filter=True, start_idx=0)
    sio.savemat(args.mat_save, {'vecs': vecs, 'scores': scores, 'gts': gts})
    num_instances = len(vecs)
    print("generating scores and labels completed")

    do_cut(n_rois=num_instances, data=dataset, save='proposals/')
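# Assumed entry point (not shown in the excerpt above) so the script can be run directly.
if __name__ == '__main__':
    main()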
def train(train_config):
    logger = Logger(HOME + '/log', train_config.basenet)

    if train_config.dataset_name == 'VOC':
        cfg = voc_config
        dataset = VOCDataset(DATA_DIR, transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif train_config.dataset_name == 'COCO':
        cfg = coco_config
        dataset = COCODataset(DATA_DIR, transform=SSDAugmentation(cfg['min_dim'], MEANS))

    if train_config.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = SSD('train', train_config.basenet, cfg['min_dim'], cfg['num_classes'],
                  with_fpn=train_config.with_fpn)
    net = ssd_net

    if train_config.cuda:
        net = nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if train_config.resume:
        logger('Loading {} ...'.format(train_config.resume))
        load_weights = torch.load(train_config.resume,
                                  map_location=lambda storage, loc: storage)
        ssd_net.load_state_dict(load_weights)

    if train_config.cuda:
        net = net.cuda()

    if not train_config.resume:
        logger('Initializing weights ...')
        ssd_net.topnet.apply(weights_init)
        ssd_net.loc_layers.apply(weights_init)
        ssd_net.conf_layers.apply(weights_init)

    optimizer = optim.Adam(net.parameters(), lr=train_config.lr,
                           weight_decay=train_config.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False,
                             train_config.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0

    logger('Loading the dataset...')
    epoch_size = len(dataset) // train_config.batch_size
    logger('Training SSD on: {}'.format(dataset.name))

    step_index = 0

    if train_config.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, train_config.batch_size,
                                  num_workers=train_config.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    # create batch iterator
    batch_iterator = iter(data_loader)
    t0 = time.time()
    for iteration in range(train_config.start_iter, cfg['max_iter']):
        if train_config.visdom and iteration != 0 and (iteration % epoch_size == 0):
            # loc_loss and conf_loss are accumulated as Python floats, so they are
            # passed directly (calling .item() on them would fail).
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            logger('epoch = {} : loss = {}, loc_loss = {}, conf_loss = {}'.format(
                epoch, loc_loss + conf_loss, loc_loss, conf_loss))
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, train_config.lr, train_config.gamma, step_index)

        # restart the batch iterator at each epoch boundary, then load train data
        if iteration // epoch_size > 0 and iteration % epoch_size == 0:
            batch_iterator = iter(data_loader)
        images, targets = next(batch_iterator)

        if train_config.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()

        if train_config.visdom:
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

        if iteration % 50 == 0:
            t1 = time.time()
            logger('timer: %.4f sec. || ' % (t1 - t0) +
                   'iter ' + repr(iteration) +
                   ' || Loss: %.4f ||' % loss.item() +
                   ' || loc_loss: %.4f ||' % loss_l.item() +
                   ' || conf_loss: %.4f ||' % loss_c.item())
            t0 = time.time()

        if train_config.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            logger('Saving state, iter: %d' % iteration)
            torch.save(ssd_net.state_dict(),
                       train_config.save_folder + 'ssd224_VOC_' + repr(iteration) + '.pth')

    # final checkpoint after training completes
    torch.save(ssd_net.state_dict(), train_config.save_folder + 'ssd224_VOC.pth')
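# adjust_learning_rate and weights_init are referenced above but not shown. A minimal
# sketch consistent with how they are called (step decay of the base lr, Xavier
# initialization of conv layers); the actual implementations may differ.
def adjust_learning_rate(optimizer, lr, gamma, step_index):
    """Set the lr to the base lr decayed by gamma at every lr step."""
    new_lr = lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr


def weights_init(m):
    """Xavier-initialize conv weights and zero their biases."""
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)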
# combine class probabilities with objectness, run NMS, and write detections
# in VOC comp4 format (one file per class)
prob = prob.reshape((-1, 20))
conf = conf.reshape((-1, 1))
prob = prob * conf
nms = non_maximum_suppression(boxes, prob, 0.45, 0.005)
for box, cls, score in nms:
    filename = 'result/comp4_det_test_{:s}.txt'.format(classes[cls])
    with open(filename, mode='a') as f:
        print(name, score,
              box.left * w, box.top * h,
              box.right * w, box.bottom * h,
              file=f)

dataset = VOCDataset(args.root, [t.split('-') for t in args.test], im_size)
info = list()
batch = list()
for i in range(len(dataset)):
    # resize, convert to RGB, and normalize each image, then accumulate a mini-batch
    img = dataset.image(i)
    h, w, _ = img.shape
    info.append((dataset.name(i), (w, h)))
    img = cv2.resize(img, (im_size, im_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, dtype=np.float32) / 255.0
    img = img.transpose(2, 0, 1)
    batch.append(img)
    if len(batch) == args.batchsize:
        boxes, conf, prob = model.predict(
# write out detections that survive NMS, one file per class (VOC comp4 format)
for box, cls, score in nms:
    if score < 0.01:
        break
    box *= size
    filename = 'comp4_det_test_{:s}.txt'.format(voc.names[cls])
    with open(filename, mode='a') as f:
        print(name, score, box.left, box.top, box.right, box.bottom, file=f)

dataset = VOCDataset(args.root, [t.split('-') for t in args.test])
info = list()
batch = list()
for i in range(len(dataset)):
    # resize and mean-subtract each image, then accumulate a mini-batch
    src = dataset.image(i)
    info.append((dataset.name(i), src.shape[1::-1]))
    x = cv2.resize(src, (model.insize, model.insize)).astype(np.float32)
    x -= config.mean
    x = x.transpose(2, 0, 1)
    batch.append(x)
    if len(batch) == args.batchsize:
        loc, conf = model(chainer.Variable(xp.array(batch), volatile=True))
        for j, (name, size) in enumerate(info):
            dump_result(name, size, loc.data[j], conf.data[j])
darknet.load_state_dict(dst_state_dict)

# Load YOLO model.
yolo = YOLOv1(darknet.features)
yolo.conv_layers = torch.nn.DataParallel(yolo.conv_layers)
yolo.cuda()

# Setup loss and optimizer.
criterion = Loss(feature_size=yolo.feature_size)
optimizer = torch.optim.SGD(yolo.parameters(), lr=init_lr, momentum=momentum,
                            weight_decay=weight_decay)

# Load Pascal-VOC dataset.
train_dataset = VOCDataset(True, image_dir, train_label)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_dataset = VOCDataset(False, image_dir, val_label)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
print('Number of training images: ', len(train_dataset))

# Open TensorBoardX summary writer.
log_dir = datetime.now().strftime('%b%d_%H-%M-%S')
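# A plausible continuation (assumption, not part of the excerpt above): create the
# tensorboardX writer on the timestamped directory; the original script may nest the
# log path under a different base directory.
from tensorboardX import SummaryWriter
writer = SummaryWriter(log_dir)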
# build the default-box encoder and the SSD300 model
multibox_encoder = MultiBoxEncoder(
    n_scale=6,
    variance=(0.1, 0.2),
    grids=(38, 19, 10, 5, 3, 1),
    aspect_ratios=((2,), (2, 3), (2, 3), (2, 3), (2,), (2,)))

model = SSD300(n_class=20, n_anchors=multibox_encoder.n_anchors)
if args.init:
    serializers.load_npz(args.init, model)
model.train = True

if args.gpu >= 0:
    chainer.cuda.get_device(args.gpu).use()
    model.to_gpu()

train = VOCDataset(args.root, [t.split('-') for t in args.train], size, multibox_encoder)
train_iter = chainer.iterators.MultiprocessIterator(
    train, args.batchsize, n_processes=args.loaderjob)

# momentum SGD with weight decay regularization
optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

snapshot_interval = 1000, 'iteration'
log_interval = 10, 'iteration'

trainer.extend(extensions.dump_graph('main/loss'))
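# Sketch (an assumption about how the intervals above are consumed): register the usual
# Trainer extensions for snapshots and logging, then start training. The original script
# may wire up a different set of extensions.
trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
               trigger=snapshot_interval)
trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.PrintReport(['iteration', 'main/loss']),
               trigger=log_interval)
trainer.run()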
# load model
print("loading initial model...")
yolov2 = YOLOv2(config[args.dtype])
model = YOLOv2Predictor(yolov2)
if args.init:
    serializers.load_hdf5(args.init, yolov2)
model.predictor.train = True
model.predictor.finetune = False

if args.gpu >= 0:
    chainer.cuda.get_device(args.gpu).use()
    model.to_gpu()

train = VOCDataset(args.root, [t.split('-') for t in args.train], size)
train_iter = chainer.iterators.MultiprocessIterator(
    train, args.batchsize, n_processes=args.loaderjob)

optimizer = optimizers.MomentumSGD(lr=learning_rate, momentum=momentum)
optimizer.use_cleargrads()
optimizer.setup(model)

# start to train
print("start training")
for batch in range(max_batches):
    if str(batch) in learning_schedules:
        optimizer.lr = learning_schedules[str(batch)]
    # generate sample