def get_dataset(datasetnames):
    """Build a RoiDataset from one or more '+'-joined imdb names.

    The first name seeds the dataset; every following name is loaded
    and merged in via the dataset's ``+=`` operator.
    """
    parts = datasetnames.split('+')
    combined = RoiDataset(get_imdb(parts[0]))
    print('load dataset {}'.format(parts[0]))
    for extra_name in parts[1:]:
        combined += RoiDataset(get_imdb(extra_name))
        print('load and add dataset {}'.format(extra_name))
    return combined
def get_dataset(datasetnames):
    """Load a (possibly combined) dataset.

    ``datasetnames`` is '+'-separated, e.g.
    'voc_2007_trainval+voc_2012_trainval'.
    """
    names = datasetnames.split('+')
    first, rest = names[0], names[1:]
    dataset = RoiDataset(get_imdb(first))
    print('load dataset {}'.format(first))
    for name in rest:
        addition = RoiDataset(get_imdb(name))
        dataset += addition
        print('load and add dataset {}'.format(name))
    return dataset
def combined_roidb(imdb_names, training=True):
    """Combine multiple roidbs named by a '+'-separated string.

    Returns a tuple ``(imdb, roidb)``; when several names are given the
    roidb entries are concatenated and the imdb of the FIRST name is
    returned.  When ``training`` is true the roidb is additionally
    filtered via ``filter_roidb``.
    """

    def get_training_roidb(imdb):
        """Return a roidb (Region of Interest database) prepared for training."""
        if cfg.TRAIN.USE_FLIPPED:
            print('Appending horizontally-flipped training examples...')
            imdb.append_flipped_images()
            print('done')
        print('Preparing training data...')
        prepare_roidb(imdb)
        print('done')
        return imdb.roidb

    def get_roidb(imdb_name):
        imdb = get_imdb(imdb_name)
        print('Loaded dataset `{:s}` for training'.format(imdb.name))
        return get_training_roidb(imdb)

    names = imdb_names.split('+')
    roidbs = [get_roidb(name) for name in names]
    roidb = roidbs[0]
    if len(roidbs) > 1:
        for extra in roidbs[1:]:
            roidb.extend(extra)
        # Represent the combined set with the first imdb.
        imdb = get_imdb(names[0])
    else:
        imdb = get_imdb(imdb_names)
    if training:
        roidb = filter_roidb(roidb)
    return imdb, roidb
def train_model(dataset, trainset, num_classes, net, pad, cachepath):
    """Fine-tune a segmentation net for (dataset, trainset) from a pre-trained model.

    Skips silently when the target model already exists, when the
    prerequisite pre-trained model is missing, or when another process
    holds the lock directory.  Fix vs. original: the Python-2-only
    ``print`` statement is now parenthesized (valid in py2 and py3,
    matching the rest of the file) and the lock ``except`` is narrowed
    to ``OSError`` (all ``os.mkdir`` failures).
    """
    cachefolder = osp.join(cachepath, dataset + '_' + trainset, net)
    if not osp.isdir(cachefolder):
        os.makedirs(cachefolder)
    # Tags encode the segmentation hyper-parameters; ptag names the
    # pre-trained model (epoch count), tag names the fine-tuned target.
    ptag = 'S' + ('%d_' * len(options.seg.sizes)) % tuple(options.seg.sizes) \
        + 'IB%d_B%d_E%d-uniform' % (options.seg.imbatch, options.seg.batchsize, options.seg.epoch)
    if options.seg.trainflip:
        ptag += '_F'
    tag = 'S' + ('%d_' * len(options.seg.sizes)) % tuple(options.seg.sizes) \
        + 'IB%d_B%d_E%d-uniform' % (options.seg.imbatch, options.seg.batchsize, options.seg.ftepochall)
    if options.seg.trainflip:
        tag += '_F'
    trainfolder = osp.join(cachefolder, 'TRAIN')
    finetunefolder = osp.join(cachefolder, 'FT-TR')
    if not osp.isdir(finetunefolder):
        os.makedirs(finetunefolder)
    prefile = osp.join(trainfolder, ptag + '.caffemodel')
    targetfile = osp.join(finetunefolder, tag + '.caffemodel')
    targetlock = osp.join(finetunefolder, tag + '.lock')
    # Nothing to do when already trained or the prerequisite is missing.
    if osp.exists(targetfile) or not osp.exists(prefile):
        return
    # os.mkdir is atomic, so the lock directory doubles as a mutex
    # between concurrent training processes.
    try:
        os.mkdir(targetlock)
    except OSError:
        return
    # looks like it is a tricky task to redirect the stdout/stderr
    # leave it as of now
    print('%s_%s<-%s: %s' % (dataset, trainset, net, tag))
    time.sleep(5)
    datasetname = '%s_%s' % (dataset, trainset)
    segdb = get_imdb(datasetname)
    # create the solver and train file
    solverpath, _ = dump_prototxts(dataset, trainset, num_classes,
                                   segdb.num_images, net, pad)
    # start training
    np.random.seed(options.seed)
    sw = SolverWrapper(solverpath, segdb, finetunefolder, tag, prefile)
    sw.train_model()
    del sw
    # after training, release the lock
    os.rmdir(targetlock)
def train():
    """Train the detector on DATASET starting from pre-trained yolov2 npy weights."""
    pretrained_model = os.path.join(cfg.PRETRAINED_DIR, 'npy', 'yolov2.npy')
    assert os.path.exists(pretrained_model), \
        'Model path {} does not exist!'.format(pretrained_model)
    # Cap the per-process GPU memory fraction.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    imdb = get_imdb(DATASET + '_train')
    solver = SolverWrapper(imdb,
                           DATASET,
                           os.path.join(cfg.TRAIN.TRAINED_DIR, DATASET),
                           pretrained_model)
    solver.train_net()
def train():
    """Train a Yolov2 detector end-to-end.

    Reads hyper-parameters from ``parse_args()`` and ``cfg``, builds the
    dataset/dataloader, constructs the model and SGD optimizer, optionally
    resumes from a checkpoint, then runs the epoch/step loop with optional
    multi-scale inputs, console/file logging, tensorboard scalars, and
    periodic checkpointing.
    """
    # define the hyper parameters first
    args = parse_args()
    args.steplr_epoch = cfg.steplr_epoch
    args.steplr_factor = cfg.steplr_factor
    args.weight_decay = cfg.weight_decay
    args.momentum = cfg.momentum
    print('Called with args:')
    print(args)
    lr = args.lr
    # initial tensorboardX writer
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)
    args.imdb_name = 'trainval'
    args.imdbval_name = 'trainval'
    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # load dataset
    print('loading dataset....')
    train_dataset = RoiDataset(get_imdb(args.imdb_name))
    print('dataset loaded.')
    print('training rois number: {}'.format(len(train_dataset)))
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.num_workers,
                                  collate_fn=detection_collate, drop_last=True)
    # initialize the model
    print('initialize the model')
    tic = time.time()
    model = Yolov2(pretrained=True, arch=args.arch)
    toc = time.time()
    print('model loaded: cost time {:.2f}s'.format(toc - tic))
    # initialize the optimizer: the backbone ("trunk") gets a learning rate
    # scaled by cfg.former_lr_decay relative to the conv3/conv4 heads.
    optimizer = optim.SGD([{
        "params": model.trunk.parameters(),
        "lr": args.lr * cfg.former_lr_decay
    }, {
        "params": model.conv3.parameters()
    }, {
        "params": model.conv4.parameters()
    }], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=args.steplr_epoch,
                       gamma=args.steplr_factor)
    if args.resume:
        print('resume training enable')
        resume_checkpoint_name = 'yolov2_epoch_{}.pth'.format(
            args.checkpoint_epoch)
        resume_checkpoint_path = os.path.join(output_dir, resume_checkpoint_name)
        print('resume from {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        # resume at the epoch after the checkpointed one
        args.start_epoch = checkpoint['epoch'] + 1
        lr = checkpoint['lr']
        print('learning rate is {}'.format(lr))
        adjust_learning_rate(optimizer, lr)
    if args.use_cuda:
        model.cuda()
    if args.mGPUs:
        model = nn.DataParallel(model)
    # set the model mode to train because we have some layer whose behaviors are different when in training and testing.
    # such as Batch Normalization Layer.
    model.train()
    iters_per_epoch = int(len(train_dataset) / args.batch_size)
    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        loss_temp = 0  # running loss accumulated over a display window
        tic = time.time()
        train_data_iter = iter(train_dataloader)
        scheduler.step()
        lr = get_lr(optimizer)
        # switch the allowed multi-scale range at configured epochs
        if cfg.multi_scale and epoch in cfg.epoch_scale:
            cfg.scale_range = cfg.epoch_scale[epoch]
            print('change scale range to {}'.format(cfg.scale_range))
        for step in range(iters_per_epoch):
            # periodically resample the network input size (multi-scale training)
            if cfg.multi_scale and (step + 1) % cfg.scale_step == 0:
                scale_index = np.random.randint(*cfg.scale_range)
                cfg.input_size = cfg.input_sizes[scale_index]
                # print('change input size {}'.format(cfg.input_size))
            im_data, boxes, gt_classes, num_obj = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                boxes = boxes.cuda()
                gt_classes = gt_classes.cuda()
                num_obj = num_obj.cuda()
            im_data_variable = Variable(im_data)
            box_loss, iou_loss, class_loss = model(im_data_variable, boxes,
                                                   gt_classes, num_obj,
                                                   training=True)
            # total loss is the sum of the three mean loss components
            loss = box_loss.mean() + iou_loss.mean() \
                + class_loss.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_temp += loss.item()
            # console / logfile / tensorboard logging every display_interval steps
            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                loss_temp /= args.display_interval
                iou_loss_v = iou_loss.mean().item()
                box_loss_v = box_loss.mean().item()
                class_loss_v = class_loss.mean().item()
                log = "[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, iou_loss: %.4f, box_loss: %.4f, cls_loss: %.4f" \
                    % (epoch, step+1, iters_per_epoch, loss_temp, lr,
                       iou_loss_v, box_loss_v, class_loss_v)
                print(log)
                logfile = os.path.join(output_dir, 'training_log.txt')
                with open(logfile, 'a') as f:
                    print(log, file=f)
                if args.use_tfboard:
                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('lr', lr, n_iter)
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/iou_loss', iou_loss_v, n_iter)
                    writer.add_scalar('losses/box_loss', box_loss_v, n_iter)
                    writer.add_scalar('losses/cls_loss', class_loss_v, n_iter)
                # reset the running-loss window
                loss_temp = 0
                tic = time.time()
        # periodic checkpointing (unwrap DataParallel before saving)
        if epoch % args.save_interval == 0:
            save_name = os.path.join(output_dir,
                                     'yolov2_epoch_{}.pth'.format(epoch))
            torch.save(
                {
                    'model': model.module.state_dict() if args.mGPUs else model.state_dict(),
                    'epoch': epoch,
                    'lr': lr
                }, save_name)
def test():
    """Run detection on a dataset, dump JSON/pickle results, and evaluate.

    Fixes vs. original: removed a dead ``Image.open`` that was executed
    for every image and never used (opening and leaking a file handle
    per image — the vis branch re-opens the file itself), and removed
    the unused ``det_bbox`` duplicate of ``bbox``.  The inner per-box
    loop variable no longer shadows the outer image index ``i``.
    """
    args = parse_args()
    args.conf_thresh = 0.005
    args.nms_thresh = 0.45
    if args.vis:
        # higher threshold when visualizing, so only confident boxes draw
        args.conf_thresh = 0.5
    print('Called with args:')
    print(args)

    # prepare dataset
    val_imdb = get_imdb(args.dataset)
    val_dataset = RoiDataset(val_imdb, train=False)
    val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size,
                                shuffle=False)

    # load model
    model = Yolov2(arch=args.arch)
    model_path = os.path.join(args.output_dir, args.model_name + '.pth')
    print('loading model from {}'.format(model_path))
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    if args.use_cuda:
        model.cuda()
    model.eval()
    print('model loaded')

    dataset_size = len(val_imdb.image_index)
    print('classes: ', val_imdb.num_classes)
    # all_boxes[class][image] -> (k, 5) array of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(dataset_size)]
                 for _ in range(val_imdb.num_classes)]
    det_file = os.path.join(args.output_dir, 'detections.pkl')
    results = []
    img_id = -1
    with torch.no_grad():
        for batch, (im_data, im_infos) in enumerate(val_dataloader):
            if args.use_cuda:
                im_data_variable = Variable(im_data).cuda()
            else:
                im_data_variable = Variable(im_data)
            yolo_outputs = model(im_data_variable)
            for i in range(im_data.size(0)):
                img_id += 1
                output = [item[i].data for item in yolo_outputs]
                im_info = {'width': im_infos[i][0], 'height': im_infos[i][1]}
                detections = yolo_eval(output, im_info,
                                       conf_threshold=args.conf_thresh,
                                       nms_threshold=args.nms_thresh)
                if img_id % 100 == 0:
                    print('im detect [{}/{}]'.format(img_id + 1,
                                                     len(val_dataset)))
                if len(detections) > 0:
                    for cls in range(val_imdb.num_classes):
                        inds = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if inds.numel() > 0:
                            cls_det = torch.zeros((inds.numel(), 5))
                            cls_det[:, :4] = detections[inds, :4]
                            # final score = objectness * class probability
                            cls_det[:, 4] = detections[inds, 4] * detections[inds, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()
                # Build the JSON-serializable record for this image.
                if len(detections) > 0:
                    detect_result = {}
                    boxes = detections[:, :5].cpu().numpy()
                    classes = detections[:, -1].long().cpu().numpy()
                    class_names = val_imdb.classes
                    num_boxes = boxes.shape[0]
                    labels = []
                    # NOTE: loop variable renamed from `i` to avoid shadowing
                    # the outer image index.
                    for b in range(num_boxes):
                        score = boxes[b, 4]
                        gt_class_ind = classes[b]
                        class_name = class_names[gt_class_ind]
                        disp_str = '{}: {:.2f}'.format(class_name, score)
                        xmin, ymin, xmax, ymax = tuple(
                            np.round(boxes[b, :4]).astype(np.int64))
                        box2d = {}
                        box2d["x1"] = str(xmin)
                        box2d["y1"] = str(ymin)
                        box2d["x2"] = str(xmax)
                        box2d["y2"] = str(ymax)
                        bbox = {}
                        bbox["box2d"] = box2d
                        bbox["category"] = class_name
                        labels.append(bbox)
                    detect_result["ImageID"] = os.path.basename(
                        val_imdb.image_path_at(img_id))
                    detect_result["labels"] = labels
                    results.append(detect_result)
                if args.vis:
                    img = Image.open(val_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img, det_boxes, det_classes,
                        class_names=val_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()
    print(results)
    results_file = os.path.join(args.output_dir, 'detections.json')
    with open(results_file, 'w') as f:
        json.dump(results, f, ensure_ascii=False, indent=4,
                  sort_keys=True, separators=(',', ': '))
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    val_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)
import _init_paths
from config.options import options
from dataset.factory import get_imdb
import pdb

# Report per-dataset statistics for the segmentation datasets.
# Fix vs. original: Python-2-only `print` statements replaced with the
# parenthesized form (valid on py2 and py3, consistent with the rest of
# the file); the three copy-pasted context blocks collapsed into a loop.
for split in ['train', 'trainval']:
    name = 'aug-voc2012_{}'.format(split)
    print('DS: ' + name)
    imdb = get_imdb(name)
    imdb.compute_stats()

for split in ['train']:
    # Same evaluation order as before: context, context33, context20.
    for prefix in ['context', 'context33', 'context20']:
        name = '{}_{}'.format(prefix, split)
        print('DS: ' + name)
        imdb = get_imdb(name)
        imdb.compute_stats()
def test():
    """Detect on a VOC split, cache detections, and run the imdb evaluation."""
    args = parse_args()
    args.conf_thresh = 0.005
    args.nms_thresh = 0.45
    if args.vis:
        # stricter threshold so the visualization only shows confident boxes
        args.conf_thresh = 0.5
    print('Called with args:')
    print(args)

    # Resolve the dataset alias; only the 2007 splits are supported.
    dataset_aliases = {
        'voc07trainval': 'voc_2007_trainval',
        'voc07test': 'voc_2007_test',
    }
    if args.dataset not in dataset_aliases:
        raise NotImplementedError
    args.imdbval_name = dataset_aliases[args.dataset]

    val_imdb = get_imdb(args.imdbval_name)
    val_dataset = RoiDataset(val_imdb, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)

    # Load the checkpointed model; fall back to CPU tensors when no GPU.
    model = Yolov2()
    model_path = os.path.join(args.output_dir, args.model_name + '.pth')
    print('loading model from {}'.format(model_path))
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    if args.use_cuda:
        model.cuda()
    model.eval()
    print('model loaded')

    dataset_size = len(val_imdb.image_index)
    # all_boxes[class][image] -> (k, 5) array of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(dataset_size)]
                 for _ in range(val_imdb.num_classes)]
    det_file = os.path.join(args.output_dir, 'detections.pkl')

    img_id = -1
    with torch.no_grad():
        for batch, (im_data, im_infos) in enumerate(val_dataloader):
            im_data_variable = Variable(im_data)
            if args.use_cuda:
                im_data_variable = im_data_variable.cuda()
            yolo_outputs = model(im_data_variable)
            for i in range(im_data.size(0)):
                img_id += 1
                output = [item[i].data for item in yolo_outputs]
                im_info = {'width': im_infos[i][0], 'height': im_infos[i][1]}
                detections = yolo_eval(output, im_info,
                                       conf_threshold=args.conf_thresh,
                                       nms_threshold=args.nms_thresh)
                print('im detect [{}/{}]'.format(img_id + 1, len(val_dataset)))
                if len(detections) > 0:
                    for cls in range(val_imdb.num_classes):
                        inds = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if inds.numel() > 0:
                            cls_det = torch.zeros((inds.numel(), 5))
                            cls_det[:, :4] = detections[inds, :4]
                            # final score = objectness * class probability
                            cls_det[:, 4] = detections[inds, 4] * detections[inds, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()
                if args.vis:
                    img = Image.open(val_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img, det_boxes, det_classes,
                        class_names=val_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    val_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)
def get_roidb(imdb_name):
    """Load the named imdb and return its training-prepared roidb."""
    loaded = get_imdb(imdb_name)
    print('Loaded dataset `{:s}` for training'.format(loaded.name))
    return get_training_roidb(loaded)
def get_dataset(datasetnames):
    """Wrap the imdb named by ``datasetnames`` in a RoiDataset."""
    return RoiDataset(get_imdb(datasetnames))
def test():
    """Evaluate a trained YOLOv2 checkpoint on a VOC test split.

    Fix vs. original: ``if torch.cuda.is_available:`` tested the function
    object (always truthy) instead of calling it, so ``torch.load`` was
    never given ``map_location='cpu'`` and CPU-only machines crashed
    loading CUDA tensors.  The inner per-class loop variable was also
    renamed (it reused ``i``, shadowing the outer image index), and the
    enumerate index is no longer misleadingly named ``batch_size``.
    """
    args = parse_args()
    if args.vis:
        # stricter threshold when visualizing
        args.conf_thresh = 0.5

    # load test data
    if args.dataset == 'voc07test':
        dataset_name = 'voc_2007_test'
    elif args.dataset == 'voc12test':
        dataset_name = 'voc_2012_test'
    else:
        raise NotImplementedError
    test_imdb = get_imdb(dataset_name)
    test_dataset = RoiDataset(test_imdb, train=False)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size,
                                 num_workers=args.num_workers, shuffle=False)

    # load model
    model = YOLOv2()
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    weight_file_path = os.path.join(
        args.output_dir, 'yolov2_epoch_{}.pth'.format(args.check_epoch))
    # BUG FIX: call is_available() — the bare attribute is always truthy.
    if torch.cuda.is_available():
        state_dict = torch.load(weight_file_path)
    else:
        state_dict = torch.load(weight_file_path, map_location='cpu')
    model.load_state_dict(state_dict['model'])
    if args.use_cuda:
        model = model.cuda()
    model.eval()

    num_data = len(test_dataset)
    # all_boxes[class][image] -> (k, 5) array of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_data)]
                 for _ in range(test_imdb.num_classes)]
    img_id = -1
    det_file = os.path.join(args.output_dir, 'detections.pkl')
    with torch.no_grad():
        for batch_idx, (im_data, im_infos) in enumerate(test_dataloader):
            if args.use_cuda:
                im_data = im_data.cuda()
                im_infos = im_infos.cuda()
            im_data_variable = Variable(im_data)
            outputs = model(im_data_variable)
            for i in range(im_data.size(0)):
                img_id += 1
                output = [item[i].data for item in outputs]
                im_info = im_infos[i]
                # NOTE: `eval` is the project's detection post-processing
                # helper; it shadows the Python builtin of the same name.
                detections = eval(output, im_info, args.conf_thresh,
                                  args.nms_thresh)
                if len(detections) > 0:
                    # was `for i in ...`, shadowing the outer image index
                    for cls in range(cfg.CLASS_NUM):
                        idxs = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if idxs.numel() > 0:
                            cls_det = torch.zeros((idxs.numel(), 5))
                            cls_det[:, :4] = detections[idxs, :4]
                            # final score = objectness * class probability
                            cls_det[:, 4] = detections[idxs, 4] * detections[idxs, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()
                if args.vis:
                    img = Image.open(test_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img, det_boxes, det_classes,
                        class_names=test_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    test_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)