def prefetch_test(opt):
    Dataset = dataset_factory[opt.dataset]
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)

    model = create_model(opt.arch, opt.heads, opt.head_conv)
    # model = load_model(self.model, opt.load_model)
    model.eval()

    p2c = Pytorch2Caffe(model, '/home/lijf/MT', 'parking', [3, 512, 512])
    p2c.start()
def main(opt):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.debug = max(opt.debug, 0)
    print(opt.dataset)

    Dataset = get_dataset(opt.dataset, opt.task)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)

    Detector = detector_factory[opt.task]
    detector = Detector(opt)

    image_names = load_benchmark_file(opt)
    p_output = opt.data_dir + "/" + opt.output
    with open(p_output, "w") as f:
        # f.write("img,id,class,tlx,tly,brx,bry,score\n")
        for image_name in image_names:
            image_path = opt.data_dir + "/images/" + image_name
            try:
                img = cv2.imread(image_path)
                image_height, image_width, _ = img.shape
            except Exception as e:
                print("file read error, path ", image_path)
                continue

            print("try to detect on ", image_path)
            ret = detector.run(image_path)
            detection = ret['results']
            for key, items in detection.items():
                for i in range(items.shape[0]):
                    # convert the (tlx, tly, brx, bry) box to a normalized (cx, cy, w, h)
                    cx = (items[i, 0] + items[i, 2]) / 2.0
                    cy = (items[i, 1] + items[i, 3]) / 2.0
                    w = items[i, 2] - items[i, 0]
                    h = items[i, 3] - items[i, 1]
                    cx /= image_width
                    w /= image_width
                    cy /= image_height
                    h /= image_height
                    line = image_name + "," + str(cx) + "," + str(cy) + "," + str(w) + "," + \
                        str(h) + "," + str(items[i, 4]) + "," + str(key) + "\n"
                    f.write(line)
            if len(detection) == 0:
                f.write(image_name + "\n")
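
# A minimal sketch (not part of the original script) of reading the detections written
# above; the column order mirrors the write side: image name, normalized cx, cy, w, h,
# confidence score, and class id. The helper name `load_benchmark_dets` is hypothetical.
def load_benchmark_dets(path):
    """Parse the benchmark output file into a list of per-detection dicts."""
    dets = []
    with open(path) as f:
        for raw in f:
            fields = raw.strip().split(',')
            if len(fields) < 7:
                continue  # a line with only an image name means "no detections"
            name, cx, cy, w, h, score, cls_id = fields[:7]
            dets.append({'image': name,
                         'cx': float(cx), 'cy': float(cy),
                         'w': float(w), 'h': float(h),
                         'score': float(score), 'class': cls_id})
    return dets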
def test(opt):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str

    Dataset = dataset_factory[opt.dataset]
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    Logger(opt)

    opt.debug = max(opt.debug, 1)
    Detector = detector_factory[opt.task]

    split = 'val' if opt.trainval else 'test'
    dataset = Dataset(opt, split)
    detector = Detector(opt)

    results = {}
    num_iters = len(dataset)
    bar = Bar('{}'.format(opt.exp_id), max=num_iters)
    time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
    avg_time_stats = {t: AverageMeter() for t in time_stats}
    for ind in range(num_iters):
        img_id = dataset.images[ind]
        img_info = dataset.coco.loadImgs(ids=[img_id])[0]
        img_path = os.path.join(dataset.img_dir, img_info['file_name'])

        if opt.task == 'ddd':
            ret = detector.run(img_path, img_info['calib'])
        else:
            ret = detector.run(img_path)

        results[img_id] = ret['results']

        Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
            ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
        for t in avg_time_stats:
            avg_time_stats[t].update(ret[t])
            Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg)
        bar.next()
    bar.finish()
    dataset.run_eval(results, opt.save_dir)
def prefetch_test(opt):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str

    Dataset = dataset_factory[opt.dataset]
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    Logger(opt)
    Detector = detector_factory[opt.task]

    split = 'val' if not opt.trainval else 'test'
    dataset = Dataset(opt, split)
    detector = Detector(opt)

    data_loader = torch.utils.data.DataLoader(
        PrefetchDataset(opt, dataset, detector.pre_process),
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True)

    results = {}
    num_iters = len(dataset)
    bar = Bar('{}'.format(opt.exp_id), max=num_iters)
    time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
    avg_time_stats = {t: AverageMeter() for t in time_stats}
    for ind, (img_id, pre_processed_images) in enumerate(data_loader):
        ret = detector.run(pre_processed_images)

        img_id = np.array(img_id)
        results[img_id.astype(np.int32)[0]] = ret['results']
        # results[img_id.numpy().astype(np.int32)[0]] = ret['results']

        Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
            ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
        for t in avg_time_stats:
            avg_time_stats[t].update(ret[t])
            Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format(
                t, tm=avg_time_stats[t])
        bar.next()
    bar.finish()
    dataset.run_eval(results, opt.save_dir)
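
# A minimal sketch (an assumption, not the repository's actual definition) of the
# PrefetchDataset wrapper used above: it moves image loading and detector.pre_process
# into DataLoader workers so the GPU is not idle while the next image is prepared. The
# exact dict layout expected by detector.run() may differ in the real code.
class PrefetchDataset(torch.utils.data.Dataset):
    def __init__(self, opt, dataset, pre_process_func):
        self.images = dataset.images
        self.load_image_func = dataset.coco.loadImgs
        self.img_dir = dataset.img_dir
        self.pre_process_func = pre_process_func
        self.opt = opt

    def __getitem__(self, index):
        img_id = self.images[index]
        img_info = self.load_image_func(ids=[img_id])[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        image = cv2.imread(img_path)
        images, meta = {}, {}
        for scale in self.opt.test_scales:
            images[scale], meta[scale] = self.pre_process_func(image, scale)
        return img_id, {'images': images, 'image': image, 'meta': meta}

    def __len__(self):
        return len(self.images)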
        #          data_type='mot',
        #          result_f_name=result_file_name,
        #          save_dir=frame_dir,
        #          show_image=False,
        #          frame_rate=frame_rate,
        #          mode='detect')

        # only for tmp detection evaluation...
        output_dir = '/users/duanyou/c5/results_new/results_all/mcmot_hrnet18_deconv_ep3'
        eval_seq_and_output_dets(opt=opt,
                                 data_loader=data_loader,
                                 data_type='mot',
                                 result_f_name=result_file_name,
                                 out_dir=output_dir,
                                 save_dir=frame_dir,
                                 show_image=False)
    except Exception as e:
        logger.info(e)

    if opt.output_format == 'video':
        output_video_path = osp.join(result_root, 'result.mp4')
        cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -b 5000k -c:v mpeg4 {}' \
            .format(osp.join(result_root, 'frame'), output_video_path)
        os.system(cmd_str)


if __name__ == '__main__':
    opt = opts().init()
    run_demo(opt)
def run(opt):
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test

    print('Setting up data...')
    Dataset = get_dataset(opt.dataset, opt.task, opt.multi_scale)  # if opt.task == 'mot' -> JointDataset

    f = open(opt.data_cfg)  # choose which dataset to train, e.g. '../src/lib/cfg/mot15.json'
    data_config = json.load(f)
    trainset_paths = data_config['train']  # training set paths
    dataset_root = data_config['root']  # dataset root directory
    print("Dataset root: %s" % dataset_root)
    f.close()

    # Image data transformations
    transforms = T.Compose([T.ToTensor()])

    # Dataset
    dataset = Dataset(opt=opt,
                      root=dataset_root,
                      paths=trainset_paths,
                      img_size=opt.input_wh,
                      augment=True,
                      transforms=transforms)
    opt = opts().update_dataset_info_and_set_heads(opt, dataset)
    print("opt:\n", opt)

    logger = Logger(opt)

    # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str  # multi-GPU training
    # print("opt.gpus_str: ", opt.gpus_str)
    opt.device = torch.device('cuda:0' if opt.gpus[0] >= 0 else 'cpu')  # set the device
    # opt.device = device  # NC UPDATE - fallback to original fairmot
    # opt.gpus = my_visible_devs  # NC UPDATE

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)

    # Initialize the optimizer
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)

    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer,
                                                   opt.resume, opt.lr, opt.lr_step)

    # Get dataloader
    if opt.is_debug:
        if opt.multi_scale:
            train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                       batch_size=opt.batch_size,
                                                       shuffle=False,
                                                       pin_memory=True,
                                                       drop_last=True)  # keep num_workers at the default 0 when debugging
        else:
            train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                       batch_size=opt.batch_size,
                                                       shuffle=True,
                                                       pin_memory=True,
                                                       drop_last=True)  # keep num_workers at the default 0 when debugging
    else:
        if opt.multi_scale:
            train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                       batch_size=opt.batch_size,
                                                       shuffle=False,
                                                       num_workers=opt.num_workers,
                                                       pin_memory=True,
                                                       drop_last=True)
        else:
            train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                                       batch_size=opt.batch_size,
                                                       shuffle=True,
                                                       pin_memory=True,
                                                       drop_last=True)  # keep num_workers at the default 0 when debugging

    print('Starting training...')
    Trainer = train_factory[opt.task]
    trainer = Trainer(opt=opt, model=model, optimizer=optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
    # trainer.set_device(opt.gpus, opt.chunk_sizes, device)  # NC UPDATE

    best = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'

        # Train an epoch
        log_dict_train, _ = trainer.train(epoch, train_loader)

        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))

        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
        else:  # mcmot_last_track or mcmot_last_det
            if opt.id_weight > 0:  # do tracking (detection and re-id)
                save_model(os.path.join(opt.save_dir, 'mcmot_last_track_' + opt.arch + '.pth'),
                           epoch, model, optimizer)
            else:  # only do detection
                save_model(os.path.join(opt.save_dir, 'mcmot_last_det_' + opt.arch + '.pth'),
                           epoch, model, optimizer)

        logger.write('\n')
        if epoch in opt.lr_step:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        if epoch % 10 == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)

    logger.close()

if __name__ == '__main__':
    # os.environ['CUDA_VISIBLE_DEVICES'] = '1'  # '0, 1'
    opt = opts().parse()
    # print("opt.gpus: ", opt.gpus)
    run(opt)
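
# A minimal sketch (hypothetical numbers, separate from the training script) of the step
# decay used in run() above: with a base lr of 1e-4 and lr_step = [20, 27], each listed
# epoch multiplies the learning rate by 0.1.
def step_lr(base_lr, lr_step, epoch):
    """Learning rate after every 0.1x drop whose epoch has been reached."""
    drops = sum(1 for e in lr_step if epoch >= e)
    return base_lr * (0.1 ** drops)

for epoch in (1, 20, 27, 30):
    print(epoch, step_lr(1e-4, [20, 27], epoch))  # 1e-4, 1e-5, 1e-6, 1e-6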
    x = self.layer3(x)
    x = self.layer4(x)
    x = self.deconv_layers(x)
    ret = []  # change dict to list
    for head in self.heads:
        ret.append(self.__getattr__(head)(x))
    return ret


forward = {
    'dla': pose_dla_forward,
    'dlav0': dlav0_forward,
    'resdcn': resnet_dcn_forward
}

# change lib/opts.py add_argument('task', default='ctdet' ...)
# to add_argument('--task', default='ctdet' ...)
opt = opts().init()
opt.arch = 'resdcn_18'
opt.heads = OrderedDict([('hm', 10), ('reg', 2), ('wh', 2)])
opt.head_conv = 256 if 'dla' in opt.arch else 64
print(opt)

model = create_model(opt.arch, opt.heads, opt.head_conv)
model.forward = MethodType(forward[opt.arch.split('_')[0]], model)
load_model(model, 'ctdet_bdd_resnet18_512.pth')
model.eval()
model.cuda()

input = torch.zeros([1, 3, 512, 512]).cuda()
onnx.export(model, input, "ctdet_bdd_resnet18_512.onnx",
            verbose=True,
            operator_export_type=OperatorExportTypes.ONNX)
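
# A minimal sketch (an assumption, not part of the original export script) for
# sanity-checking the exported graph with onnxruntime, which is an extra dependency:
# the patched forward returns the head outputs as a list, so the ONNX model should
# expose one output per head ('hm', 'reg', 'wh').
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("ctdet_bdd_resnet18_512.onnx")
input_name = sess.get_inputs()[0].name
dummy = np.zeros((1, 3, 512, 512), dtype=np.float32)
outputs = sess.run(None, {input_name: dummy})
for out_meta, out in zip(sess.get_outputs(), outputs):
    # with a 512x512 input and stride 4, expect 1x10x128x128 for 'hm' and 1x2x128x128 for 'reg'/'wh'
    print(out_meta.name, out.shape)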
def main(opt, opt_t):
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test

    if opt.target_dataset:
        Dataset_target = get_dataset(opt_t.target_dataset, opt_t.task)
        opt_t = opts().update_dataset_info_and_set_heads(opt_t, Dataset_target)  # target dataset
    Dataset_source = get_dataset(opt.source_dataset, opt.task)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset_source)  # source dataset
    print(opt)

    logger = Logger(opt)  # record

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)  # create model
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)  # create optimizer
    start_epoch = 0
    if opt.load_model != '':  # load model
        model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer,
                                                   opt.resume, opt.lr, opt.lr_step)

    Trainer = train_factory[opt.task]  # set trainer function
    trainer = Trainer(opt, model, optimizer)  # initialize trainer
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    print('Setting up source_val data...')
    val_source_loader = torch.utils.data.DataLoader(Dataset_source(opt, 'val'),
                                                    batch_size=1,
                                                    shuffle=False,
                                                    num_workers=0,
                                                    pin_memory=True)

    if opt.test:
        _, preds = trainer.val(0, val_source_loader)
        val_source_loader.dataset.run_eval(preds, opt.save_dir)
        return

    # source loader
    print('Setting up source_train data...')
    train_source_loader = torch.utils.data.DataLoader(
        Dataset_source(opt, 'train'),  # modify SOURCE dataset parameters
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True)

    if opt.target_dataset:
        # target loader
        print('Setting up target_train data...')
        train_target_loader = torch.utils.data.DataLoader(
            Dataset_target(opt_t, 'train'),  # modify TARGET dataset parameters
            batch_size=opt_t.batch_size,
            shuffle=True,
            num_workers=opt_t.num_workers,
            pin_memory=True,
            drop_last=True)
        print('DA MODE')
    else:
        train_target_loader = None

    print('Starting training...')
    best = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_source_loader, train_target_loader)  # do train
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():  # log information
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))

        # if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
        #     save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),  # save last-model
        #                epoch, model, optimizer)
        #     with torch.no_grad():
        #         log_dict_val, preds = trainer.val(epoch, val_source_loader)  # compute val-set loss
        #     for k, v in log_dict_val.items():
        #         logger.scalar_summary('val_{}'.format(k), v, epoch)
        #         logger.write('{} {:8f} | '.format(k, v))
        #     if log_dict_val[opt.metric] < best:
        #         best = log_dict_val[opt.metric]
        #         save_model(os.path.join(opt.save_dir, 'model_best.pth'),  # save best-model
        #                    epoch, model)
        # else:
        save_model(os.path.join(opt.save_dir, 'model_last.pth'),
                   epoch, model, optimizer)

        logger.write('\n')
        if epoch in opt.lr_step:  # update learning rate
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),  # save lr_step-model
                       epoch, model, optimizer)
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()

if __name__ == '__main__':
    opt_s = opts().parse()
    opt_t = opts().parse()
    main(opt_s, opt_t)
def main(opt):
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset, opt.task)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)

    logger = Logger(opt)

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer,
                                                   opt.resume, opt.lr, opt.lr_step)

    Trainer = train_factory[opt.task]
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    print('Setting up data...')
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)

    if opt.test:
        _, preds = trainer.val(0, val_loader)
        val_loader.dataset.run_eval(preds, opt.save_dir)
        return

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    best = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
            if log_dict_val[opt.metric] < best:
                best = log_dict_val[opt.metric]
                save_model(os.path.join(opt.save_dir, 'model_best.pth'),
                           epoch, model)
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'),
                       epoch, model, optimizer)
        logger.write('\n')
        if epoch in opt.lr_step:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
def main(opt):
    torch.manual_seed(opt.seed)
    # benchmark=True lets cuDNN search for the fastest algorithms for the current
    # configuration, which improves runtime efficiency
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset, opt.task)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    logger = Logger(opt)

    def adjust_learning_rate(optimizer, epoch):
        # use warmup for the first 5 epochs
        if epoch < 5:
            lr = opt.lr * ((epoch + 1) / 5)
        else:
            # use cosine lr afterwards
            PI = 3.14159
            lr = opt.lr * 0.5 * (1 + math.cos(epoch * PI / 20))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    # optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    optimizer = torch.optim.SGD(model.parameters(), opt.lr)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer,
                                                   opt.resume, opt.lr, opt.lr_step)

    Trainer = train_factory[opt.task]
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    print('Setting up data...')
    # num_workers=0 loads data in the main process; with num_workers > 0 the worker
    # processes hang while reading data here and GPU utilization drops to 0
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)

    if opt.test:
        _, preds = trainer.val(0, val_loader)
        val_loader.dataset.run_eval(preds, opt.save_dir)
        return

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    best = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        adjust_learning_rate(optimizer, epoch)
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % 2 == 0 and epoch > 10:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
            if log_dict_val[opt.metric] < best:
                best = log_dict_val[opt.metric]
                save_model(os.path.join(opt.save_dir, 'model_best.pth'),
                           epoch, model)
            # test(opt)
            # opt.model = None
        if epoch > 20:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
        # elif 80 < epoch <= 100 and epoch % 3 == 0:
        #     save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
        #                epoch, model, optimizer)
        logger.write('\n')
        # if epoch in opt.lr_step:
        #     save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
        #                epoch, model, optimizer)
        #     lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
        #     print('Drop LR to', lr)
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr
        print('Epoch is Finished')
    logger.close()
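
# A minimal sketch (hypothetical base lr, separate from the script above) of the warmup +
# cosine schedule implemented by adjust_learning_rate: epochs 0-4 ramp linearly up to the
# base lr, then the cosine term decays it toward 0 by epoch 20.
import math

base_lr = 0.01  # assumed value of opt.lr
for epoch in range(0, 21, 4):
    if epoch < 5:
        lr = base_lr * ((epoch + 1) / 5)
    else:
        lr = base_lr * 0.5 * (1 + math.cos(epoch * math.pi / 20))
    print('epoch %2d -> lr %.5f' % (epoch, lr))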
def main(opt):
    # opt.seed defaults to 317; calling torch.manual_seed makes the generated
    # random numbers identical on every run
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    # opt.dataset = coco, opt.task = ctdet (| ddd | multi_pose | exdet)
    Dataset = get_dataset(opt.dataset, opt.task)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)

    logger = Logger(opt)

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer,
                                                   opt.resume, opt.lr, opt.lr_step)

    Trainer = train_factory[opt.task]
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    print('Setting up data...')
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)
    # modified by zy
    # val_loader = torch.utils.data.DataLoader(Dataset(opt, 'test'), batch_size=1,
    #                                          shuffle=False, num_workers=1, pin_memory=True)

    if opt.test:
        _, preds = trainer.val(0, val_loader)
        val_loader.dataset.run_eval(preds, opt.save_dir)
        return

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    output_choice_log = '/home/zy/zy/2new_network/CenterNet-master/output_choice.log'
    if os.path.exists(output_choice_log):
        os.remove(output_choice_log)

    print('Starting training...')
    best = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        try:
            # !!!!!!!! train = self.run_epoch('train', epoch, data_loader)
            log_dict_train, _ = trainer.train(epoch, train_loader)
        except Exception as e:
            # if an exception is raised, log it and continue with the next epoch
            print('Error_train!!!', e)
            print(traceback.format_exc())
            continue
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                try:
                    log_dict_val, preds = trainer.val(epoch, val_loader)
                except Exception as e:
                    # if an exception is raised, log it and continue with the next epoch
                    print('Error_val!!!', e)
                    print(traceback.format_exc())
                    continue
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
            if log_dict_val[opt.metric] < best:
                best = log_dict_val[opt.metric]
                save_model(os.path.join(opt.save_dir, 'model_best.pth'),
                           epoch, model)
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'),
                       epoch, model, optimizer)
        logger.write('\n')
        if epoch in opt.lr_step:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
def main(opt):
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test

    print('Setting up data...')
    Dataset = get_dataset(opt.task)
    f = open(opt.data_cfg)
    data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']
    f.close()

    transforms = T.Compose([T.ToTensor()])
    dataset = Dataset(opt, dataset_root, trainset_paths, (1088, 608),
                      augment=True, transforms=transforms)
    opt = opts().update_dataset_info_and_set_heads(opt, dataset)
    print(opt)

    logger = Logger(opt)

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer,
                                                   opt.resume, opt.lr, opt.lr_step)

    # Get dataloader
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    Trainer = train_factory[opt.task]
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    best = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'),
                       epoch, model, optimizer)
        logger.write('\n')
        if epoch in opt.lr_step:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        if epoch % 5 == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
    logger.close()