def imagenet_input(is_training): """Data reader for imagenet. Reads in imagenet data and performs pre-processing on the images. Args: is_training: bool specifying if train or validation dataset is needed. Returns: A batch of images and labels. """ if is_training: dataset = dataset_factory.get_dataset('imagenet', 'train', FLAGS.dataset_dir) else: dataset = dataset_factory.get_dataset('imagenet', 'validation', FLAGS.dataset_dir) provider = slim.dataset_data_provider.DatasetDataProvider( dataset, shuffle=is_training, common_queue_capacity=2 * FLAGS.batch_size, common_queue_min=FLAGS.batch_size) [image, label] = provider.get(['image', 'label']) image_preprocessing_fn = preprocessing_factory.get_preprocessing( 'mobilenet_v1', is_training=is_training) image = image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size) images, labels = tf.train.batch([image, label], batch_size=FLAGS.batch_size, num_threads=4, capacity=5 * FLAGS.batch_size) labels = slim.one_hot_encoding(labels, FLAGS.num_classes) return images, labels
def main(opt): os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str opt.debug = max(opt.debug, 0) print(opt.dataset) Dataset = get_dataset(opt.dataset, opt.task) opt = opts().update_dataset_info_and_set_heads(opt, Dataset) print(opt) Detector = detector_factory[opt.task] detector = Detector(opt) image_names = load_benchmark_file(opt) p_output = opt.data_dir + "/" + opt.output with open(p_output, "w") as f: # f.write("img,id,class,tlx,tly,brx,bry,score\n") for (image_name) in image_names: image_path = opt.data_dir + "/images/" + image_name try: img = cv2.imread(image_path) image_height, image_width, _ = img.shape except Exception as e: print("file read error, path ", image_path) continue print("try to detect on ", image_path) ret = detector.run(image_path) detection = ret['results'] for key, items in detection.items(): for i in range(items.shape[0]): cx = (items[i, 0] + items[i, 2]) / 2.0 cy = (items[i, 1] + items[i, 3]) / 2.0 w = items[i, 2] - items[i, 0] h = items[i, 3] - items[i, 1] cx /= image_width w /= image_width cy /= image_height h /= image_height line = image_name + "," + str(cx) + "," + str(cy) + "," + str(w) + "," + str(h) + "," + str( items[i, 4]) + "," + str(key) + "\n" f.write(line) if len(detection) == 0: f.write(image_name + "\n")
def run(opt): torch.manual_seed(opt.seed) torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test print('Setting up data...') Dataset = get_dataset(opt.dataset, opt.task, opt.multi_scale) # if opt.task==mot -> JointDataset f = open(opt.data_cfg ) # choose which dataset to train '../src/lib/cfg/mot15.json', data_config = json.load(f) trainset_paths = data_config['train'] # 训练集路径 dataset_root = data_config['root'] # 数据集所在目录 print("Dataset root: %s" % dataset_root) f.close() # Image data transformations transforms = T.Compose([T.ToTensor()]) # Dataset dataset = Dataset(opt=opt, root=dataset_root, paths=trainset_paths, img_size=opt.input_wh, augment=True, transforms=transforms) opt = opts().update_dataset_info_and_set_heads(opt, dataset) print("opt:\n", opt) logger = Logger(opt) # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str # 多GPU训练 # print("opt.gpus_str: ", opt.gpus_str) opt.device = torch.device('cuda:0' if opt.gpus[0] >= 0 else 'cpu') # 设置GPU #opt.device = device #NC UPDATE - fallback to original fairmot #opt.gpus = my_visible_devs #NC UPDATE print('Creating model...') model = create_model(opt.arch, opt.heads, opt.head_conv) # 初始化优化器 optimizer = torch.optim.Adam(model.parameters(), opt.lr) start_epoch = 0 if opt.load_model != '': model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) # Get dataloader if opt.is_debug: if opt.multi_scale: train_loader = torch.utils.data.DataLoader( dataset=dataset, batch_size=opt.batch_size, shuffle=False, pin_memory=True, drop_last=True) # debug时不设置线程数(即默认为0) else: train_loader = torch.utils.data.DataLoader( dataset=dataset, batch_size=opt.batch_size, shuffle=True, pin_memory=True, drop_last=True) # debug时不设置线程数(即默认为0) else: if opt.multi_scale: train_loader = torch.utils.data.DataLoader( dataset=dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True, drop_last=True) else: train_loader = torch.utils.data.DataLoader( dataset=dataset, batch_size=opt.batch_size, shuffle=True, pin_memory=True, drop_last=True) # debug时不设置线程数(即默认为0) print('Starting training...') Trainer = train_factory[opt.task] trainer = Trainer(opt=opt, model=model, optimizer=optimizer) trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) # trainer.set_device(opt.gpus, opt.chunk_sizes, device) #NC UPDATE best = 1e10 for epoch in range(start_epoch + 1, opt.num_epochs + 1): mark = epoch if opt.save_all else 'last' # Train an epoch log_dict_train, _ = trainer.train(epoch, train_loader) logger.write('epoch: {} |'.format(epoch)) for k, v in log_dict_train.items(): logger.scalar_summary('train_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), epoch, model, optimizer) else: # mcmot_last_track or mcmot_last_det if opt.id_weight > 0: # do tracking(detection and re-id) save_model( os.path.join(opt.save_dir, 'mcmot_last_track_' + opt.arch + '.pth'), epoch, model, optimizer) else: # only do detection # save_model(os.path.join(opt.save_dir, 'mcmot_last_det_' + opt.arch + '.pth'), # epoch, model, optimizer) save_model( os.path.join(opt.save_dir, 'mcmot_last_det_' + opt.arch + '.pth'), epoch, model, optimizer) logger.write('\n') if epoch in opt.lr_step: save_model( os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1)) print('Drop LR to', lr) for param_group in optimizer.param_groups: param_group['lr'] = lr if epoch % 10 == 0: save_model( os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) logger.close()
def main(opt): torch.manual_seed(opt.seed) torch.backends.cudnn.benchmark = False # torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test Dataset = get_dataset(opt.dataset, opt.task) opt = opts().update_dataset_info_and_set_heads(opt, Dataset) print(opt) logger = Logger(opt) os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') print(opt.device) print('Creating model...') model = create_model(opt.arch, opt.heads, opt.head_conv, opt.deform_conv, w2=opt.w2, maxpool=opt.maxpool) optimizer = torch.optim.Adam(model.parameters(), opt.lr) start_epoch = 0 if opt.load_model != '': model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) # quantization-aware fine-tuning quantize_shufflenetv2_dcn(model, quant_conv=4, quant_bn=None, quant_act=8, wt_quant_mode='symmetric', act_quant_mode='asymmetric', wt_per_channel=True, wt_percentile=True, act_percentile=False, deform_backbone=False, w2=opt.w2, maxpool=opt.maxpool) # quantized_model = quantize_sfl_dcn(model, quant_conv=4, quant_bn=None, quant_act=4, # quant_mode='symmetric', wt_per_channel=True, wt_percentile=False, act_percentile=False) # print(quantized_model) # if opt.load_model != '': # model, optimizer, start_epoch = load_model( # model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) Trainer = train_factory[opt.task] trainer = Trainer(opt, model, optimizer) trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) print('Setting up data...') val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'), batch_size=1, shuffle=False, num_workers=1, pin_memory=True) # if opt.test: # # if True: # _, preds = trainer.val(0, val_loader) # val_loader.dataset.run_eval(preds, opt.save_dir) # return train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'), batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True, drop_last=True) print('Starting training...') best = 1e10 for epoch in range(start_epoch + 1, opt.num_epochs + 1): mark = epoch if opt.save_all else 'last' log_dict_train, _ = trainer.train(epoch, train_loader) logger.write('epoch: {} |'.format(epoch)) for k, v in log_dict_train.items(): logger.scalar_summary('train_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), epoch, model, optimizer) with torch.no_grad(): log_dict_val, preds = trainer.val(epoch, val_loader) for k, v in log_dict_val.items(): logger.scalar_summary('val_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if log_dict_val[opt.metric] < best: best = log_dict_val[opt.metric] save_model(os.path.join(opt.save_dir, 'model_best.pth'), epoch, model) else: save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch, model, optimizer) logger.write('\n') if epoch in opt.lr_step: save_model( os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1)) print('Drop LR to', lr) for param_group in optimizer.param_groups: param_group['lr'] = lr opt.test = True if opt.test: _, preds = trainer.val(0, val_loader) val_loader.dataset.run_eval(preds, opt.save_dir) logger.close()
def main(opt): torch.manual_seed(opt.seed) torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test Dataset = get_dataset(opt.dataset, opt.task) opt = opts().update_dataset_info_and_set_heads(opt, Dataset) print(opt) logger = Logger(opt) os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') print('Creating model...') model = create_model(opt.arch, opt.heads, opt.head_conv) optimizer = torch.optim.Adam(model.parameters(), opt.lr) start_epoch = 0 if opt.load_model != '': logger.write(f"loading model: {opt.load_model}, opt.resume: {opt.resume}") model, optimizer, start_epoch = load_model( model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) Trainer = train_factory[opt.task] trainer = Trainer(opt, model, optimizer) trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) print('Setting up data...') val_loader = torch.utils.data.DataLoader( Dataset(opt, 'val'), batch_size=1, shuffle=False, num_workers=1, pin_memory=True ) if opt.test: _, preds = trainer.val(0, val_loader) val_loader.dataset.run_eval(preds, opt.save_dir, logger) return train_loader = torch.utils.data.DataLoader( Dataset(opt, 'train'), batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True, drop_last=True ) print('Starting training...') best = float("-inf") if uses_coco_eval(opt) else 1e10 for epoch in range(start_epoch + 1, opt.num_epochs + 1): mark = epoch if opt.save_all else 'last' log_dict_train, _ = trainer.train(epoch, train_loader) logger.write('epoch: {} |'.format(epoch)) for k, v in log_dict_train.items(): logger.scalar_summary('train_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), epoch, model, optimizer) with torch.no_grad(): if uses_coco_eval(opt): best = do_eval(opt, epoch, model, Dataset, logger, best) else: log_dict_val, preds = trainer.val(epoch, val_loader) for k, v in log_dict_val.items(): logger.scalar_summary("val_{}".format(k), v, epoch) logger.write("{} {:8f} | ".format(k, v)) if log_dict_val[opt.metric] < best: best = log_dict_val[opt.metric] save_model( os.path.join(opt.save_dir, "model_best.pth"), epoch, model ) else: save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch, model, optimizer) logger.write('\n') if epoch in opt.lr_step: save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) print('Drop LR to', lr) for param_group in optimizer.param_groups: param_group['lr'] = lr logger.close()
def h4d_init(self, args=""): """h4d customized init, to work with centernet detection notebook""" opt = self.parse(args) Dataset = get_dataset(opt.dataset, opt.task) opt = self.update_dataset_info_and_set_heads(opt, Dataset) return opt
def main(opt, opt_t): torch.manual_seed(opt.seed) torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test if opt.target_dataset: Dataset_target = get_dataset(opt_t.target_dataset, opt_t.task) opt_t = opts().update_dataset_info_and_set_heads( opt_t, Dataset_target) # target dataset Dataset_source = get_dataset(opt.source_dataset, opt.task) opt = opts().update_dataset_info_and_set_heads( opt, Dataset_source) # source dataset print(opt) logger = Logger(opt) # record os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') print('Creating model...') model = create_model(opt.arch, opt.heads, opt.head_conv) # create model optimizer = torch.optim.Adam(model.parameters(), opt.lr) # create optimizer start_epoch = 0 if opt.load_model != '': # load model model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) Trainer = train_factory[opt.task] # set trainer function trainer = Trainer(opt, model, optimizer) # initial trainer trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) print('Setting up source_val data...') val_source_loader = torch.utils.data.DataLoader(Dataset_source(opt, 'val'), batch_size=1, shuffle=False, num_workers=0, pin_memory=True) if opt.test: _, preds = trainer.val(0, val_source_loader) val_source_loader.dataset.run_eval(preds, opt.save_dir) return # source loader print('Setting up source_train data...') train_source_loader = torch.utils.data.DataLoader( Dataset_source(opt, 'train'), # modify SOURCE dataset parameters batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True, drop_last=True) if opt.target_dataset: # target loader print('Setting up target_train data...') train_target_loader = torch.utils.data.DataLoader( Dataset_target(opt_t, 'train'), # modify TARGET dataset parameters batch_size=opt_t.batch_size, shuffle=True, num_workers=opt_t.num_workers, pin_memory=True, drop_last=True) print('DA MODE') else: train_target_loader = None print('Starting training...') best = 1e10 for epoch in range(start_epoch + 1, opt.num_epochs + 1): mark = epoch if opt.save_all else 'last' log_dict_train, _ = trainer.train(epoch, train_source_loader, train_target_loader) # do train logger.write('epoch: {} |'.format(epoch)) for k, v in log_dict_train.items(): # log information logger.scalar_summary('train_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) # if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: # save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), # save last-model # epoch, model, optimizer) # with torch.no_grad(): # log_dict_val, preds = trainer.val(epoch, val_source_loader) # cal val-set loss # for k, v in log_dict_val.items(): # logger.scalar_summary('val_{}'.format(k), v, epoch) # logger.write('{} {:8f} | '.format(k, v)) # if log_dict_val[opt.metric] < best: # best = log_dict_val[opt.metric] # save_model(os.path.join(opt.save_dir, 'model_best.pth'), # save best-model # epoch, model) # else: save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch, model, optimizer) logger.write('\n') if epoch in opt.lr_step: # update learning rate save_model( os.path.join( opt.save_dir, 'model_{}.pth'.format(epoch)), # save lr_step-model epoch, model, optimizer) lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1)) print('Drop LR to', lr) for param_group in optimizer.param_groups: param_group['lr'] = lr logger.close()
def main(opt): torch.manual_seed(opt.seed) # benchmark=True 自动寻找最适合当前配置的高效算法,来达到优化运行效率的wento torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test Dataset = get_dataset(opt.dataset, opt.task) opt = opts().update_dataset_info_and_set_heads(opt, Dataset) print(opt) logger = Logger(opt) def adjust_learning_rate(optimizer, epoch): # use warmup if epoch < 5: lr = opt.lr * ((epoch + 1) / 5) else: # use cosine lr PI = 3.14159 lr = opt.lr * 0.5 * (1 + math.cos(epoch * PI / 20)) # print(1111) for param_group in optimizer.param_groups: param_group['lr'] = lr os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') print('Creating model...') model = create_model(opt.arch, opt.heads, opt.head_conv) # optimizer = torch.optim.Adam(model.parameters(), opt.lr) optimizer = torch.optim.SGD(model.parameters(), opt.lr) start_epoch = 0 if opt.load_model != '': model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) Trainer = train_factory[opt.task] trainer = Trainer(opt, model, optimizer) trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) print('Setting up data...') # num_worker=0是主进程读取; >0使用多进程读取,子进程读取数据时,训练程序会卡住,GPU utils为0, val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'), batch_size=1, shuffle=False, num_workers=1, pin_memory=True) if opt.test: _, preds = trainer.val(0, val_loader) val_loader.dataset.run_eval(preds, opt.save_dir) return train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'), batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True, drop_last=True) print('Starting training...') best = 1e10 for epoch in range(start_epoch + 1, opt.num_epochs + 1): adjust_learning_rate(optimizer, epoch) mark = epoch if opt.save_all else 'last' log_dict_train, _ = trainer.train(epoch, train_loader) logger.write('epoch: {} |'.format(epoch)) for k, v in log_dict_train.items(): logger.scalar_summary('train_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if opt.val_intervals > 0 and epoch % 2 == 0 and epoch > 10: save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), epoch, model, optimizer) with torch.no_grad(): log_dict_val, preds = trainer.val(epoch, val_loader) for k, v in log_dict_val.items(): logger.scalar_summary('val_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if log_dict_val[opt.metric] < best: best = log_dict_val[opt.metric] save_model(os.path.join(opt.save_dir, 'model_best.pth'), epoch, model) # test(opt) # opt.model = None if epoch > 20: save_model( os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) # elif 80 < epoch <=100 and epoch % 3 == 0: # save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), # epoch, model, optimizer) logger.write('\n') # if epoch in opt.lr_step: # save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), # epoch, model, optimizer) # lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) # print('Drop LR to', lr) # for param_group in optimizer.param_groups: # param_group['lr'] = lr print('Epoch is Finished') logger.close()
def main(opt): torch.manual_seed( opt.seed ) # opt.seed: default=317 ;加上torch.manual_seed这个函数调用的话,打印出来的随机数每次都一样。 torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test Dataset = get_dataset( opt.dataset, opt.task ) # opt.dataset = coco, opt.task = ctdet (| ddd | multi_pose | exdet) opt = opts().update_dataset_info_and_set_heads(opt, Dataset) print(opt) logger = Logger(opt) os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') print('Creating model...') model = create_model(opt.arch, opt.heads, opt.head_conv) optimizer = torch.optim.Adam(model.parameters(), opt.lr) start_epoch = 0 if opt.load_model != '': model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) Trainer = train_factory[opt.task] trainer = Trainer(opt, model, optimizer) trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) print('Setting up data...') val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'), batch_size=1, shuffle=False, num_workers=1, pin_memory=True) # modified by zy # val_loader = torch.utils.data.DataLoader(Dataset(opt, 'test'), batch_size=1, shuffle=False, num_workers=1,pin_memory=True) if opt.test: _, preds = trainer.val(0, val_loader) val_loader.dataset.run_eval(preds, opt.save_dir) return train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'), batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True, drop_last=True) output_choice_log = '/home/zy/zy/2new_network/CenterNet-master/output_choice.log' if os.path.exists(output_choice_log): os.remove(output_choice_log) print('Starting training...') best = 1e10 for epoch in range(start_epoch + 1, opt.num_epochs + 1): mark = epoch if opt.save_all else 'last' try: log_dict_train, _ = trainer.train( epoch, train_loader ) # !!!!!!!! train = self.run_epoch('train', epoch, data_loader) except Exception as e: # 如果发生异常,那就返回预设的loss值 print('Error_train!!!', e) print(traceback.format_exc()) continue logger.write('epoch: {} |'.format(epoch)) for k, v in log_dict_train.items(): logger.scalar_summary('train_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), epoch, model, optimizer) with torch.no_grad(): try: log_dict_val, preds = trainer.val(epoch, val_loader) except Exception as e: # 如果发生异常,那就返回预设的loss值 print('Error_train!!!', e) print(traceback.format_exc()) continue for k, v in log_dict_val.items(): logger.scalar_summary('val_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if log_dict_val[opt.metric] < best: best = log_dict_val[opt.metric] save_model(os.path.join(opt.save_dir, 'model_best.pth'), epoch, model) else: save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch, model, optimizer) logger.write('\n') if epoch in opt.lr_step: save_model( os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1)) print('Drop LR to', lr) for param_group in optimizer.param_groups: param_group['lr'] = lr logger.close()
def main(opt): torch.manual_seed(opt.seed) torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test print('Setting up data...') Dataset = get_dataset(opt.task) f = open(opt.data_cfg) data_config = json.load(f) trainset_paths = data_config['train'] dataset_root = data_config['root'] f.close() transforms = T.Compose([T.ToTensor()]) dataset = Dataset(opt, dataset_root, trainset_paths, (1088, 608), augment=True, transforms=transforms) opt = opts().update_dataset_info_and_set_heads(opt, dataset) print(opt) logger = Logger(opt) os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') print('Creating model...') model = create_model(opt.arch, opt.heads, opt.head_conv) optimizer = torch.optim.Adam(model.parameters(), opt.lr) start_epoch = 0 if opt.load_model != '': model, optimizer, start_epoch = load_model(model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step) # Get dataloader train_loader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True, drop_last=True) print('Starting training...') Trainer = train_factory[opt.task] trainer = Trainer(opt, model, optimizer) trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) best = 1e10 for epoch in range(start_epoch + 1, opt.num_epochs + 1): mark = epoch if opt.save_all else 'last' log_dict_train, _ = trainer.train(epoch, train_loader) logger.write('epoch: {} |'.format(epoch)) for k, v in log_dict_train.items(): logger.scalar_summary('train_{}'.format(k), v, epoch) logger.write('{} {:8f} | '.format(k, v)) if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), epoch, model, optimizer) else: save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch, model, optimizer) logger.write('\n') if epoch in opt.lr_step: save_model( os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1)) print('Drop LR to', lr) for param_group in optimizer.param_groups: param_group['lr'] = lr if epoch % 5 == 0: save_model( os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), epoch, model, optimizer) logger.close()