def save_model(optimizer, model, iter, prev_iter, prefix=''):
    """Checkpoint the model and optimizer state dicts to config.JOBS_MODEL_DIR.

    Two files are written, both stamped with the absolute iteration count
    (iter + prev_iter): '<prefix>model_iter_<N>.bin' for the model weights and
    '<prefix>opt_state_iter_<N>.bin' for the optimizer state.  `prefix` lets
    callers tag special checkpoints (e.g. 'best_').
    """
    makedir_if_not_exist(config.JOBS_MODEL_DIR)
    step = iter + prev_iter
    model_path = os.path.join(config.JOBS_MODEL_DIR,
                              '%smodel_iter_%d.bin' % (prefix, step))
    opt_path = os.path.join(config.JOBS_MODEL_DIR,
                            '%sopt_state_iter_%d.bin' % (prefix, step))
    torch.save(model.state_dict(), model_path)
    torch.save(optimizer.state_dict(), opt_path)
def path(key, date=None, ix=None, iy=None):
    """Resolve paths for the different GHCN-daily dataset files.

    Directory-style keys ('data', 'daily dir', 'computed') are created on
    demand via utils.makedir_if_not_exist; file-style keys only build the
    path string.  `date` is a datetime-like object for per-year/per-day
    keys, or an indexable date range for the trend/bias/plot keys; `ix`,
    `iy` are grid indices used only by 'timeseries'.

    Raises ValueError for an unknown key.
    """
    if key == 'url':
        # Remote FTP location of a single year of TMAX grids.
        return 'ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/grid/years/%d.tmax' % date.year
    if key == 'base':
        # Directory containing this module.
        return os.path.abspath(os.path.dirname(__file__))
    if key == 'data':
        return utils.makedir_if_not_exist(os.path.join(path('base'), 'data'))
    if key == 'yearly file':
        yearly_name = 'tmp_%d.tmax' % (date.year,)
        return os.path.join(path('data'), yearly_name)
    if key == 'daily dir':
        year_dir = os.path.join(path('base'), 'data', 'daily', '%d' % (date.year,))
        return utils.makedir_if_not_exist(year_dir)
    if key == 'daily file':
        day_name = '%d%02d%02d.pbz2' % (date.year, date.month, date.day)
        return os.path.join(path('daily dir', date), day_name)
    if key == 'computed':
        return utils.makedir_if_not_exist(os.path.join(path('data'), 'computed'))
    if key == 'trend':
        return os.path.join(path('computed'),
                            'trend_from_%s_to_%s.pbz2' % (str(date[0]), str(date[-1])))
    if key == 'bias':
        return os.path.join(path('computed'),
                            'bias_from_%s_to_%s.pbz2' % (str(date[0]), str(date[-1])))
    if key == 'results':
        # NOTE(review): unlike 'data'/'computed', this directory is not
        # auto-created -- confirm callers create it before writing plots.
        return os.path.join(path('base'), 'results')
    if key == 'trend plot':
        return os.path.join(path('results'),
                            'trend_from_%s_to_%s.pdf' % (str(date[0]), str(date[-1])))
    if key == 'timeseries':
        return os.path.join(path('results'),
                            'trend_from_%s_to_%s_%d_%d.pdf' % (str(date[0]), str(date[-1]), ix, iy))
    raise ValueError('wrong key: %s' % key)
# NOTE(review): this chunk begins mid-way through the capture/display loop of
# testcamera_nir(); the enclosing function and loop header are above this
# excerpt, so the indentation below is reconstructed.
        # Overlay the measured frame rate on the RGB preview.
        cv2.putText(draw_rgb, 'FPS:' + str(fps), (25, 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1)
        cv2.imshow("detection result rgb", draw_rgb)
        cv2.imshow("detection result ir", draw_ir)
        key = cv2.waitKey(1)
        # Quit on 'q'/'Q' or Esc.
        if 'q' == chr(key & 255) or 'Q' == chr(key & 255) or 27 == key:  # 27:Esc
            break
    # Release the VideoCapture objects and close all display windows.
    camera_rgb.release()
    camera_ir.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    # Create every output directory up front so the capture loop can save
    # images without per-frame existence checks.
    utils.makedir_if_not_exist(sub_path)
    # print('datetime===', datetime)
    utils.makedir_if_not_exist(images_save_path)
    utils.makedir_if_not_exist(images_rgbs_save_path)
    utils.makedir_if_not_exist(images_rgbs_orgs_save_path)
    utils.makedir_if_not_exist(images_rgbs_rects_save_path)
    utils.makedir_if_not_exist(images_rgbs_anns_save_path)
    utils.makedir_if_not_exist(images_nirs_save_path)
    utils.makedir_if_not_exist(images_nirs_orgs_save_path)
    utils.makedir_if_not_exist(images_nirs_rects_save_path)
    utils.makedir_if_not_exist(images_nirs_anns_save_path)
    testcamera_nir()
# NOTE(review): this chunk begins mid-call -- the lines below are the trailing
# arguments of a valid.valid(...) invocation whose opening is above this
# excerpt.  Python 2 syntax (print statements).
                            criterion,
                            max_iter=args.num_iters,
                            verbal=True,
                            b_vis_depth=args.vis_depth)

# Build the held-out test set and evaluate the trained model on it,
# one sample per batch.
test_dataset = DataSet(csv_filename='../data/' + args.test_file,
                       b_resnet_prep=b_resnet_prep)
test_data_loader = data.DataLoader(test_dataset,
                                   batch_size=1,
                                   num_workers=1,
                                   shuffle=False,
                                   collate_fn=relative_depth_collate_fn)
print "Testing on %s" % args.test_file
test_rel_error = valid.valid(model,
                             test_data_loader,
                             criterion,
                             max_iter=args.num_iters,
                             in_thresh=in_thresh,
                             b_vis_depth=args.vis_depth,
                             verbal=True)
# test_rel_error = valid.valid(model, test_data_loader, criterion, max_iter = args.num_iters, b_vis_depth=args.vis_depth, verbal=True)
# Restore training mode after evaluation.
model.train()

# Persist both error dictionaries when an output path was requested.
if args.output_file is not None:
    makedir_if_not_exist(os.path.dirname(args.output_file))
    save_obj(
        {
            'val_rel_error': val_rel_error,
            'test_rel_error': test_rel_error
        }, args.output_file)
def train(dataset_name, model_name, loss_name,\
          n_GPUs, b_oppi, b_data_aug, b_sort, b_diff_lr,\
          train_file, valid_file,\
          learning_rate, num_iters, num_epoches,\
          batch_size, num_loader_workers, pretrained_file,\
          model_save_interval, model_eval_interval):
    """Train a relative-depth network (Python 2 syntax: print statements).

    Builds the model/criterion/optimizer from the *_name strings, optionally
    resumes from `pretrained_file` (and its matching optimizer checkpoint),
    then runs up to `num_epoches` epochs or `num_iters` iterations, logging
    the training loss, periodically saving checkpoints and evaluating WKDR
    on the validation set, keeping a 'best_' checkpoint for the lowest
    validation WKDR_neq.

    NOTE(review): the local counter `iter` shadows the builtin, and the bare
    `except:` below also swallows KeyboardInterrupt -- flagged, left as-is.
    """
    # Map CLI names to constructors.
    NetworkType = {'ReDWebNet': ReDWebNet_resnet50}
    LossType = {"RelativeLoss": RelativeLoss}

    # create (and load) model. Should wrap with torch.nn.parallel.DistributedDataParallel before loading pretraiend model (https://github.com/pytorch/examples/blob/master/imagenet/main.py)
    model = NetworkType[model_name]().cuda()
    # ReDWebNet has a ResNet backbone, so inputs use ImageNet preprocessing.
    b_resnet_prep = model_name == 'ReDWebNet'

    if n_GPUs > 1:
        print "######################################################"
        print "Using %d GPUs, batch_size is %d" % (n_GPUs, batch_size)
        print "######################################################"
        model = torch.nn.parallel.DataParallel(model)

    print 'num_loader_workers:', num_loader_workers

    # resume from a checkpoint model
    prev_iter = 0
    if pretrained_file:
        model.load_state_dict(
            torch.load(os.path.join(config.JOBS_MODEL_DIR, pretrained_file)))
        # Recover the iteration count encoded in the checkpoint filename so
        # logging/checkpoint numbering continues from where it stopped.
        prev_iter = get_prev_iter(pretrained_file)
    print "Prev_iter: {}".format(prev_iter)

    # set up criterion and optimizer.  L2 regression trains on metric-depth
    # batches; every other loss trains on relative-depth pair batches.
    if loss_name == 'L2_loss':
        t_collate_fn = metric_depth_collate_fn
        criterion = torch.nn.MSELoss()
    else:
        t_collate_fn = relative_depth_collate_fn
        criterion = LossType[loss_name](b_sort=b_sort)

    if b_diff_lr and b_resnet_prep:
        print(
            "=========================================================================="
        )
        print(" Use different learning rates for different part of the model")
        print(
            " The learning rate for the ResNet encoder is 10x smaller than decoder."
        )
        print(
            "=========================================================================="
        )
        # Encoder parameters get a 10x smaller learning rate than the
        # decoder/feature-fusion heads.
        optimizer = optim.RMSprop([
            {
                'params': model.resnet_model.parameters(),
                'lr': learning_rate / 10.0
            },
            {
                'params': model.feafu3.parameters()
            },
            {
                'params': model.feafu2.parameters()
            },
            {
                'params': model.feafu1.parameters()
            },
            {
                'params': model.ada_out.parameters()
            },
        ], lr=learning_rate)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

    # Best-effort restore of the optimizer state saved next to the model
    # checkpoint; a mismatch (e.g. different lr strategy) is tolerated.
    try:
        if pretrained_file:
            print pretrained_file
            optimizer.load_state_dict(
                torch.load(
                    os.path.join(
                        config.JOBS_MODEL_DIR,
                        pretrained_file.replace('model_', 'opt_state_'))))
    except:
        print(
            "Exception happens when trying to load optimizer state, possibility due to different learning rate strategy."
        )

    # register dataset type: maps a dataset key to its train / validation /
    # train-sampled-for-validation dataset classes.
    DatasetsType = {
        "YoutubeDataset": {
            'train_dataset': YoutubeDataset,
            'val_dataset': YoutubeDatasetVal,
            't_val_dataset': YoutubeDatasetVal
        },
        "RelativeDepthDataset": {
            'train_dataset': RelativeDepthDataset,
            'val_dataset': RelativeDepthDataset,
            't_val_dataset': RelativeDepthDataset
        },
        "DIWDataset": {
            'train_dataset': DIWDataset,
            'val_dataset': DIWDatasetVal,
            't_val_dataset': DIWDatasetVal
        },
        "YT_DIW": {
            'train_dataset': YoutubeDataset,
            'val_dataset': DIWDatasetVal,
            't_val_dataset': YoutubeDatasetVal
        },
        "ReDWeb_DIW": {
            'train_dataset': ReDWebDataset,
            'val_dataset': DIWDatasetVal,
            't_val_dataset': ReDWebDatasetVal
        },
        "SceneNet_DIW": {
            'train_dataset': SceneNetDataset,
            'val_dataset': DIWDatasetVal,
            't_val_dataset': SceneNetDatasetVal
        },
        "SceneNetMetric_DIW": {
            'train_dataset': SceneNetDataset_Metric,
            'val_dataset': DIWDatasetVal,
            't_val_dataset': SceneNetDataset_MetricVal
        },
        "YTmixReD_DIW": {
            'train_dataset': YTmixReDWebDataset,
            'val_dataset': DIWDatasetVal,
            't_val_dataset': YTmixReDWebDatasetVal
        }
    }

    # create dataset
    t_dataset = DatasetsType[dataset_name]['train_dataset'](
        csv_filename='../data/' + train_file,
        b_data_aug=b_data_aug,
        b_resnet_prep=b_resnet_prep,
        b_oppi=b_oppi)
    v_dataset = DatasetsType[dataset_name]['val_dataset'](
        csv_filename='../data/' + valid_file, b_resnet_prep=b_resnet_prep)
    # Validation-style pass over the training data (for train WKDR).
    tv_dataset = DatasetsType[dataset_name]['t_val_dataset'](
        csv_filename='../data/' + train_file, b_resnet_prep=b_resnet_prep)

    t_data_loader = data.DataLoader(t_dataset,
                                    batch_size=batch_size,
                                    num_workers=num_loader_workers,
                                    shuffle=True,
                                    collate_fn=t_collate_fn)
    # Evaluation loaders always run single-sample, in order, in-process.
    tv_data_loader = data.DataLoader(tv_dataset,
                                     batch_size=1,
                                     num_workers=0,
                                     shuffle=False,
                                     collate_fn=relative_depth_collate_fn)
    v_data_loader = data.DataLoader(v_dataset,
                                    batch_size=1,
                                    num_workers=0,
                                    shuffle=False,
                                    collate_fn=relative_depth_collate_fn)

    # create tensorboard logger
    logger = TBLogger.TBLogger(makedir_if_not_exist(config.JOBS_LOG_DIR))
    logger.create_scalar('Training Loss')
    logger.create_scalar('Train WKDR')
    logger.create_scalar('Val WKDR')
    # logger.create_image('Dummy image')
    # logger.create_histogram('Dummy histogram')

    # Avoid OpenCV spawning its own threads inside DataLoader workers.
    cv2.setNumThreads(0)

    iter = 1
    best_v_WKDR = 100000
    for epoch in range(num_epoches):
        print "==============epoch = ", epoch
        for step, (inputs, target, input_res) in enumerate(t_data_loader):
            if iter >= num_iters:
                break

            ###### zero gradient
            optimizer.zero_grad()

            ###### read in training data
            input_var = Variable(inputs.cuda())
            if loss_name == 'L2_loss':
                target_var = Variable(target.cuda())
            else:
                target_var = [Variable(a.cuda()) for a in target]

            ###### forwarding
            output_var = model(input_var)

            ###### get loss
            loss = criterion(output_var, target_var)
            print iter, loss.data[0]

            ###### back propagate
            loss.backward()
            optimizer.step()

            ###### save to log
            logger.add_value('Training Loss',
                             loss.data[0],
                             step=(iter + prev_iter))

            if (iter + prev_iter) % model_save_interval == 0:
                save_model(optimizer, model, iter, prev_iter)

            if (iter + prev_iter) % model_eval_interval == 0:
                print "Evaluating at iter %d" % iter
                model.eval()
                # When DataParallel wraps the model, evaluate the underlying
                # module so valid sees the raw network.
                if n_GPUs > 1:
                    print "========================================validation set"
                    v_rel_error = valid.valid(model.module,
                                              v_data_loader,
                                              criterion,
                                              in_thresh=0.0)
                    print "========================================training set"
                    t_rel_error = valid.valid(model.module,
                                              tv_data_loader,
                                              criterion,
                                              in_thresh=0.0,
                                              max_iter=500)
                else:
                    print "========================================validation set"
                    v_rel_error = valid.valid(model,
                                              v_data_loader,
                                              criterion,
                                              in_thresh=0.0)
                    print "========================================training set"
                    t_rel_error = valid.valid(model,
                                              tv_data_loader,
                                              criterion,
                                              in_thresh=0.0,
                                              max_iter=500)
                logger.add_value('Val WKDR',
                                 v_rel_error['WKDR_neq'],
                                 step=(iter + prev_iter))
                logger.add_value('Train WKDR',
                                 t_rel_error['WKDR_neq'],
                                 step=(iter + prev_iter))
                model.train()

                # Keep a 'best_' checkpoint for the lowest validation WKDR;
                # otherwise save a regular checkpoint.
                if best_v_WKDR > v_rel_error['WKDR_neq']:
                    best_v_WKDR = v_rel_error['WKDR_neq']
                    save_model(optimizer, model, iter, prev_iter, prefix='best_')
                else:
                    save_model(optimizer, model, iter, prev_iter)

            iter += 1
            # Drop references so tensors can be freed before the next batch.
            inputs = None
            target = None
            input_res = None
        if iter >= num_iters:
            break

    # Final checkpoint after training completes.
    save_model(optimizer, model, iter, prev_iter)
# NOTE(review): this chunk begins mid-way through argparse setup -- the parser
# and earlier add_argument calls are above this excerpt.  Python 2 syntax.
parser.add_argument('--num_loader_workers', '-nlw', type=int, default=2)
parser.add_argument('--pretrained_file', '-pf', default=None)
# Boolean feature switches, all off by default.
parser.add_argument('--b_oppi', '-b_oppi', action='store_true', default=False)
parser.add_argument('--b_sort', '-b_sort', action='store_true', default=False)
parser.add_argument('--b_data_aug', '-b_data_aug', action='store_true', default=False)
parser.add_argument('--b_diff_lr', '-b_diff_lr', action='store_true', default=False)
# parser.add_argument('--debug', '-d', action='store_true')

args = parser.parse_args()
args_dict = vars(args)

# Ensure the job directory exists, snapshot the CLI arguments for
# reproducibility, then launch training with them.
folder = makedir_if_not_exist(config.JOBS_DIR)
save_obj(args_dict, os.path.join(config.JOBS_DIR, 'args.pkl'))
train(**args_dict)

print "End of train.py"
def train(dataset_name, model_name, loss_name, n_GPUs, b_oppi, b_data_aug, b_sort, \
        train_file, valid_file,\
        learning_rate, num_iters, num_epoches,\
        batch_size, num_loader_workers, pretrained_file,\
        model_save_interval, model_eval_interval, exp_name):
    """Train a depth + focal-length network on OASIS-style data (Python 3).

    Builds the model/criterion/optimizer from the *_name strings, optionally
    resumes from `pretrained_file`, then runs up to `num_epoches` epochs or
    `num_iters` iterations.  Logs loss/images to TensorBoard, periodically
    checkpoints, and tracks the best validation WKDR ('best_rel') and best
    LSIV RMSE ('best_siv') checkpoints separately.

    NOTE(review): `exp_name` is accepted but never used in this body;
    `iter` shadows the builtin; the bare `except:` below also swallows
    KeyboardInterrupt -- flagged, left as-is.
    """
    # Map CLI names to constructors.
    NetworkType = {
        "NIPS": HourglassNetwork,
        "ReDWebNetReluMin": ReDWebNetReluMin,
        "ReDWebNetReluMin_raw": ReDWebNetReluMin_raw,
    }
    LossType = {
        "LocalBackprojLoss2": LocalBackprojLoss2,
    }
    DatasetsType = {
        "OASISDataset": {
            'train_dataset': OASISDataset,
            'val_dataset': OASISDatasetVal,
            't_val_dataset': OASISDataset
        },
    }

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print("Using CUDA:", torch.cuda.is_available())

    # create (and load) model. Should wrap with torch.nn.parallel.DistributedDataParallel before loading pretraiend model (https://github.com/pytorch/examples/blob/master/imagenet/main.py)
    model = NetworkType[model_name]().to(device)
    # Every network except the NIPS hourglass uses ResNet-style input prep.
    b_resnet_prep = model_name != 'NIPS'

    if n_GPUs > 1:
        print("######################################################")
        print("Using %d GPUs, batch_size is %d" % (n_GPUs, batch_size))
        print("######################################################")
        model = torch.nn.parallel.DataParallel(model)

    print('num_loader_workers:', num_loader_workers)

    # resume from a checkpoint model
    prev_iter = 0
    if pretrained_file:
        model.load_state_dict(torch.load(pretrained_file))
        # Recover the iteration count encoded in the checkpoint filename.
        prev_iter = get_prev_iter(pretrained_file)
    print("Prev_iter: {}".format(prev_iter))

    # set up criterion and optimizer
    criterion = LossType[loss_name]()
    optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    # Best-effort restore of the optimizer state saved next to the model
    # checkpoint; a mismatch is tolerated.
    try:
        if pretrained_file:
            print(pretrained_file)
            optimizer.load_state_dict(
                torch.load(pretrained_file.replace('model_', 'opt_state_')))
    except:
        print("Exception happens when trying to load optimizer state, possibly due to different learning rate strategy.")

    # create dataset
    t_dataset = DatasetsType[dataset_name]['train_dataset'](
        csv_filename=train_file,
        b_data_aug=b_data_aug,
        b_resnet_prep=b_resnet_prep,
        b_oppi=b_oppi)
    v_dataset = DatasetsType[dataset_name]['val_dataset'](
        csv_filename=valid_file, b_resnet_prep=b_resnet_prep)
    # Validation-style pass over the training data (for train metrics).
    tv_dataset = DatasetsType[dataset_name]['t_val_dataset'](
        csv_filename=train_file, b_resnet_prep=b_resnet_prep)

    t_data_loader = data.DataLoader(t_dataset,
                                    batch_size=batch_size,
                                    num_workers=num_loader_workers,
                                    shuffle=True,
                                    collate_fn=OASIS_collate_fn)
    # Evaluation loaders always run single-sample, in order, in-process.
    tv_data_loader = data.DataLoader(tv_dataset,
                                     batch_size=1,
                                     num_workers=0,
                                     shuffle=False,
                                     collate_fn=OASIS_collate_fn)
    v_data_loader = data.DataLoader(v_dataset,
                                    batch_size=1,
                                    num_workers=0,
                                    shuffle=False,
                                    collate_fn=OASIS_collate_fn)

    # create tensorboard logger
    logger = TBLogger.TBLogger(makedir_if_not_exist(config.JOBS_LOG_DIR))

    # Avoid OpenCV spawning its own threads inside DataLoader workers.
    cv2.setNumThreads(0)

    iter = 1
    best_v_WKDR = float('inf')
    best_siv = float('inf')
    for epoch in range(num_epoches):
        print("==============epoch = ", epoch)
        for step, (inputs, metric_depth, surface_ids, target, _, focals,
                   names) in enumerate(t_data_loader):
            if iter >= num_iters:
                break

            ###### zero gradient
            optimizer.zero_grad()

            ###### read in training data
            input_var = inputs.to(device)
            metric_depth_var = [a.to(device) for a in metric_depth]
            surface_ids_var = surface_ids.to(device)
            focals_gt_var = focals.to(device)  # TODO
            target_var = [a.to(device) for a in target]

            ###### forwarding
            output_var, focal_pred_var = model(input_var)

            # TODO: remove -- periodic TensorBoard image dump of predictions
            # and ground truth; failures here must not kill training.
            if iter % 3000 == 0 and dataset_name != 'DIWDataset':
                try:
                    # pred_depth = np.exp(output_var.cpu().detach().numpy())	# when the network is predicting log depth.
                    pred_depth = output_var.cpu().detach().numpy()  # when the network is predicting absolute depth
                    c = surface_ids.cpu().detach().numpy()
                    _p_img = vis_depth_by_surface(pred_depth[0, 0, :, :], c[0, 0, :, :])
                    _p_full_img = vis_depth(pred_depth[0, 0, :, :], c[0, 0, :, :] > 0)
                    logger.add_image('train/pred_depth_surface',
                                     torch.from_numpy(_p_img),
                                     (iter + prev_iter),
                                     dataformats="HW")
                    logger.add_image('train/pred_depth',
                                     torch.from_numpy(_p_full_img),
                                     (iter + prev_iter),
                                     dataformats="HW")
                    if b_resnet_prep:
                        print("ResNet Prep")
                        # Undo ImageNet normalization so the logged image is
                        # viewable (channel-wise std/mean, back to 0-255).
                        out_color = inputs[0].cpu().detach().numpy()
                        out_color[0, :, :] = (out_color[0, :, :] * 0.229 + 0.485) * 255.0
                        out_color[1, :, :] = (out_color[1, :, :] * 0.224 + 0.456) * 255.0
                        out_color[2, :, :] = (out_color[2, :, :] * 0.225 + 0.406) * 255.0
                        out_color = out_color.astype(np.uint8)
                        logger.add_image('train/img',
                                         torch.from_numpy(out_color),
                                         (iter + prev_iter),
                                         dataformats="CHW")
                    else:
                        logger.add_image('train/img',
                                         inputs[0],
                                         (iter + prev_iter),
                                         dataformats="CHW")
                    try:
                        b = metric_depth[0].cpu().detach().numpy()
                        _gt_img = vis_depth_full(b, c[0, 0, :, :] > 0)
                        logger.add_image('train/gt_depth',
                                         torch.from_numpy(_gt_img),
                                         (iter + prev_iter),
                                         dataformats="HW")
                    except:
                        # No usable ground-truth depth: log a blank image.
                        b = np.zeros((240, 320), dtype=np.uint8)
                        logger.add_image('train/gt_depth',
                                         torch.from_numpy(b),
                                         (iter + prev_iter),
                                         dataformats="HW")
                        print("No data for gt depth.")
                except Exception as e:
                    print(str(e))

            ###### get loss
            if loss_name in ["LocalBackprojLoss", "LocalBackprojLoss2", "BackprojLoss", "BackprojLoss2"]:
                loss = criterion(preds=output_var,
                                 gts=metric_depth_var,
                                 surface_ids=surface_ids_var,
                                 focal_gts=focals_gt_var,
                                 focal_preds=focal_pred_var)
            print(iter + prev_iter, "Total_loss: %g" % loss.item())
            # Abort outright on NaN loss; print the offending sample names
            # when the loss explodes.
            if math.isnan(loss.item()):
                import sys
                sys.exit()
            if loss.item() > 1e+8:
                print(names)

            ###### save to log
            logger.add_value('train/Loss', loss.item(), step=(iter + prev_iter))

            ###### back propagate
            loss.backward()
            optimizer.step()

            if (iter + prev_iter) % model_save_interval == 0:
                save_model(optimizer, model, iter, prev_iter)

            if (iter + prev_iter) % model_eval_interval == 0:
                print("Evaluating at iter %d" % iter)
                model.eval()
                # When DataParallel wraps the model, evaluate the underlying
                # module.  NOTE(review): only the single-GPU branch caps the
                # validation pass at max_iter=500 -- confirm this asymmetry
                # is intended.
                if n_GPUs > 1:
                    print("========================================validation set")
                    v_rel_error, _, _, v_LSIVRMSE = valid2.valid(model.module,
                                                                 v_data_loader,
                                                                 criterion,
                                                                 in_thresh=0.0)
                    print("========================================training set")
                    t_rel_error, _, _, t_LSIVRMSE = valid2.valid(model.module,
                                                                 tv_data_loader,
                                                                 criterion,
                                                                 in_thresh=0.0,
                                                                 max_iter=500)
                else:
                    print("========================================validation set")
                    v_rel_error, _, _, v_LSIVRMSE = valid2.valid(model,
                                                                 v_data_loader,
                                                                 criterion,
                                                                 in_thresh=0.0,
                                                                 max_iter=500)
                    print("========================================training set")
                    t_rel_error, _, _, t_LSIVRMSE = valid2.valid(model,
                                                                 tv_data_loader,
                                                                 criterion,
                                                                 in_thresh=0.0,
                                                                 max_iter=500)
                logger.add_value('train/WKDR',
                                 t_rel_error['WKDR_neq'],
                                 step=(iter + prev_iter))
                logger.add_value('train/LSIV_RMSE',
                                 t_LSIVRMSE["LSIV"],
                                 step=(iter + prev_iter))
                logger.add_value('val/WKDR',
                                 v_rel_error['WKDR_neq'],
                                 step=(iter + prev_iter))
                logger.add_value('val/LSIV_RMSE',
                                 v_LSIVRMSE["LSIV"],
                                 step=(iter + prev_iter))
                model.train()

                # Track the best checkpoints by two independent metrics.
                if best_v_WKDR > v_rel_error['WKDR_neq']:
                    best_v_WKDR = v_rel_error['WKDR_neq']
                    save_model(optimizer, model, iter, prev_iter, prefix='best_rel')
                if best_siv > v_LSIVRMSE["LSIV"]:
                    best_siv = v_LSIVRMSE["LSIV"]
                    save_model(optimizer, model, iter, prev_iter, prefix='best_siv')
                save_model(optimizer, model, iter, prev_iter)

            iter += 1
            # Drop references so tensors can be freed before the next batch.
            inputs = None
            target = None
        if iter >= num_iters:
            break

    # Final checkpoint after training completes.
    save_model(optimizer, model, iter, prev_iter)
# NOTE(review): this chunk begins mid-way through the capture loop of
# testcamera_nir(); the enclosing function and loop header are above this
# excerpt, so the indentation below is reconstructed.
        cv2.imshow("detection result ir", frame_ir)
        key = cv2.waitKey(1)
        # Quit on 'q'/'Q' or Esc (key code 27).
        if 'q' == chr(key & 255) or 'Q' == chr(key & 255) or 27 == key:
            break
        # 'p' toggles between the positive- and negative-sample output
        # directories; resetting the writer forces a new file to be opened.
        if 'p' == chr(key & 255):
            samplingPositive = not samplingPositive
            writePath_rgb = videos_rgbs_1_save_path if samplingPositive else videos_rgbs_0_save_path
            writePath_nir = videos_nirs_1_save_path if samplingPositive else videos_nirs_0_save_path
            videoWriter_rgb = None
        # Space toggles recording on/off.
        if 32 == key:  # 32: space
            starting = not starting
    # Release the VideoCapture objects and close all display windows.
    camera_rgb.release()
    camera_ir.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    # Create all video output directories before capturing starts.
    utils.makedir_if_not_exist(videos_save_path)
    utils.makedir_if_not_exist(videos_rgbs_save_path)
    utils.makedir_if_not_exist(videos_nirs_save_path)
    utils.makedir_if_not_exist(videos_rgbs_1_save_path)
    utils.makedir_if_not_exist(videos_rgbs_0_save_path)
    utils.makedir_if_not_exist(videos_nirs_1_save_path)
    utils.makedir_if_not_exist(videos_nirs_0_save_path)
    testcamera_nir()
# NOTE(review): this chunk begins mid-way through the capture loop of
# testcamera_nir(); the enclosing function and loop header are above this
# excerpt, so the indentation below is reconstructed.
        draw_rgb = frame_rgb.copy()
        draw_ir = frame_ir.copy()
        # Draw each detected box on the RGB frame, and a copy shifted by
        # (+20, -10) onto the IR frame.  NOTE(review): the fixed offset
        # presumably compensates for the physical displacement between the
        # two cameras -- confirm against the rig calibration.
        for b in total_boxes_rgb:
            p1 = (int(b[0]), int(b[1]))
            p2 = (int(b[2]), int(b[3]))
            cv2.rectangle(draw_rgb, p1, p2, (0, 255, 0))
            p1_ir = (int(b[0]) + 20, int(b[1]) - 10)
            p2_ir = (int(b[2]) + 20, int(b[3]) - 10)
            cv2.rectangle(draw_ir, p1_ir, p2_ir, (0, 255, 0))
        # Overlay the frame rate and the recording flag on the RGB preview.
        cv2.putText(draw_rgb, 'FPS:' + str(fps), (25, 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1)
        cv2.putText(draw_rgb, 'starting:' + str(starting), (25, size[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1)
        cv2.imshow("detection result rgb", draw_rgb)
        cv2.imshow("detection result ir", draw_ir)
        key = cv2.waitKey(1)
        # Quit on 'q'/'Q' or Esc (key code 27).
        if 'q' == chr(key & 255) or 'Q' == chr(key & 255) or 27 == key:
            break
    # Release the VideoCapture objects and close all display windows.
    camera_rgb.release()
    camera_ir.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    utils.makedir_if_not_exist(videos_save_path)
    testcamera_nir()