def __init__(self, options):
    self.opt = options

    # checking height and width are multiples of 32
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}
    self.parameters_to_train = []
    self.device = "cuda"

    if self.opt.threeinput:
        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, pretrained=True, num_input_channels=8)
    else:
        self.models["encoder"] = networks.ResnetEncoder(
            self.opt.num_layers, pretrained=True)
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = DepthDecoder(self.models["encoder"].num_ch_enc,
                                        num_output_channels=1)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    self.models["confidence"] = ConfidenceDecoder(
        self.models["encoder"].num_ch_enc, num_output_channels=1)
    self.models["confidence"].to(self.device)

    self.set_dataset()

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms",
        "da/a1", "da/a2", "da/a3"]

    print("Using split:\t ", self.opt.split)

    self.load_model()

    self.MIN_DEPTH = 1e-3
    self.MAX_DEPTH = 80
    self.STEREO_SCALE_FACTOR = 5.4

    self.sfnormOptimizer = SurfaceNormalOptimizer(
        height=self.opt.crph, width=self.opt.crpw,
        batch_size=self.opt.batch_size).cuda()

    self.variancebar = torch.from_numpy(variancebar).cuda().float()
    self.variancebar[self.variancebar > 0] = \
        self.variancebar[self.variancebar > 0] / self.opt.variancefold

    from integrationModule import IntegrationFunction
    self.integrationFunction = IntegrationFunction.apply
def __init__(self):
    super().__init__()
    self.num_classes = 9
    self.cam_thresh = 0.9
    self.seg_ratio = 0.2

    self.encoder = networks.ResnetEncoder(18, False)
    self.encoder_cls = networks.ResnetEncoder(18, True)

    model_name = 'mono+stereo_640x192'
    model_path = os.path.join("models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    loaded_dict_enc = torch.load(encoder_path, map_location=device)
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in self.encoder.state_dict()}
    self.encoder.load_state_dict(filtered_dict_enc)
    # self.encoder.eval()

    self.decoder = networks.DepthDecoder(
        num_ch_enc=self.encoder.num_ch_enc, scales=range(5),
        num_output_channels=self.num_classes)
    self.depth_decoder = DebugDepthDecoder(
        num_ch_enc=self.encoder.num_ch_enc, scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    self.depth_decoder.load_state_dict(loaded_dict)
    # self.decoder.eval()

    self.encoder_out_channels = self.encoder.num_ch_enc[-1]
    self.classifer_conv1 = nn.Conv2d(self.encoder_out_channels, 1024, 3, padding=1)
    self.classifer_drop1 = nn.Dropout2d(p=0.5)
    self.classifer_conv2 = nn.Conv2d(1024, self.num_classes, 1, bias=False)
    # self.seg_conv1 = nn.Conv2d(self.encoder_out_channels, self.num_classes, 1)

    self.train_loss = []
    self.loss_decomp = {'cls': [], 'seed': [], 'dCRF': []}
    self.val_loss = []
    self.test_loss = []

    self.rloss_weight = 2e-9
    self.rloss_scale = 0.5
    self.rloss_sig_rgb = 15
    self.rloss_sig_xy = 100
    self.lr = 1e-3

    self.densecrflosslayer = DenseCRFLoss(
        weight=self.rloss_weight, sigma_rgb=self.rloss_sig_rgb,
        sigma_xy=self.rloss_sig_xy, scale_factor=self.rloss_scale)
def monodepth2_init(self, args):
    self.encoder_path = args.encoder_path
    self.depth_decoder_path = args.depth_decoder_path

    # encoder init
    self.encoder = networks.ResnetEncoder(18, False)
    self.loaded_dict_enc = torch.load(self.encoder_path, map_location=self.device)
    self.filtered_dict_enc = {k: v for k, v in self.loaded_dict_enc.items()
                              if k in self.encoder.state_dict()}
    self.encoder.load_state_dict(self.filtered_dict_enc)
    self.encoder.to(self.device)
    self.encoder.eval()

    # decoder
    self.depth_decoder = networks.DepthDecoder2([64, 64, 128, 256, 512])
    self.loaded_dict_dec = torch.load(self.depth_decoder_path, map_location=self.device)
    self.filtered_dict_dec = {k: v for k, v in self.loaded_dict_dec.items()
                              if k in self.depth_decoder.state_dict()}
    self.depth_decoder.load_state_dict(self.filtered_dict_dec)
    self.depth_decoder.to(self.device)
    self.depth_decoder.eval()

    # input size: prefer an explicit override from args, otherwise fall back
    # to the feed size stored in the encoder checkpoint
    if args.feed_height and args.feed_width:
        self.feed_height = args.feed_height
        self.feed_width = args.feed_width
    else:
        self.feed_height = self.loaded_dict_enc['height']
        self.feed_width = self.loaded_dict_enc['width']
def __init__(self, options): self.opt = options # checking height and width are multiples of 32 assert self.opt.height % 32 == 0, "'height' must be a multiple of 32" assert self.opt.width % 32 == 0, "'width' must be a multiple of 32" self.models = {} self.device = "cuda" self.depthmodels = {} self.depthmodels["depthencoder"] = networks.ResnetEncoder( self.opt.num_layers, pretrained=False, num_input_channels=3) self.depthmodels["depthdecoder"] = DepthDecoder( self.depthmodels["depthencoder"].num_ch_enc, num_output_channels=1) self.depthmodels["depthencoder"].to(self.device) self.depthmodels["depthdecoder"].to(self.device) self.load_model(weightFolder=self.opt.load_depthweights_folder, encoderName='depthencoder', decoderName='depthdecoder', encoder=self.depthmodels["depthencoder"], decoder=self.depthmodels["depthdecoder"]) for m in self.depthmodels.values(): m.eval() print("Training is using:\t", self.device) self.set_dataset() self.MIN_DEPTH = 1e-3 self.MAX_DEPTH = 80 self.STEREO_SCALE_FACTOR = 5.4
def __init__(self, options):
    self.opt = options
    self.log_path = os.path.join(self.opt.log_dir, self.opt.model_name)

    # checking height and width are multiples of 32
    assert self.opt.height % 32 == 0, "'height' must be a multiple of 32"
    assert self.opt.width % 32 == 0, "'width' must be a multiple of 32"

    self.models = {}
    self.parameters_to_train = []
    self.device = "cuda"

    self.models["encoder"] = networks.ResnetEncoder(self.opt.num_layers, pretrained=True)
    self.models["encoder"].to(self.device)
    self.parameters_to_train += list(self.models["encoder"].parameters())

    self.models["depth"] = DepthDecoder(self.models["encoder"].num_ch_enc,
                                        num_output_channels=2)
    self.models["depth"].to(self.device)
    self.parameters_to_train += list(self.models["depth"].parameters())

    self.model_optimizer = optim.Adam(self.parameters_to_train, self.opt.learning_rate)
    self.model_lr_scheduler = optim.lr_scheduler.StepLR(
        self.model_optimizer, self.opt.scheduler_step_size, 0.1)

    print("Training model named:\t", self.opt.model_name)
    print("Models and tensorboard events files are saved to:\t", self.opt.log_dir)
    print("Training is using:\t", self.device)

    self.set_dataset()

    self.writers = {}
    for mode in ["train", "val"]:
        self.writers[mode] = SummaryWriter(os.path.join(self.log_path, mode))

    self.depth_metric_names = [
        "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms",
        "da/a1", "da/a2", "da/a3"]

    print("Using split:\t ", self.opt.split)
    print("There are {:d} training items and {:d} validation items".format(
        self.train_num, self.val_num))

    if self.opt.load_weights_folder is not None:
        self.load_model()

    self.save_opts()

    self.MIN_DEPTH = 1e-3
    self.MAX_DEPTH = 80
    self.best_abs = 1e10

    self.sfnormOptimizer = SurfaceNormalOptimizer(
        height=self.opt.height, width=self.opt.width,
        batch_size=self.opt.batch_size).cuda()
def __init__(self, options): self.opt = options # checking height and width are multiples of 32 assert self.opt.height % 32 == 0, "'height' must be a multiple of 32" assert self.opt.width % 32 == 0, "'width' must be a multiple of 32" self.models = {} self.device = "cuda" self.models["encoder_norm"] = networks.ResnetEncoder( self.opt.num_layers, pretrained=True) self.models["encoder_norm"].to(self.device) self.models["norm"] = networks.DepthDecoder( self.models["encoder_norm"].num_ch_enc, num_output_channels=2) self.models["norm"].to(self.device) self.set_dataset() self.load_model() self.crph = 365 self.crpw = 1220 os.makedirs(self.opt.output_path, exist_ok=True) self.dirmapping = {'l': 'image_02', 'r': 'image_03'}
def __init__(self, options): self.opt = options # checking height and width are multiples of 32 assert self.opt.height % 32 == 0, "'height' must be a multiple of 32" assert self.opt.width % 32 == 0, "'width' must be a multiple of 32" self.models = {} self.parameters_to_train = [] self.device = "cuda" self.models["encoder_norm"] = networks.ResnetEncoder(self.opt.num_layers, pretrained=True) self.models["encoder_norm"].to(self.device) self.models["norm"] = DepthDecoder(self.models["encoder_norm"].num_ch_enc, num_output_channels=3) self.models["norm"].to(self.device) self.set_dataset() self.depth_metric_names = ["de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3"] self.MIN_DEPTH = 1e-3 self.MAX_DEPTH = 80 self.minabsrel = 1e10 self.maxa1 = -1e10 self.STEREO_SCALE_FACTOR = 5.4 self.sfnormOptimizer = SurfaceNormalOptimizer(height=self.opt.crph, width=self.opt.crpw, batch_size=self.opt.batch_size).cuda()
def __init__(self, lr=7e-3, encoder_depth=18):
    super().__init__()
    self.num_classes = 9

    self.encoder = networks.ResnetEncoder(encoder_depth, True)
    self.decoder = networks.DepthDecoder(
        num_ch_enc=self.encoder.num_ch_enc, scales=range(5),
        num_output_channels=self.num_classes)
    self.depth_decoder = DebugDepthDecoder(
        num_ch_enc=self.encoder.num_ch_enc, scales=range(4))
    # loaded_dict = torch.load(depth_decoder_path, map_location=device)
    # self.depth_decoder.load_state_dict(loaded_dict)
    # self.decoder.eval()

    self.encoder_out_channels = self.encoder.num_ch_enc[-1]

    self.train_loss = []
    self.loss_decomp = {'seed': [], 'dCRF': []}
    self.val_loss = []
    self.test_loss = []

    self.rloss_weight = 2e-9
    self.rloss_scale = 0.5
    self.rloss_sig_rgb = 15
    self.rloss_sig_xy = 100
    self.lr = lr

    self.densecrflosslayer = DenseCRFLoss(
        weight=self.rloss_weight, sigma_rgb=self.rloss_sig_rgb,
        sigma_xy=self.rloss_sig_xy, scale_factor=self.rloss_scale)
def loadModel(model_name, epoch_num):
    # Set up network and load weights
    model_path = join(abspath('./logs'), model_name)
    opts_path = join(model_path, 'models/opt.json')
    weights_path = join(model_path, 'models', 'weights_{}'.format(epoch_num))

    # Load pretrained model options
    with open(opts_path, 'r') as f:
        opts = json.load(f)

    encoder = networks.ResnetEncoder(opts['num_layers'], False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=opts['scales'])

    encoder_path = join(weights_path, 'encoder.pth')
    depth_decoder_path = join(weights_path, 'depth.pth')

    # Load encoder network with weights. Verify encoder architecture
    loaded_dict_enc = torch.load(encoder_path)
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)

    # Load depth decoder network with weights
    loaded_dict = torch.load(depth_decoder_path)
    depth_decoder.load_state_dict(loaded_dict)

    # Set to eval mode on GPU
    encoder.cuda()
    depth_decoder.cuda()
    encoder.eval()
    depth_decoder.eval()

    return encoder, depth_decoder, opts
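# Hedged usage sketch (not part of the original file): assumes loadModel above,
# a checkpoint under ./logs/<model_name>/models/weights_<epoch>, and an image
# tensor already resized to the network's feed size. 'my_model' and 'image'
# are hypothetical placeholders.
import torch

encoder, depth_decoder, opts = loadModel('my_model', 19)
with torch.no_grad():
    features = encoder(image.cuda())   # image: (1, 3, feed_height, feed_width)
    outputs = depth_decoder(features)
    disp = outputs[("disp", 0)]        # finest-scale disparity prediction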
def LoadDepthModels(image_name, image):
    modelsDict = {
        'CAM_BACK.jpeg': 'monoback',
        'CAM_FRONT.jpeg': 'monofront',
        'CAM_FRONT_LEFT.jpeg': 'monofrontleft',
        'CAM_FRONT_RIGHT.jpeg': 'monofrontright',
        'CAM_BACK_LEFT.jpeg': 'monobackleft',
        'CAM_BACK_RIGHT.jpeg': 'monobackright'
    }
    model_path = os.path.join("models", modelsDict[image_name])
    # print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = "cuda"

    # LOADING PRETRAINED MODEL
    # print("Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    # print("Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    original_width, original_height = image.size
    input_image = image.resize((feed_width, feed_height), Image.LANCZOS)
    input_image = transforms.ToTensor()(input_image).unsqueeze(0)
    input_image = input_image.to(device)

    features = encoder(input_image)
    outputs = depth_decoder(features)

    disp = outputs[("disp", 0)]
    disp_resized = torch.nn.functional.interpolate(
        disp, (original_height, original_width),
        mode="bilinear", align_corners=False)
    # print(disp_resized.shape)
    _, depth = disp_to_depth(disp_resized, 0.1, 100)
    # print(depth.shape)
    # print(type(depth.squeeze(0)))
    return depth.squeeze(0)
def getMonoDepth(input_image):
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    loc = baseLoc + 'monodepth2/'
    model_path = os.path.join(loc + "models", 'mono+stereo_640x192')
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    with torch.no_grad():
        input_image = pil.fromarray(input_image)
        # input_image = pil.open(image_path).convert('RGB')
        original_width, original_height = input_image.size
        input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
        input_image = transforms.ToTensor()(input_image).unsqueeze(0)

        # PREDICTION
        input_image = input_image.to(device)
        features = encoder(input_image)
        outputs = depth_decoder(features)

        disp = outputs[("disp", 0)]
        disp_resized = torch.nn.functional.interpolate(
            disp, (original_height, original_width),
            mode="bilinear", align_corners=False)

        # Saving colormapped depth image
        disp_resized_np = disp_resized.squeeze().cpu().numpy()
        vmax = np.percentile(disp_resized_np, 95)
        vmin = disp_resized_np.min()
        disp_resized_np = vmin + (disp_resized_np - vmin) * (vmax - vmin) / (disp_resized_np.max() - vmin)
        disp_resized_np = (255 * (disp_resized_np - vmin) / (vmax - vmin)).astype(np.uint8)
        colormapped_im = cv2.applyColorMap(disp_resized_np, cv2.COLORMAP_HOT)
        colormapped_im = cv2.cvtColor(colormapped_im, cv2.COLOR_BGR2RGB)
        # normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
        # mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
        # colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)

    return colormapped_im
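# Hedged usage sketch (not part of the original file): feeds getMonoDepth an RGB
# numpy array and saves the colormapped disparity it returns. "test.jpg" is a
# hypothetical path.
import cv2

img = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2RGB)
depth_vis = getMonoDepth(img)   # H x W x 3 uint8, COLORMAP_HOT applied
cv2.imwrite("depth_vis.png", cv2.cvtColor(depth_vis, cv2.COLOR_RGB2BGR))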
def depth_Estimation(args):
    model_name = args.model_name

    # Setting up the network
    print("Loading model....")
    download_model_if_doesnt_exist(model_name)
    encoder_path = os.path.join("models", model_name, "encoder.pth")
    depth_decoder_path = os.path.join("models", model_name, "depth.pth")

    # LOADING PRETRAINED MODEL
    encoder = networks.ResnetEncoder(18, False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)

    loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
    depth_decoder.load_state_dict(loaded_dict)

    encoder.eval()
    depth_decoder.eval()

    # Loading image
    print("Loading image....")
    image_path = args.image_path
    input_image = pil.open(image_path).convert('RGB')
    original_width, original_height = input_image.size
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    input_image_resized = input_image.resize((feed_width, feed_height), pil.LANCZOS)
    input_image_pytorch = transforms.ToTensor()(input_image_resized).unsqueeze(0)
    input_npy = input_image_pytorch.squeeze().cpu().numpy()

    # Prediction of the disparity image
    with torch.no_grad():
        features = encoder(input_image_pytorch)
        outputs = depth_decoder(features)
    disp = outputs[("disp", 0)]

    # Interpolate the disparity values back to the original image resolution
    disp_resized = torch.nn.functional.interpolate(
        disp, (original_height, original_width),
        mode="bilinear", align_corners=False)
    # Convert the PyTorch tensor to a numpy array
    disp_resized_np = disp_resized.squeeze().cpu().numpy()
    print("resized disp: " + str(disp_resized_np.shape))

    # Convert disparity to depth in the range 0.1 to 1000 units
    print("Range of depth in image")
    scaled, dep = disp_to_depth(disp_resized_np, 0.1, 1000)
    print("min -> " + str(dep.min()) + ", max -> " + str(dep.max()))

    # Preview of the RGB and depth images
    rgb = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    depth = dep.reshape((rgb.shape[0], rgb.shape[1]), order='C')
    plot(rgb, depth)
    return rgb, depth
def network_define(opt, data_path, height, width):
    opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder)
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)
    print("-> Loading weights from {}".format(opt.load_weights_folder))

    filenames = readlines(os.path.join(splits_dir, opt.eval_split, split_file))
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")
    encoder_dict = torch.load(encoder_path, map_location=torch.device("cuda:1"))

    if opt.dataset_val[0] == "kitti":
        dataset = datasets.KITTIRAWDataset(data_path, filenames, height, width,
                                           [0], 4, is_train=False)
    elif opt.dataset_val[0] == "vkitti":
        dataset = datasets.VKITTIDataset(data_path, filenames, height, width,
                                         [0], 4, is_train=False)

    # dataloader = DataLoader(dataset, 16, shuffle=False, num_workers=opt.num_workers,
    #                         pin_memory=True, drop_last=False)
    # the default collate_fn would fail because samples have non-deterministic length
    dataloader = DataLoader(dataset, 1, shuffle=False, num_workers=opt.num_workers,
                            pin_memory=True, drop_last=False,
                            collate_fn=my_collate_fn)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict(
        {k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(
        torch.load(decoder_path, map_location=torch.device("cuda:1")))

    encoder.cuda(1)
    encoder.eval()
    depth_decoder.cuda(1)
    depth_decoder.eval()

    return encoder, depth_decoder, dataloader, filenames
def initUi(self):
    self.num = 0
    self.timer_camera = QTimer()   # timer controlling the video display frame rate
    self.timer_camera2 = QTimer()
    self.resize(640, 480)
    self.setWindowTitle("test_simple")

    self.centralwidget = QWidget()
    self.label_show_camera = QLabel(self.centralwidget)  # label that displays the video
    self.label_show_camera.setFixedSize(640, 480)
    self.label_show_camera.setGeometry(0, 0, 640, 480)
    self.setCentralWidget(self.centralwidget)

    self.timer_camera.timeout.connect(self.__show_camera__)
    self.timer_camera2.timeout.connect(self.__show_rate__)

    self.picture_path = "./picture/test.jpg"
    # self.encoder_path = "/home/wang/models/mono+stereo_640x192/encoder.pth"
    # self.depth_decoder_path = "/home/wang/models/mono+stereo_640x192/depth.pth"
    self.encoder_path = "/home/roit/models/monodepth2_official/mono_640x192/encoder.pth"
    self.depth_decoder_path = "/home/roit/models/monodepth2_official/mono_640x192/depth.pth"

    if torch.cuda.is_available():
        self.device = torch.device("cuda")
    else:
        self.device = torch.device("cpu")
    print("-> device:", self.device)

    # encoder
    self.encoder = networks.ResnetEncoder(18, False)
    self.loaded_dict_enc = torch.load(self.encoder_path, map_location=self.device)
    self.feed_height = self.loaded_dict_enc['height']
    self.feed_width = self.loaded_dict_enc['width']
    self.filtered_dict_enc = {k: v for k, v in self.loaded_dict_enc.items()
                              if k in self.encoder.state_dict()}
    self.encoder.load_state_dict(self.filtered_dict_enc)
    self.encoder.to(self.device)
    self.encoder.eval()

    # decoder
    self.depth_decoder = networks.DepthDecoder2([64, 64, 128, 256, 512])
    self.loaded_dict_dec = torch.load(self.depth_decoder_path, map_location=self.device)
    self.filtered_dict_dec = {k: v for k, v in self.loaded_dict_dec.items()
                              if k in self.depth_decoder.state_dict()}
    self.depth_decoder.load_state_dict(self.filtered_dict_dec)
    self.depth_decoder.to(self.device)
    self.depth_decoder.eval()

    # self.paths = [self.picture_path]
    self.output_directory = os.path.dirname(self.picture_path)

    self.timer_camera.start(10)
    self.timer_camera2.start(1000)
def convert_pretrained(model, model_path, example_img, save_enc_name, save_dec_name):
    # Pretrained weights
    encoder_path = os.path.join(model_path, model, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, model, "depth.pth")

    # Model architecture
    encoder = networks.ResnetEncoder(18, False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    # Load pretrained weights into model
    try:
        loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
    except FileNotFoundError as err:
        print("{} Cannot load encoder file {}".format(err, encoder_path))
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)

    try:
        loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
    except FileNotFoundError as err:
        print("{} Cannot load decoder file {}".format(err, depth_decoder_path))
    depth_decoder.load_state_dict(loaded_dict)

    # Set to eval mode
    encoder.eval()
    depth_decoder.eval()

    # Forward pass: verify the rewritten modules against the originals
    image = load_example_image(example_img, loaded_dict_enc)
    with torch.no_grad():
        # Encoder
        gt_features = encoder.forward_original(image)
        features = encoder(image)
        verify_encoder(features, gt_features)
        # Decoder
        gt_outputs = depth_decoder.forward_original(features)
        outputs = depth_decoder(*features)
        verify_decoder(outputs, gt_outputs)

    # JIT trace
    encoder_module = torch.jit.trace(encoder, image)
    depth_decoder_module = torch.jit.trace(depth_decoder, features)

    # Serialize & save
    t_encoder_path = os.path.join(model_path, model, save_enc_name)
    t_depth_decoder_path = os.path.join(model_path, model, save_dec_name)
    encoder_module.save(t_encoder_path)
    depth_decoder_module.save(t_depth_decoder_path)
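# Hedged usage sketch (not part of the original file): reloads the traced
# modules saved by convert_pretrained via torch.jit.load, which restores them
# without the original Python class definitions. The paths mirror the
# save_enc_name/save_dec_name arguments and are hypothetical.
import torch

traced_encoder = torch.jit.load("models/mono_640x192/encoder_traced.pt").eval()
traced_decoder = torch.jit.load("models/mono_640x192/decoder_traced.pt").eval()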
def load_encoder(encoder_path):
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()
    return encoder, feed_height, feed_width
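# Hedged usage sketch (not part of the original file): assumes load_encoder
# above plus the module-level 'device' it relies on; resizes a PIL image to the
# encoder's stored feed size before the forward pass. "example.jpg" is a
# hypothetical path.
import torch
import PIL.Image as pil
from torchvision import transforms

encoder, feed_height, feed_width = load_encoder("models/mono_640x192/encoder.pth")
image = pil.open("example.jpg").convert("RGB").resize((feed_width, feed_height), pil.LANCZOS)
with torch.no_grad():
    features = encoder(transforms.ToTensor()(image).unsqueeze(0).to(device))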
def __init__(self, lr=7e-3, batch_size=1, width=640, height=192):
    super().__init__()
    self.num_classes = 9
    self.model = DeepLab(num_classes=self.num_classes)

    self.depth_encoder = networks.ResnetEncoder(18, True)
    self.depth_decoder = networks.DepthDecoder(
        num_ch_enc=self.depth_encoder.num_ch_enc, scales=range(4))

    model_name = 'mono+stereo_640x192'
    model_path = os.path.join("models", "monodepth2_weights", model_name)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    loaded_dict_enc = torch.load(encoder_path, map_location=device)
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in self.depth_encoder.state_dict()}
    self.depth_encoder.load_state_dict(filtered_dict_enc)
    self.depth_encoder.eval()

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    self.depth_decoder.load_state_dict(loaded_dict)
    self.depth_decoder.eval()

    self.train_loss = []
    self.loss_decomp = {'seed': [], 'dCRF': [], 'proj': []}
    self.val_loss = []
    self.test_loss = []

    self.rloss_weight = 2e-9
    self.rloss_scale = 1
    self.rloss_sig_rgb = 25
    self.rloss_sig_xy = 30
    self.ploss_weight = 0.5
    self.lr = lr
    self.width = width
    self.height = height

    self.densecrflosslayer = DenseCRFLoss(weight=1, sigma_rgb=self.rloss_sig_rgb,
                                          sigma_xy=self.rloss_sig_xy,
                                          scale_factor=self.rloss_scale)
    self.backproject_depth = BackprojectDepth(batch_size, height, width)
    self.project_3d = Project3D(batch_size, height, width)
    self.ssim = SSIM()
    self.no_ssim = True
    self.use_depth_rloss = True
def __init__(self, cls_model, lr=7e-3, encoder_depth=18):
    super().__init__()
    self.num_classes = 9

    self.encoder = networks.ResnetEncoder(encoder_depth, False)
    self.cls_model = cls_model
    self.cls_model.eval()

    model_name = 'mono+stereo_640x192'
    model_path = os.path.join("models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    loaded_dict_enc = torch.load(encoder_path, map_location=device)
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in self.encoder.state_dict()}
    self.encoder.load_state_dict(filtered_dict_enc)
    # self.encoder.eval()

    self.decoder = networks.DepthDecoder(
        num_ch_enc=self.encoder.num_ch_enc, scales=range(5),
        num_output_channels=self.num_classes)
    self.depth_decoder = DebugDepthDecoder(
        num_ch_enc=self.encoder.num_ch_enc, scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    self.depth_decoder.load_state_dict(loaded_dict)
    # self.decoder.eval()

    self.encoder_out_channels = self.encoder.num_ch_enc[-1]

    self.train_loss = []
    self.loss_decomp = {'seed': [], 'dCRF': []}
    self.val_loss = []
    self.test_loss = []

    self.rloss_weight = 2e-9
    self.rloss_scale = 0.5
    self.rloss_sig_rgb = 15
    self.rloss_sig_xy = 100
    self.lr = lr

    self.densecrflosslayer = DenseCRFLoss(
        weight=self.rloss_weight, sigma_rgb=self.rloss_sig_rgb,
        sigma_xy=self.rloss_sig_xy, scale_factor=self.rloss_scale)
def __init__(self):
    super().__init__()
    self.num_classes = 9
    self.cam_thresh = 0.9

    self.encoder_cls = networks.ResnetEncoder(18, True)
    self.encoder_out_channels = self.encoder_cls.num_ch_enc[-1]

    self.classifer_conv1 = nn.Conv2d(self.encoder_out_channels, 1024, 3, padding=1)
    self.classifer_drop1 = nn.Dropout2d(p=0.5)
    self.classifer_conv2 = nn.Conv2d(1024, self.num_classes, 1, bias=False)

    self.train_loss = []
    self.val_loss = []
    self.test_loss = []
    self.lr = 1e-3
def prepare_model_for_test(opt): opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth") pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth") pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2) pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2) pose_encoder.load_state_dict(torch.load(pose_encoder_path)) pose_decoder.load_state_dict(torch.load(pose_decoder_path)) pose_encoder.cuda().eval() pose_decoder.cuda().eval() return pose_encoder, pose_decoder
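# Hedged usage sketch (not part of the original file): the pose network takes
# two consecutive frames concatenated along the channel axis, matching how
# evaluate() below builds all_color_aug. 'opt', 'frame0', and 'frame1' are
# assumed to come from the caller.
import torch

pose_encoder, pose_decoder = prepare_model_for_test(opt)
with torch.no_grad():
    pair = torch.cat([frame0, frame1], 1).cuda()             # (1, 6, H, W)
    axisangle, translation = pose_decoder([pose_encoder(pair)])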
def __init__(self, *args, **kwargs):
    super(Monodepth2, self).__init__(*args, **kwargs)
    self.load_dict = False

    if self.running_on == 'pc':
        # mono_1024x320 / mono_640x192
        self.encoder_path = "/home/roit/models/monodepth2_official/mono_1024x320/encoder.pth"
        self.depth_decoder_path = "/home/roit/models/monodepth2_official/mono_1024x320/depth.pth"
    elif self.running_on == 'Xavier':
        self.encoder_path = "/home/wang/970evop1/models/mono_640x192/encoder.pth"
        self.depth_decoder_path = "/home/wang/970evop1/models/mono_640x192/depth.pth"

    # encoder init
    self.encoder = networks.ResnetEncoder(18, False)
    if self.load_dict:
        self.loaded_dict_enc = torch.load(self.encoder_path, map_location=self.device)
        self.filtered_dict_enc = {k: v for k, v in self.loaded_dict_enc.items()
                                  if k in self.encoder.state_dict()}
        self.encoder.load_state_dict(self.filtered_dict_enc)
    self.encoder.to(self.device)
    self.encoder.eval()

    # decoder
    if self.name == 'arch2':
        self.decoder = networks.DepthDecoder2([64, 64, 128, 256, 512])
    else:
        self.decoder = networks.DepthDecoder([64, 64, 128, 256, 512])
    if self.load_dict:
        self.loaded_dict_dec = torch.load(self.depth_decoder_path, map_location=self.device)
        self.filtered_dict_dec = {k: v for k, v in self.loaded_dict_dec.items()
                                  if k in self.decoder.state_dict()}
        self.decoder.load_state_dict(self.filtered_dict_dec)
    self.decoder.to(self.device)
    self.decoder.eval()

    # inputs size
    # self.feed_height = self.loaded_dict_enc['height']
    # self.feed_width = self.loaded_dict_enc['width']

    print('==> model name:{}\nfeed_height:{}\nfeed_width:{}\n'.format(
        self.name, self.feed_height, self.feed_width))
def val(self):
    """Run each trained model checkpoint over the validation set and save its
    predicted depth maps as 16-bit PNGs.
    """
    self.set_eval()

    modelnames = ['intconstrainWallPoleBs', 'intconstrainWallPole', 'intconstrainWall',
                  'intconstrainPole', 'intconstrainPole2', 'intconstrainPole3',
                  'intconstrainPole4', 'intconstrainPole5']

    for k in range(len(modelnames)):
        vlsroot = os.path.join(self.opt.vlsfold, modelnames[k])
        os.makedirs(vlsroot, exist_ok=True)

        self.models["encoder"] = networks.ResnetEncoder(self.opt.num_layers, pretrained=True)
        self.models["encoder"].to(self.device)
        self.models["depth"] = DepthDecoder(self.models["encoder"].num_ch_enc,
                                            num_output_channels=1)
        self.models["depth"].to(self.device)

        models_to_load = ['encoder', 'depth']
        for n in models_to_load:
            path = os.path.join(self.opt.load_weights_folder, modelnames[k],
                                'models', 'best_a1_models', "{}.pth".format(n))
            model_dict = self.models[n].state_dict()
            pretrained_dict = torch.load(path)
            pretrained_dict = {key: v for key, v in pretrained_dict.items()
                               if key in model_dict}
            model_dict.update(pretrained_dict)
            self.models[n].load_state_dict(model_dict)

        with torch.no_grad():
            for batch_idx, inputs in enumerate(self.val_loader):
                for key, ipt in inputs.items():
                    if not key == 'tag':
                        inputs[key] = ipt.to(self.device)

                _, _, gt_height, gt_width = inputs['depthgt'].shape

                outputs_depth = self.models['depth'](self.models['encoder'](inputs['color']))
                _, pred_depth = disp_to_depth(outputs_depth[("disp", 0)],
                                              self.opt.min_depth, self.opt.max_depth)
                pred_depth = pred_depth * self.STEREO_SCALE_FACTOR
                pred_depth = F.interpolate(pred_depth, [gt_height, gt_width],
                                           mode='bilinear', align_corners=True)

                figname = "{}_{}.png".format(inputs['tag'][0].split(' ')[0].split('/')[1],
                                             inputs['tag'][0].split(' ')[1])
                pred_depthnp = (pred_depth[0, 0, :, :].cpu().numpy() * 256.0).astype(np.uint16)
                pil.fromarray(pred_depthnp).save(os.path.join(vlsroot, figname))
def prepare_model_for_test(opt): opt.load_weights_folder = os.path.expanduser(opt.load_weights_folder) print("-> Loading weights from {}".format(opt.load_weights_folder)) encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth") decoder_path = os.path.join(opt.load_weights_folder, "depth.pth") encoder_dict = torch.load(encoder_path) decoder_dict = torch.load(decoder_path) encoder = networks.ResnetEncoder(opt.num_layers, False) depth_decoder = networks.DepthDecoder(encoder.num_ch_enc, scales=range(1), upsample_mode='bilinear' ) encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in encoder.state_dict()}) depth_decoder.load_state_dict(torch.load(decoder_path)) encoder.cuda().eval() depth_decoder.cuda().eval() return encoder, depth_decoder, encoder_dict['height'], encoder_dict['width']
def init_model(model_name='mono+stereo_640x192'):
    if not model_dict['initialized']:
        download_model_if_doesnt_exist(model_name)
        model_path = os.path.join(
            f"{os.path.dirname(os.path.realpath(__file__))}/models", model_name)
        encoder_path = os.path.join(model_path, "encoder.pth")
        depth_decoder_path = os.path.join(model_path, "depth.pth")

        # LOADING PRETRAINED MODEL
        encoder = networks.ResnetEncoder(18, False)
        loaded_dict_enc = torch.load(encoder_path, map_location=device)
        filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                             if k in encoder.state_dict()}
        encoder.load_state_dict(filtered_dict_enc)
        encoder.to(device)
        encoder.eval()

        depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                              scales=range(4))
        loaded_dict = torch.load(depth_decoder_path, map_location=device)
        depth_decoder.load_state_dict(loaded_dict)
        depth_decoder.to(device)
        depth_decoder.eval()

        # extract the height and width of image that this model was trained with
        model_dict['feed_width'] = loaded_dict_enc['width']
        model_dict['feed_height'] = loaded_dict_enc['height']
        model_dict['encoder'] = encoder
        model_dict['decoder'] = depth_decoder
        model_dict['initialized'] = True
def setup_network(model_name="mono_640x192"): download_model_if_doesnt_exist(model_name) encoder_path = os.path.join("models", model_name, "encoder.pth") depth_decoder_path = os.path.join("models", model_name, "depth.pth") # LOADING PRETRAINED MODEL encoder = networks.ResnetEncoder(18, False) depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4)) loaded_dict_enc = torch.load(encoder_path, map_location='cpu') filtered_dict_enc = { k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict() } encoder.load_state_dict(filtered_dict_enc) loaded_dict = torch.load(depth_decoder_path, map_location='cpu') depth_decoder.load_state_dict(loaded_dict) encoder.eval() depth_decoder.eval() return encoder, depth_decoder, loaded_dict_enc
def load_model():
    model_name = 'mono+stereo_640x192'
    device = torch.device("cuda")
    model_path = os.path.join("models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    return feed_width, feed_height, encoder, depth_decoder, device
def prepare_model_for_test(args, device):
    model_path = args.model_name
    print("-> Loading model from ", model_path)
    model_path = os.path.join("ckpts", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    decoder_path = os.path.join(model_path, "depth.pth")
    encoder_dict = torch.load(encoder_path, map_location=device)
    decoder_dict = torch.load(decoder_path, map_location=device)

    encoder = networks.ResnetEncoder(18, False)
    decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                    scales=range(1))

    encoder.load_state_dict({k: v for k, v in encoder_dict.items()
                             if k in encoder.state_dict()})
    decoder.load_state_dict(decoder_dict)

    encoder = encoder.to(device).eval()
    decoder = decoder.to(device).eval()

    return encoder, decoder, encoder_dict['height'], encoder_dict['width']
def __init__(self, options): self.opt = options # checking height and width are multiples of 32 assert self.opt.height % 32 == 0, "'height' must be a multiple of 32" assert self.opt.width % 32 == 0, "'width' must be a multiple of 32" self.models = {} self.parameters_to_train = [] self.device = "cuda" self.models["encoder"] = networks.ResnetEncoder(self.opt.num_layers, pretrained=True) self.models["encoder"].to(self.device) self.parameters_to_train += list(self.models["encoder"].parameters()) self.models["depth"] = DepthDecoder(self.models["encoder"].num_ch_enc, num_output_channels=1) self.models["depth"].to(self.device) self.set_dataset() self.depth_metric_names = [ "de/abs_rel", "de/sq_rel", "de/rms", "de/log_rms", "da/a1", "da/a2", "da/a3" ] print("Using split:\t ", self.opt.split) if self.opt.load_weights_folder is not None: self.load_model() self.MIN_DEPTH = 1e-3 self.MAX_DEPTH = 80 self.STEREO_SCALE_FACTOR = 5.4
def load_model(model_name):
    # Set up network and load weights
    if model_name.startswith('office_trim'):
        models_path = abspath('./logs/office')
    else:
        models_path = abspath('./logs')
    weights_path = join(models_path, model_name, 'models',
                        'weights_{}'.format(epoch_num))

    # Load pretrained model
    print('Loading... \nMODEL {}'.format(model_name))
    encoder = networks.ResnetEncoder(18, False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(num_scales))

    encoder_path = join(weights_path, 'encoder.pth')
    depth_decoder_path = join(weights_path, 'depth.pth')

    # Load encoder network with weights. Verify encoder architecture
    loaded_dict_enc = torch.load(encoder_path)
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items()
                         if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)

    # Load depth decoder network with weights
    loaded_dict = torch.load(depth_decoder_path)
    depth_decoder.load_state_dict(loaded_dict)

    # Set to eval mode on GPU
    encoder.cuda()
    depth_decoder.cuda()
    encoder.eval()
    depth_decoder.eval()

    return depth_decoder, encoder
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)
    assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
        "eval_split should be either odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                               [0, 1], 4, is_train=False)
    dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True,
                            drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(
                    axisangle[:, 0], translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join(opt.data_path, "poses",
                                 "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                                 gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))
        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)