def __init__(self, model_name="mono_1024x320"):
    """Load a pretrained encoder/decoder pair on the CPU.

    Calls ``download_model_if_doesnt_exist(model_name)`` and then reads
    ``encoder.pth`` and ``depth.pth`` from
    ``./monodepth2/models/<model_name>``. Both networks are put in eval
    mode, and the ``height``/``width`` entries stored in the encoder
    checkpoint are kept as ``feed_height``/``feed_width``.

    Args:
        model_name: Name of the model sub-directory to load.
    """
    self.model_name = model_name

    download_model_if_doesnt_exist(model_name)
    encoder_path = os.path.join("./monodepth2/models", model_name, "encoder.pth")
    depth_decoder_path = os.path.join("./monodepth2/models", model_name, "depth.pth")

    # LOADING PRETRAINED MODEL
    self.encoder = networks.ResnetEncoder(18, False)
    self.depth_decoder = networks.DepthDecoder(
        num_ch_enc=self.encoder.num_ch_enc, scales=range(4))

    loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
    # The checkpoint holds entries (e.g. 'height', 'width') that are not
    # encoder weights; keep only keys the encoder knows. The state dict is
    # fetched once instead of being rebuilt for every checkpoint key.
    encoder_keys = self.encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    self.encoder.load_state_dict(filtered_dict_enc)

    loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
    self.depth_decoder.load_state_dict(loaded_dict)

    self.encoder.eval()
    self.depth_decoder.eval()

    # Input size recorded in the encoder checkpoint.
    self.feed_height = loaded_dict_enc['height']
    self.feed_width = loaded_dict_enc['width']
def __init__(self, model_path):
    """Load encoder and depth-decoder weights from a model directory.

    Reads ``encoder.pth`` and ``depth.pth`` from ``model_path`` and stores
    the resulting networks on ``self.encoder`` / ``self.depth_decoder``,
    together with the ``height``/``width`` entries found in the encoder
    checkpoint (``self.feed_height`` / ``self.feed_width``).

    Args:
        model_path: Directory containing ``encoder.pth`` and ``depth.pth``.
    """
    # NOTE(review): ``assert`` is stripped under ``python -O``; kept so the
    # exception type seen by existing callers does not change.
    assert isinstance(model_path, str)
    self.model_path = model_path

    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING MODEL
    encoder = networks.ResnetEncoder(18, False)
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    # NOTE(review): checkpoints are mapped onto ``device``, but this method
    # neither moves the networks to it nor calls ``eval()`` -- confirm the
    # caller takes care of both.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    loaded_dict_enc = torch.load(encoder_path, map_location=device)
    # Drop non-weight entries (e.g. 'height', 'width'). The state dict is
    # fetched once instead of being rebuilt for every checkpoint key.
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    encoder.load_state_dict(filtered_dict_enc)

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    self.encoder = encoder
    self.depth_decoder = depth_decoder
    # Input size recorded in the encoder checkpoint.
    self.feed_height = loaded_dict_enc['height']
    self.feed_width = loaded_dict_enc['width']
def __init__(self, model_name, no_cuda):
    """Load the depth networks onto the chosen device and create the publisher.

    Args:
        model_name: Name of the model folder under ``monodepth2/models``
            (resolved relative to this file); also passed to
            ``download_model_if_doesnt_exist``.
        no_cuda: When true, the CPU is used even if CUDA is available.
    """
    # Setup execution env
    if torch.cuda.is_available() and not no_cuda:
        self._device = torch.device("cuda")
    else:
        self._device = torch.device("cpu")

    # Get model
    download_model_if_doesnt_exist(model_name)
    dir_path = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(dir_path, "monodepth2", "models", model_name)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # Load encoder
    self._encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=self._device)

    # extract the height and width of image that this model was trained with
    self._feed_height = loaded_dict_enc['height']
    self._feed_width = loaded_dict_enc['width']

    # Keep only keys the encoder knows. The state dict is fetched once
    # instead of being rebuilt for every checkpoint key.
    encoder_keys = self._encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    self._encoder.load_state_dict(filtered_dict_enc)
    self._encoder.to(self._device)
    self._encoder.eval()

    # Load decoder
    self._depth_decoder = networks.DepthDecoder(
        num_ch_enc=self._encoder.num_ch_enc, scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=self._device)
    self._depth_decoder.load_state_dict(loaded_dict)
    self._depth_decoder.to(self._device)
    self._depth_decoder.eval()

    # ROS publisher on the 'monodepth2' topic
    # NOTE(review): rospy.Publisher is normally given a message class as
    # its second argument (and usually a queue_size) -- confirm this call
    # works as written; the intended message type is not visible here.
    self._img_pub = rospy.Publisher('monodepth2')
return x # ## Setting up Monodepth model # We build our monocular depth estimation model from the Monodepth module # Define which model to use and download if not found model_name = "mono_640x192" download_model_if_doesnt_exist(model_name) # Build paths to coders and instantiate from path encoder_path = os.path.join("models", model_name, "encoder.pth") depth_decoder_path = os.path.join("models", model_name, "depth.pth") encoder = networks.ResnetEncoder(18, False).cuda() depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc, scales=range(4)).cuda() # Encoder and Decoder loading loaded_dict_enc = torch.load(encoder_path, map_location='cpu') filtered_dict_enc = { k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict() } encoder.load_state_dict(filtered_dict_enc) loaded_dict = torch.load(depth_decoder_path, map_location='cpu') depth_decoder.load_state_dict(loaded_dict) # Put the coders in evaluation mode encoder.eval() depth_decoder.eval()
def test_simple(image_path, image_size, model_name):
    """Predict disparity for a single image or for every ``*.jpg`` in a folder.

    For each processed image two files are written next to the input:
    ``<name>_disp.npy`` (the first output of ``disp_to_depth(disp, 0.1, 100)``)
    and ``<name>_disp.jpeg`` (the resized disparity rendered with the
    ``magma`` colormap). Inference runs on the CPU.

    Args:
        image_path: Path to an image file, or to a directory that is
            searched for ``*.jpg`` files.
        image_size: Size passed to ``resize`` right after the image is
            opened; the prediction is resized back to this size.
        model_name: Name of the model folder under ``models``; also passed
            to ``download_model_if_doesnt_exist``.

    Returns:
        The uint8 colormapped array of the last processed image, or ``None``
        when no image was processed (empty folder, or only ``*_disp.jpg``
        inputs).

    Raises:
        FileNotFoundError: If ``image_path`` is neither a file nor a
            directory.
    """
    device = torch.device("cpu")

    download_model_if_doesnt_exist(model_name)
    model_path = os.path.join("models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print(" Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']

    # Keep only keys the encoder knows. The state dict is fetched once
    # instead of being rebuilt for every checkpoint key.
    encoder_keys = encoder.state_dict()
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items() if k in encoder_keys
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print(" Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))
    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)
    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(image_path):
        # Only testing on a single image
        paths = [image_path]
        output_directory = os.path.dirname(image_path)
    elif os.path.isdir(image_path):
        # Searching folder for images
        paths = glob.glob(os.path.join(image_path, '*.jpg'))
        output_directory = image_path
    else:
        raise FileNotFoundError(
            "Can not find image_path: {}".format(image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # Stays None when every candidate is skipped or the folder is empty, so
    # the final return never hits an unbound local.
    colormapped_im = None

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, path in enumerate(paths):
            if path.endswith("_disp.jpg"):
                # don't try to predict disparity for a disparity image!
                continue

            # Load image and preprocess
            input_image = pil.open(path).resize(image_size).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height),
                                             pil.LANCZOS)
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width),
                mode="bilinear", align_corners=False)

            # Saving numpy file (computed from the un-resized disparity)
            output_name = os.path.splitext(os.path.basename(path))[0]
            name_dest_npy = os.path.join(
                output_directory, "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            # Clip the colour scale at the 95th percentile.
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(),
                                              vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] *
                              255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)

            name_dest_im = os.path.join(
                output_directory, "{}_disp.jpeg".format(output_name))
            im.save(name_dest_im)

            print(" Processed {:d} of {:d} images - saved prediction to {}".
                  format(idx + 1, len(paths), name_dest_im))

    print('-> Done!')
    return colormapped_im