def __init__(self, model_path, device_name='cuda'):
    """Load the MidasNet model and build its preprocessing pipeline.

    Args:
        model_path: forwarded to ``MidasNet`` as its first argument.
        device_name: name handed to ``torch.device``. A request for
            ``'cuda'`` is downgraded to ``'cpu'`` (with a warning) when
            CUDA is not available.
    """
    # Only probe CUDA when it was actually requested.
    if device_name == 'cuda':
        if not torch.cuda.is_available():
            print("WARN: cuda was selected as device but was not found")
            device_name = 'cpu'

    self.device = torch.device(device_name)
    print(f"device: {device_name}")

    self.model = MidasNet(model_path, non_negative=True)

    # Pipeline steps, built in the order they are applied.
    resize_step = Resize(
        384,
        384,
        resize_target=None,
        keep_aspect_ratio=True,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )
    normalize_step = NormalizeImage(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    net_input_step = PrepareForNet()
    self.preprocessor = Compose([resize_step, normalize_step, net_input_step])

    # Place the network on the selected device and switch to eval mode.
    self.model.to(self.device)
    self.model.eval()
def test(model, rgb_path, output_path):
    """Run ``model`` on a single image and save the rescaled, flipped result as PNG.

    Args:
        model: object exposing ``forward``; it is called on a batched tensor
            placed on ``cuda:0`` when CUDA is available, otherwise on the CPU.
            NOTE(review): the model itself is not moved here, so the caller is
            expected to have placed it on the same device.
        rgb_path (str): path of the input image.
        output_path (str): directory receiving ``<name>.png``. When falsy the
            result is written next to the input as ``out_<name>.png``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    img = utils.read_image(rgb_path)
    img_input = transform({"image": img})["image"]

    path, file = os.path.split(rgb_path)
    # splitext drops only the final extension, so "frame.0001.jpg" maps to
    # "frame.0001.png" instead of colliding with its siblings on "frame.png".
    file = f"{os.path.splitext(file)[0]}.png"
    if output_path:
        # Make sure the target folder exists before saving into it.
        os.makedirs(output_path, exist_ok=True)
        depth_path = os.path.join(output_path, file)
    else:
        depth_path = os.path.join(path, f"out_{file}")
    print(f"{rgb_path} -> {depth_path}")

    with torch.no_grad():
        sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
        prediction = model.forward(sample).cpu()

    # Output values are not in valid pixel range (uint8, 0-255) so we need to rescale
    pred_min = prediction.min()
    pred_max = prediction.max()
    pred_range = pred_max - pred_min
    if pred_range > 0:
        prediction = 255 * (prediction - pred_min) / pred_range
    else:
        # A constant prediction would divide 0 by 0 and save a NaN image.
        prediction = torch.zeros_like(prediction)

    # Output is inverse depth map, need to compare to normal depth maps so need to flip depth values.
    prediction = (255 - prediction).abs()

    # Drop the leading batch dimension before handing the 2-D map to matplotlib.
    depth_map = prediction.view(prediction.size(1), prediction.size(2))
    matplotlib.image.imsave(depth_path, depth_map.detach().numpy())
def run(basedir, input_path, output_path, model_path, resize_height=288):
    """Run MonoDepthNN to compute depth maps.

    Args:
        basedir (str): dataset root; the first image (sorted by name) in its
            ``images`` sub-folder fixes the aspect ratio of the downsized set
        input_path (str): path to input folder
        output_path (str): path to output folder (one ``.npy`` per input image)
        model_path (str): path to saved model
        resize_height (int): height of the downsized images requested from
            ``_minify`` and used to locate the ``<input_path>_*x<height>`` folder
    """
    print("initialize")

    image_dir = os.path.join(basedir, 'images')
    img0 = [os.path.join(image_dir, f)
            for f in sorted(os.listdir(image_dir))
            if f.endswith(('JPG', 'jpg', 'png'))][0]
    sh = cv2.imread(img0).shape
    height = resize_height
    factor = sh[0] / float(height)
    width = int(round(sh[1] / factor))
    _minify(basedir, resolutions=[[height, width]])

    # select device (fall back to the CPU instead of crashing without CUDA)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    small_img_dir = input_path + '_*x' + str(resize_height) + '/'
    print(small_img_dir)

    small_img_path = sorted(glob.glob(glob.glob(small_img_dir)[0] + '/*.png'))[0]
    small_img = cv2.imread(small_img_path)
    print('small_img', small_img.shape)

    # The longer side of the network input is pinned to 640 pixels.
    # Portrait Orientation
    if small_img.shape[0] > small_img.shape[1]:
        input_h = 640
        input_w = int(
            round(float(input_h) / small_img.shape[0] * small_img.shape[1]))
    # Landscape Orientation
    else:
        input_w = 640
        input_h = int(
            round(float(input_w) / small_img.shape[1] * small_img.shape[0]))

    print('Monocular depth input_w %d input_h %d ' % (input_w, input_h))

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform_1 = Compose([
        Resize(
            input_w,
            input_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_AREA,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    img_names = sorted(glob.glob(os.path.join(input_path, "*")))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    # Predictions are resampled to the size of the downsized reference image.
    target_size = [small_img.shape[0], small_img.shape[1]]

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = read_image(img_name)
        img_input_1 = transform_1({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample_1 = torch.from_numpy(img_input_1).to(device).unsqueeze(0)
            prediction = model.forward(sample_1)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=target_size,
                mode="nearest",
            ).squeeze().cpu().numpy())

        # output
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])

        if VIZ:
            # Side-by-side figure of the input and its prediction.
            os.makedirs('./midas_otuputs', exist_ok=True)

            plt.figure(figsize=(12, 6))
            plt.subplot(1, 2, 1)
            plt.imshow(img)
            plt.subplot(1, 2, 2)
            plt.imshow(prediction, cmap='jet')
            plt.savefig('./midas_otuputs/%s' % os.path.basename(img_name))
            plt.close()

        print(filename + '.npy')
        np.save(filename + '.npy', prediction.astype(np.float32))

    print("finished")
def run(input_path, output_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    Every entry matched by ``<input_path>/*`` is read, passed through the
    network, resampled to the input image's height and width and written
    with ``utils.write_depth``.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device (fall back to the CPU instead of crashing without CUDA)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input (sorted so the processing order is reproducible)
    img_names = sorted(glob.glob(os.path.join(input_path, "*")))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            prediction = model.forward(sample)
            # Resample the prediction back to the input image's height/width.
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        # output: same base name as the input, inside output_path
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
def run(model_path):
    """Run MonoDepthNN on live camera frames to compute depth maps.

    Frames are grabbed from camera index 1 until the capture closes or ``q``
    is pressed. Each successfully read frame and its prediction are written
    under ``output/`` and shown in two OpenCV windows; per-stage timings are
    printed.

    Args:
        model_path (str): path to saved model
    """
    import os  # local import: only needed to create the output folder

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose(
        [
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.to(device)
    model.eval()

    # Frames and depth maps are written below this folder; make sure it exists.
    os.makedirs("output", exist_ok=True)

    cap = cv2.VideoCapture(1)
    print("is camera open", cap.isOpened())
    # Named constants for the former magic property ids 3 and 4.
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)

    print("start processing")

    i = 0
    try:
        while cap.isOpened():
            start = time.time()
            ret, frame = cap.read()
            print("new frame", ret)
            p1 = time.time()
            print(f"take a picture {p1 - start}")
            if ret:
                img = utils.process_camera_img(frame)
                img_input = transform({"image": img})["image"]
                p2 = time.time()
                print(f"transoform image {p2 - p1}")

                # compute
                with torch.no_grad():
                    sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
                    p3 = time.time()
                    print(f"from numpy to cuda {p3 - p2}")
                    prediction = model.forward(sample)
                    p4 = time.time()
                    print(f"prediction {p4 - p3}")
                    prediction = (
                        torch.nn.functional.interpolate(
                            prediction.unsqueeze(1),
                            size=img.shape[:2],
                            mode="bicubic",
                            align_corners=False,
                        )
                        .squeeze()
                        .cpu()
                        .numpy()
                    )
                    p5 = time.time()
                    print(f"prediction from cuda to cpu {p5 - p4}")

                # output (random suffix keeps names from separate runs apart)
                r = random.randint(0, 10000)
                cv2.imwrite(f"output/input-{i}-{r}.png", frame)
                utils.write_depth(f"output/depth-{i}-{r}", prediction, bits=2)
                p6 = time.time()
                print(f"save input and write depth {p6 - p5}")

                cv2.imshow('frame', frame)
                # NOTE(review): prediction is shown as a raw float array without
                # rescaling - confirm its value range suits cv2.imshow.
                cv2.imshow('prediction', prediction)
                p7 = time.time()
                print(f"show images {p7 - p6}")
                i += 1
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                print("Camera is not recording")

            print(f"image took {time.time() - start} s")
            print("\n-----------------------\n")
    finally:
        # When everything done, release the capture - also when the loop
        # raised, so the camera and the windows are not leaked.
        cap.release()
        cv2.destroyAllWindows()

    print("finished")