def transforms(model_type="dpt_hybrid"):
    """Build the input preprocessing pipeline for a DPT depth model.

    Args:
        model_type (str): either ``"dpt_large"`` or ``"dpt_hybrid"``.

    Returns:
        A ``torchvision.transforms.Compose`` that applies ``Resize`` (384x384
        target, aspect ratio kept), ``NormalizeImage`` (mean 0.5 / std 0.5 per
        channel) and ``PrepareForNet``, in that order.

    Raises:
        ValueError: if ``model_type`` is not one of the two supported names.
    """
    # Validate before importing so a bad argument fails fast and does not
    # require cv2 / torchvision / dpt to be importable.
    if model_type not in ("dpt_large", "dpt_hybrid"):
        raise ValueError(
            "Only support model type dpt_large or dpt_hybrid, dpt_hybrid for default setting"
        )

    import cv2
    from torchvision.transforms import Compose

    from dpt.transforms import Resize, NormalizeImage, PrepareForNet

    # DPT-Large and DPT-Hybrid use the same normalization, so it is built once.
    normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    return Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="minimal",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        normalization,
        PrepareForNet(),
    ])
def preprocess(img_raw, net_w=576, net_h=384):
    """Resize, normalize and repack a raw image for network input.

    Args:
        img_raw: image handed to ``Resize`` under the ``"image"`` key
            (presumably an HxWx3 float array -- confirm against the caller).
        net_w (int): target width passed to ``Resize``. Defaults to 576, the
            value that used to be hard-coded.
        net_h (int): target height passed to ``Resize``. Defaults to 384, the
            value that used to be hard-coded.

    Returns:
        The ``"image"`` entry of the sample after ``Resize``,
        ``NormalizeImage`` and ``PrepareForNet`` were applied in that order.
    """
    steps = (
        # keep_aspect_ratio=False: the image is resized to the requested
        # width/height rather than preserving its own proportions.
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=False,
            ensure_multiple_of=32,
            resize_method="minimal",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        PrepareForNet(),
    )

    sample = {"image": img_raw}
    for step in steps:
        sample = step(sample)
    return sample["image"]
def run(input_path, output_path, model_path, model_type="dpt_hybrid", optimize=True):
    """Run MonoDepthNN to compute depth maps.

    Every regular file matched by ``<input_path>/*`` is read, passed through
    the selected network, resized back to the input resolution and written to
    ``output_path`` under the input's base name (extension stripped).

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder (created if missing)
        model_path (str): path to saved model
        model_type (str): one of ``dpt_large``, ``dpt_hybrid``,
            ``dpt_hybrid_kitti``, ``dpt_hybrid_nyu`` or ``midas_v21``
        optimize (bool): when truthy and the CUDA device is selected, the
            model and its inputs are converted to channels-last half precision

    Raises:
        AssertionError: if ``model_type`` is not one of the supported names.

    Note:
        Reads ``args.vis`` and ``args.kitti_crop`` from the module-level
        ``args`` object.
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # Every DPT variant shares this normalization; midas_v21 overrides it.
    mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]

    # load network
    if model_type == "dpt_large":  # DPT-Large
        net_w = net_h = 384
        model = DPTDepthModel(
            path=model_path,
            backbone="vitl16_384",
            non_negative=True,
            enable_attention_hooks=args.vis,
        )
    elif model_type == "dpt_hybrid":  # DPT-Hybrid
        net_w = net_h = 384
        model = DPTDepthModel(
            path=model_path,
            backbone="vitb_rn50_384",
            non_negative=True,
            enable_attention_hooks=args.vis,
        )
    elif model_type == "dpt_hybrid_kitti":
        net_w = 1216
        net_h = 352
        model = DPTDepthModel(
            path=model_path,
            scale=0.00006016,
            shift=0.00579,
            invert=True,
            backbone="vitb_rn50_384",
            non_negative=True,
            enable_attention_hooks=args.vis,
        )
    elif model_type == "dpt_hybrid_nyu":
        net_w = 640
        net_h = 480
        model = DPTDepthModel(
            path=model_path,
            scale=0.000305,
            shift=0.1378,
            invert=True,
            backbone="vitb_rn50_384",
            non_negative=True,
            enable_attention_hooks=args.vis,
        )
    elif model_type == "midas_v21":  # Convolutional model
        net_w = net_h = 384
        model = MidasNet_large(model_path, non_negative=True)
        mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    else:
        # Raised explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would let an unknown model type
        # fall through. The exception type is kept for existing callers.
        raise AssertionError(
            f"model_type '{model_type}' not implemented, use: --model_type [dpt_large|dpt_hybrid|dpt_hybrid_kitti|dpt_hybrid_nyu|midas_v21]"
        )

    normalization = NormalizeImage(mean=mean, std=std)

    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="minimal",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        normalization,
        PrepareForNet(),
    ])

    model.eval()

    # Decided once: the same condition applies to the model and every sample.
    use_half = bool(optimize) and device.type == "cuda"

    if use_half:
        model = model.to(memory_format=torch.channels_last)
        model = model.half()

    model.to(device)

    # get input: directories are dropped up front so the progress counter only
    # counts real inputs; sorted because glob order is not specified.
    img_names = sorted(
        name
        for name in glob.glob(os.path.join(input_path, "*"))
        if not os.path.isdir(name)
    )
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")
    for ind, img_name in enumerate(img_names):
        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = util.io.read_image(img_name)

        if args.kitti_crop is True:
            # Keep the bottom 352 rows and the horizontally centered
            # 1216 columns of the image.
            height, width, _ = img.shape
            top = height - 352
            left = (width - 1216) // 2
            img = img[top:top + 352, left:left + 1216, :]

        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)

            if use_half:
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()

            prediction = model.forward(sample)
            # Resize the network output back to the (possibly cropped) input
            # resolution before moving it to NumPy.
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

            # Dataset-specific output scaling.
            if model_type == "dpt_hybrid_kitti":
                prediction *= 256

            if model_type == "dpt_hybrid_nyu":
                prediction *= 1000.0

            if args.vis:
                # Use this call's model_type rather than the global CLI value
                # so the visualization always matches the loaded network.
                visualize_attention(sample, model, prediction, model_type)

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0]
        )
        util.io.write_depth(filename, prediction, bits=2)

    print("finished")
def _dpt_predict_folder(model, transform, device, src_dir, dst_dir, title):
    """Predict depth for every file in ``src_dir`` and write one CSV per file to ``dst_dir``."""
    use_half = device == torch.device("cuda")

    # to_csv does not create missing parent folders.
    os.makedirs(dst_dir, exist_ok=True)

    img_names = os.listdir(src_dir)
    num_images = len(img_names)

    print(title)
    for idx, img_name in enumerate(img_names):
        # progress
        print(" processing {} ({}/{})".format(img_name, idx + 1, num_images))

        img = img2np(os.path.join(src_dir, img_name))
        img_input = transform({"image": img})["image"]

        # prediction
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)

            if use_half:
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()

            prediction = model.forward(sample)
            # Resize the network output back to the input image resolution.
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

        # Swap the real extension for ".csv". A plain replace(".png", ".csv")
        # left non-PNG names untouched, so the CSV was written under an image
        # file name.
        stem = os.path.splitext(img_name)[0]
        df_pred = pd.DataFrame(prediction)
        df_pred.to_csv(os.path.join(dst_dir, stem + ".csv"), index=False)


def dpt(input_path, output_path):
    """Predict depth maps with the DPT-Hybrid NYU model and save them as CSV.

    Images are read from the ``single`` and ``crop`` sub-folders of
    ``input_path``; each prediction is written to the sub-folder of the same
    name under ``output_path`` as ``<image stem>.csv``.

    Args:
        input_path (str): folder containing the ``single`` and ``crop``
            sub-folders. A trailing path separator is accepted but not needed.
        output_path (str): folder receiving the ``single`` and ``crop`` CSV
            sub-folders (created if missing).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load network: model_type == "dpt_hybrid_nyu"
    net_w = 640
    net_h = 480
    model = DPTDepthModel(
        path="dpt/weights/dpt_hybrid_nyu-2ce69ec7.pt",
        scale=0.000305,
        shift=0.1378,
        invert=True,
        backbone="vitb_rn50_384",
        non_negative=True,
        enable_attention_hooks=False,
    )
    normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    transform = Compose(
        [
            Resize(
                net_w,
                net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="minimal",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            normalization,
            PrepareForNet(),
        ]
    )

    model.eval()

    # On CUDA the model runs in channels-last half precision.
    if device == torch.device("cuda"):
        model = model.to(memory_format=torch.channels_last)
        model = model.half()

    model.to(device)

    # predict depth for single images
    _dpt_predict_folder(
        model,
        transform,
        device,
        os.path.join(input_path, "single"),
        os.path.join(output_path, "single"),
        "Monocular Depth Prediction (Single Images)",
    )

    # predict depth for cropped images
    _dpt_predict_folder(
        model,
        transform,
        device,
        os.path.join(input_path, "crop"),
        os.path.join(output_path, "crop"),
        "Monocular Depth Prediction (Cropped Images)",
    )
def transforms():
    """Attach ready-made preprocessing pipelines to ``dpt.transforms`` and return that module.

    Four attributes are set on the module, in this order: ``midas_transform``,
    ``dpt_transform``, ``kitti_transform`` and ``nyu_transform``. Each is a
    ``Compose`` that divides the input image by 255.0, wraps it in an
    ``{"image": ...}`` sample, applies ``Resize``, ``NormalizeImage`` and
    ``PrepareForNet``, and finally turns the result into a torch tensor with
    a leading dimension of size 1.

    Returns:
        The ``dpt.transforms`` module with the four attributes attached.
    """
    import cv2
    from torchvision.transforms import Compose
    from dpt.transforms import Resize, NormalizeImage, PrepareForNet
    from dpt import transforms

    def _pipeline(width, height, mean, std):
        # One full preprocessing chain for the given target size and
        # normalization statistics.
        return Compose(
            [
                lambda img: {"image": img / 255.0},
                Resize(
                    width,
                    height,
                    resize_target=None,
                    keep_aspect_ratio=True,
                    ensure_multiple_of=32,
                    resize_method="minimal",
                    image_interpolation_method=cv2.INTER_CUBIC,
                ),
                NormalizeImage(mean=mean, std=std),
                PrepareForNet(),
                lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
            ]
        )

    midas_mean, midas_std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
    half = (0.5, 0.5, 0.5)

    # (attribute name, target width, target height, mean, std)
    presets = (
        ("midas_transform", 384, 384, midas_mean, midas_std),
        ("dpt_transform", 384, 384, half, half),
        ("kitti_transform", 1216, 352, half, half),
        ("nyu_transform", 640, 480, half, half),
    )

    for attr_name, width, height, mean, std in presets:
        # Fresh lists per pipeline, so no two pipelines share a mean/std object.
        setattr(transforms, attr_name, _pipeline(width, height, list(mean), list(std)))

    return transforms
def run(input_path, output_path, model_path, model_type="dpt_hybrid", optimize=True):
    """Run segmentation network

    Every regular file matched by ``<input_path>/*`` is read, segmented and
    written to ``output_path`` under the input's base name (extension
    stripped) via ``util.io.write_segm_img``.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder (created if missing)
        model_path (str): path to saved model
        model_type (str): ``dpt_large`` or ``dpt_hybrid``
        optimize (bool): when truthy and the CUDA device is selected, the
            model and its inputs are converted to channels-last half precision

    Raises:
        AssertionError: if ``model_type`` is not one of the supported names.
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    net_w = net_h = 480

    # load network
    if model_type == "dpt_large":
        model = DPTSegmentationModel(
            150,
            path=model_path,
            backbone="vitl16_384",
        )
    elif model_type == "dpt_hybrid":
        model = DPTSegmentationModel(
            150,
            path=model_path,
            backbone="vitb_rn50_384",
        )
    else:
        # Raised explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would let an unknown model type
        # fall through. The exception type is kept for existing callers.
        raise AssertionError(
            f"model_type '{model_type}' not implemented, use: --model_type [dpt_large|dpt_hybrid]"
        )

    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="minimal",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        PrepareForNet(),
    ])

    model.eval()

    # Decided once: the same condition applies to the model and every sample.
    use_half = bool(optimize) and device.type == "cuda"

    if use_half:
        model = model.to(memory_format=torch.channels_last)
        model = model.half()

    model.to(device)

    # get input: sub-directories matched by the glob are not images and are
    # dropped before they reach the image reader; sorted because glob order
    # is not specified.
    img_names = sorted(
        name
        for name in glob.glob(os.path.join(input_path, "*"))
        if not os.path.isdir(name)
    )
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")
    for ind, img_name in enumerate(img_names):
        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = util.io.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)

            if use_half:
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()

            out = model.forward(sample)

            # Resize the network output to the input resolution, then take
            # the arg-max over dim 1; the +1 makes the labels start at 1.
            prediction = torch.nn.functional.interpolate(
                out, size=img.shape[:2], mode="bicubic", align_corners=False
            )
            prediction = torch.argmax(prediction, dim=1) + 1
            prediction = prediction.squeeze().cpu().numpy()

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0]
        )
        util.io.write_segm_img(filename, img, prediction, alpha=0.5)

    print("finished")