def init_model(transform):
    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    model_path = "../MiDaS/model-f46da743.pt"

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    # note: the incoming `transform` argument is ignored and rebuilt here
    transform = Compose(
        [
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.to(device)
    model.eval()

    return (model, transform, device), None
def init_model(transform):
    parser = argparse.ArgumentParser()
    parser.add_argument('-mw', '--model_weights', default='model-f6b98070.pt',
                        help='path to the trained weights of model')
    parser.add_argument('-mt', '--model_type', default='large',
                        help='model type: large or small')
    parser.add_argument('--optimize', dest='optimize', action='store_true')
    parser.add_argument('--no-optimize', dest='optimize', action='store_false')
    parser.set_defaults(optimize=True)
    args, unknown = parser.parse_known_args()

    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    if args.model_type == "large":
        model_path = "../MiDaS/" + args.model_weights
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif args.model_type == "small":
        # fall back to the small-model weights if a large checkpoint was given
        if "small" not in args.model_weights:
            args.model_weights = "model-small-70d6b9c8.pt"
        model_path = "../MiDaS/" + args.model_weights
        model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3",
                               exportable=True, non_negative=True, blocks={'expand': True})
        net_w, net_h = 256, 256
    else:
        print(f"model_type '{args.model_type}' not implemented, use: --model_type large")
        assert False

    transform = Compose(
        [
            Resize(
                net_w,
                net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.eval()

    if args.optimize:
        # trace the model once, then use channels-last half precision on CUDA
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        traced_script_module = torch.jit.trace(model, rand_example)
        model = traced_script_module

        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    return (model, transform, device, args.optimize), args
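# Hypothetical usage sketch for the init_model() above: unpack the returned
# state tuple and run one forward pass. The helper name `demo_single_image`
# and passing None for the unused `transform` argument are assumptions for
# illustration, not part of the original code.
def demo_single_image(img):
    (model, transform, device, optimize), args = init_model(None)

    img_input = transform({"image": img})["image"]
    with torch.no_grad():
        sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
        if optimize and device == torch.device("cuda"):
            # match the traced model's memory format and precision
            sample = sample.to(memory_format=torch.channels_last).half()
        prediction = model.forward(sample)
    return prediction.squeeze().cpu().numpy()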
def run(input_path, output_path, model_path, model_type="large", optimize=True):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        model_type (str): "large" or "small" MiDaS variant
        optimize (bool): trace the model and use half precision on CUDA
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    if model_type == "large":
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif model_type == "small":
        model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3",
                               exportable=True, non_negative=True, blocks={'expand': True})
        net_w, net_h = 256, 256
    else:
        print(f"model_type '{model_type}' not implemented, use: --model_type large")
        assert False

    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.eval()

    if optimize:
        # trace the model once, then use channels-last half precision on CUDA
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        traced_script_module = torch.jit.trace(model, rand_example)
        model = traced_script_module

        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            if optimize and device == torch.device("cuda"):
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            prediction = model.forward(sample)
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )
            prediction /= 1000

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)
        # debug output
        # print(prediction)
        # print(prediction.shape)

    print("finished")
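# A minimal command-line entry point sketch for the run() above, in the style
# of the MiDaS run.py script; the flag names and defaults here are assumptions.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_path', default='input',
                        help='folder with input images')
    parser.add_argument('-o', '--output_path', default='output',
                        help='folder for output depth maps')
    parser.add_argument('-mw', '--model_weights', default='model-f6b98070.pt',
                        help='path to the trained weights of model')
    parser.add_argument('-mt', '--model_type', default='large',
                        help='model type: large or small')
    args = parser.parse_args()

    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    run(args.input_path, args.output_path, args.model_weights, args.model_type)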
def run(input_path, output_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            prediction = model.forward(sample)
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
def run(model_path):
    """Run MonoDepthNN to compute depth maps from a live webcam feed."""
    # set torch options
    torch.cuda.empty_cache()
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # select device
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    cam = cv2.VideoCapture(0)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)
    cam.set(cv2.CAP_PROP_FPS, 30)

    while True:
        t = time.time()

        _, left_img = cam.read()
        image = cv2.cvtColor(left_img, cv2.COLOR_BGR2RGB) / 255.0

        # apply transforms
        image = transform({"image": image})["image"]

        # predict and resize to original resolution
        with torch.no_grad():
            image = torch.from_numpy(image).to(device).unsqueeze(0)
            depth = model.forward(image)
            depth = (
                torch.nn.functional.interpolate(
                    depth.unsqueeze(1),
                    size=left_img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

        # convert the raw prediction to a scaled depth map (project-local helper)
        depth_map = write_depth(depth, bits=2, reverse=False)

        # synthesize the right-eye view and compose the anaglyph (project-local helpers)
        right_img = generate_stereo(left_img, depth_map)
        anaglyph = overlap(left_img, right_img)

        cv2.imshow("anaglyph", anaglyph)

        fps = 1. / (time.time() - t)
        print('\rframerate: %f fps' % fps, end='')

        # press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cam.release()
    cv2.destroyAllWindows()
class MiDaSROS:
    def __init__(self):
        '''Initialize ROS publishers and the subscriber.'''
        # topics where we publish
        self.bridge = CvBridge()
        self.image_depth_pub = rospy.Publisher("/midas/depth/image_raw", Image, queue_size=1)
        self.image_rgb_pub = rospy.Publisher("/midas/rgb/image_raw", Image, queue_size=1)
        self.camera_info_pub = rospy.Publisher("/midas/camera_info", CameraInfo, queue_size=1)

        # subscribed topic
        self.subscriber = rospy.Subscriber("/midas_rgb/image_raw", Image, self.callback, queue_size=1)

        # set up image display
        self.display_rgb = False
        self.display_depth = True

        # initialize Intel MiDaS
        self.initialized_midas = False
        rospack = rospkg.RosPack()
        ros_pkg_path = rospack.get_path('intelisl_midas_ros')
        model_path = os.path.join(ros_pkg_path, 'src/model-f6b98070.pt')
        self.model = MidasNet(model_path, non_negative=True)
        self.device = torch.device(
            "cuda") if torch.cuda.is_available() else torch.device("cpu")
        self.model.to(self.device)
        self.model.eval()
        rospy.loginfo('Loaded Intel MiDaS')

        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
        self.transform = midas_transforms.default_transform
        rospy.loginfo('Initialized Intel MiDaS transform')
        self.initialized_midas = True

    def show_image(self, img, window_name="Image Window"):
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.imshow(window_name, img)
        cv2.waitKey(2)

    def callback(self, img_msg):
        # convert to OpenCV format with the correct color order
        img = cv2.cvtColor(
            self.bridge.imgmsg_to_cv2(img_msg, desired_encoding='passthrough'),
            cv2.COLOR_BGR2RGB)
        if self.display_rgb:
            self.show_image(img, window_name='Ground Truth RGB')

        # convert RGB to depth using MiDaS
        if self.initialized_midas:
            input_batch = self.transform(img).to(self.device)
            with torch.no_grad():
                prediction = self.model(input_batch)
                prediction = torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                ).squeeze()

            # scale pixel values for display
            omax, omin = prediction.max(), prediction.min()
            prediction = (prediction - omin) / (omax - omin)

            # convert depth prediction to numpy
            output = prediction.cpu().numpy()
            if self.display_depth:
                self.show_image(output, window_name='Estimated Depth')

            # set up message (depth)
            depth_msg = self.bridge.cv2_to_imgmsg(output, encoding="passthrough")

            # set up camera info message
            camera_info_msg = CameraInfo()
            camera_info_msg.header.stamp = img_msg.header.stamp
            camera_info_msg.height = img.shape[0]
            camera_info_msg.width = img.shape[1]

            # publish
            self.image_depth_pub.publish(depth_msg)
            self.image_rgb_pub.publish(img_msg)
            self.camera_info_pub.publish(camera_info_msg)
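# A minimal sketch of the node entry point for MiDaSROS; the node name
# 'midas_ros' is an assumption, not taken from the original code.
if __name__ == '__main__':
    rospy.init_node('midas_ros', anonymous=True)
    node = MiDaSROS()
    try:
        rospy.spin()
    except KeyboardInterrupt:
        rospy.loginfo('Shutting down MiDaS ROS node')
    cv2.destroyAllWindows()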
def run(model_path):
    """Run MonoDepthNN to compute depth maps."""
    # input images (note: `args` is expected to come from a module-level parser)
    img_list = os.listdir(args.input)
    img_list.sort()

    # output dir
    output_dir = './depth'
    os.makedirs(output_dir, exist_ok=True)

    # set torch options
    torch.cuda.empty_cache()
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # select device
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    for idx in tqdm(range(len(img_list))):
        sample = img_list[idx]
        raw_image = cv2.imread(os.path.join(args.input, sample))
        raw_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0

        # apply transforms
        image = transform({"image": raw_image})["image"]

        # predict and resize to original resolution
        with torch.no_grad():
            image = torch.from_numpy(image).to(device).unsqueeze(0)
            prediction = model.forward(image)
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=raw_image.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

        depth_map = write_depth(prediction, bits=2, reverse=False)
        cv2.imwrite(
            os.path.join(output_dir, 'MiDaS_{}.png'.format(sample.split('.')[0])),
            depth_map)
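# run() above reads a module-level `args`; a minimal sketch of the parser that
# would provide it (the default paths and the invocation are assumptions).
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', default='./input',
                        help='folder with input images')
    parser.add_argument('--model_weights', default='model-f6b98070.pt',
                        help='path to the trained weights of model')
    args = parser.parse_args()

    run(args.model_weights)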
    1, 'stride': [32, 16, 8]
}

plane_segmentation_cfg = {
    "meta_data_path": "G:/EVA5/ToGit/Planercnn/content/planercnn/test/inference/"
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device: %s" % device)

midas_model = MidasNet(r"G:\EVA5\ToGit\model-f6b98070.pt", non_negative=True)
midas_model.eval()
midas_model.to(device)
# print(midas_model)
print("Model Loaded")

# model = CustomNet("model-f46da743.pt", non_negative=True, yolo_cfg=yolo_cfg)
model = CustomNet(r"G:\EVA5\ToGit\yolov3-spp-ultralytics.pt",
                  non_negative=True, yolo_cfg=yolo_cfg)
model.gr = 1.0
model.hyp = hyp
model.to(device)
# print(model)
# freeze(model, base=True)