def run(input_path, output_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    For every file matched in ``input_path`` two artifacts are written into
    ``output_path``: ``<stem>.pth`` (a ``torch.save`` of the input image and
    its depth map) and whatever ``utils.write_depth`` emits for ``<stem>``.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cpu")
    print("device: %s" % device)

    # load network
    model = MonoDepthNet(model_path)
    model.to(device)
    model.eval()

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = utils.resize_image(img)
        img_input = img_input.to(device)

        # compute
        with torch.no_grad():
            out = model.forward(img_input)

        depth = utils.resize_depth(out, img.shape[1], img.shape[0])

        # Use one stem for both outputs. Splitting the path on '/' and '.'
        # by hand breaks with '\\' separators and truncates names that
        # contain extra dots, which made the .pth name disagree with the
        # depth file name (and let "a.1.png" / "a.2.png" overwrite each other).
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0])

        torch.save({'img': img, 'dpt': depth}, filename + '.pth')

        # output
        utils.write_depth(filename, depth, bits=2)

    print("finished")
def run(input_path, output_path, model_path, median_filter=False):
    """Predict a depth map for every file in a folder and write it out.

    Args:
        input_path (str): folder whose entries are read as images
        output_path (str): folder receiving the results (created if missing)
        model_path (str): saved model handed to ``Runner``
        median_filter (bool): when true, each prediction is additionally
            passed through ``Runner.weighted_filtering`` together with its
            input image before being written
    """
    runner = Runner(model_path)

    # collect inputs
    image_files = glob.glob(os.path.join(input_path, "*"))
    total = len(image_files)

    # make sure the destination exists
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for position, image_file in enumerate(image_files, start=1):
        print(" processing {} ({}/{})".format(image_file, position, total))

        image = utils.read_image(image_file)

        depth = runner.predict_depth(image)
        if median_filter:
            depth = runner.weighted_filtering(image, depth)

        # output is named after the input file, without its extension
        stem, _ext = os.path.splitext(os.path.basename(image_file))
        utils.write_depth(os.path.join(output_path, stem), depth, bits=2)

    print("finished")
def process_video(self, filename, dir_name):
    """Write a depth map for one frame per second of a video file.

    The capture position is advanced by the reported frame rate after each
    processed frame, so consecutive samples are one second of video apart.
    Outputs are named ``0``, ``1``, ``2``, ... inside ``dir_name``.

    Args:
        filename (str): video path opened with ``cv2.VideoCapture``
        dir_name (str): directory receiving the outputs; it is not created
            here
    """
    cap = cv2.VideoCapture(filename)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # An unknown frame rate (0 / NaN) would never advance the read
            # position and loop forever on the first frame; fall back to the
            # 30 fps the original naming scheme assumed.
            fps = 30

        frame_pos = 0
        second = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Promote grayscale frames to 3 channels *before* the BGR->RGB
            # conversion; previously the converted image was discarded.
            if len(frame.shape) == 2:
                frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) / 255.0

            prediction = self.process_images(img)

            # output: a plain integer index, so names stay unique past the
            # tenth sample and do not depend on a hard-coded frame rate
            out_filename = os.path.join(dir_name, str(second))
            utils.write_depth(out_filename, prediction, bits=2)

            second += 1
            frame_pos += fps
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_pos)
    finally:
        # release the capture even if prediction or writing fails
        cap.release()
def run(input_path, output_path, model_path, model_type="large", optimize=True, input_video=True):
    """Compute depth maps for the videos or images found in a folder.

    Args:
        input_path (str): folder that is searched for inputs
        output_path (str): folder receiving the results
        model_path (str): saved model handed to ``DepthPredictor``
        model_type (str): forwarded to ``DepthPredictor``
        optimize (bool): forwarded to ``DepthPredictor``
        input_video (bool): when true, every ``*.mp4`` in ``input_path`` is
            processed into its own sub-folder of ``output_path``; otherwise
            every entry of ``input_path`` is read as an image
    """
    predictor = DepthPredictor(model_type, model_path, optimize)

    print("start processing")

    if input_video:
        for vid_name in glob.glob(os.path.join(input_path, "*.mp4")):
            # one output folder per video, named after the video file
            video_stem = os.path.splitext(os.path.basename(vid_name))[0]
            dir_name = os.path.join(output_path, video_stem)
            os.makedirs(dir_name, exist_ok=True)
            predictor.process_video(vid_name, dir_name)
    else:
        image_files = glob.glob(os.path.join(input_path, "*"))
        total = len(image_files)

        # create output folder
        os.makedirs(output_path, exist_ok=True)

        for position, image_file in enumerate(image_files, start=1):
            print(" processing {} ({}/{})".format(image_file, position, total))

            image = utils.read_image(image_file)
            depth = predictor.process_images(image)

            image_stem, _ext = os.path.splitext(os.path.basename(image_file))
            utils.write_depth(os.path.join(output_path, image_stem), depth, bits=2)

    print("finished")
def postprocess(results, output_dir, img_path, model_name):
    """Resize a model output to the source image size and write it out.

    Args:
        results: indexable whose first element is the array given to
            ``cv2.resize``
        output_dir (str): directory receiving the output (created if missing,
            including parent directories)
        img_path (str): image whose height and width define the target size
        model_name (str): base name of the output inside ``output_dir``

    Raises:
        OSError: if ``img_path`` cannot be read as an image.
    """
    # Make sure the output directory exists; makedirs avoids the
    # check-then-create race and also handles nested paths.
    os.makedirs(output_dir, exist_ok=True)

    # Read the input image; cv2.imread signals failure by returning None.
    img = cv2.imread(img_path)
    if img is None:
        raise OSError("cannot read image: {}".format(img_path))
    h, w = img.shape[:2]

    # Scale the prediction back to the original image size.
    output = cv2.resize(results[0], (w, h), interpolation=cv2.INTER_CUBIC)

    # Write the visualised output.
    write_depth(os.path.join(output_dir, model_name), output, bits=2)
def run(input_path, output_path, model_path):
    """Compute depth maps with an ONNX model executed through onnx-tf.

    Args:
        input_path (str): folder whose entries are read as images
        output_path (str): folder receiving the results (created if missing)
        model_path (str): ONNX model file
    """
    print("initialize")

    # select device
    device = "CUDA:0"
    print("device: %s" % device)

    # load network
    print("loading model...")
    model = onnx.load(model_path)

    print("checking model...")
    onnx.checker.check_model(model)

    print("preparing model...")
    tf_rep = onnx_tf.backend.prepare(model, device)

    print('inputs:', tf_rep.inputs)
    print('outputs:', tf_rep.outputs)

    # preprocessing: resize first, then convert to the network layout
    resize_image = Resize(
        384,
        384,
        resize_target=None,
        keep_aspect_ratio=False,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )
    prepare_for_net = PrepareForNet()

    def transform(sample):
        return prepare_for_net(resize_image(sample))

    # get input
    image_files = glob.glob(os.path.join(input_path, "*"))
    total = len(image_files)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for position, image_file in enumerate(image_files, start=1):
        print(" processing {} ({}/{})".format(image_file, position, total))

        # input
        image = utils.read_image(image_file)
        net_input = transform({"image": image})["image"]

        # compute
        raw = tf_rep.run(net_input.reshape(1, 3, 384, 384))
        depth = np.array(raw).reshape(384, 384)

        # back to the resolution of the input image
        height, width = image.shape[0], image.shape[1]
        depth = cv2.resize(depth, (width, height), interpolation=cv2.INTER_CUBIC)

        # output
        stem, _ext = os.path.splitext(os.path.basename(image_file))
        utils.write_depth(os.path.join(output_path, stem), depth, bits=2)

    print("finished")
def run(input_path, output_path, model_path):
    """Compute depth maps for the JPEG images in a folder with MidasNet.

    Args:
        input_path (str): folder listed for ``.jpg`` / ``.jpeg`` files
            (extension matched case-insensitively)
        output_path (str): folder receiving the results (created if missing)
        model_path (str): saved model handed to ``MidasNet``
    """
    print("Starting Operations")

    # Select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Running on device: {device}")

    # Load model
    model = MidasNet(model_path)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input; the suffix tuple needs the leading dot on both entries,
    # otherwise any name that merely ends in "jpeg" is picked up
    img_names = [
        name for name in os.listdir(input_path)
        if name.lower().endswith((".jpg", ".jpeg"))
    ]
    num_img = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("Start Processing Images")

    for idx, img_name in enumerate(img_names):
        print(f"[{idx+1}/{num_img}] Processing {img_name}")

        img = utils.read_image(os.path.join(input_path, img_name))
        img_input = transform({"image": img})["image"]

        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            prediction = model.forward(sample)
            # resample the prediction to the input image's height and width
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        # strip only the final extension so "a.1.jpg" and "a.2.jpg" do not
        # both map to the same output name
        filename = os.path.join(output_path, os.path.splitext(img_name)[0])
        utils.write_depth(filename, prediction)
        print(f"Finished processing {img_name}")

    print("Finished Processing")
def run(input_path, output_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    Loads a serialized TensorFlow ``GraphDef`` from ``model_path``, feeds
    each image to the tensor named ``0:0`` and reads the result from the
    first output of the last operation in the imported graph.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model

    Raises:
        SystemExit: with status -1 when a ``KeyError`` is raised while
            processing (reported as a missing input node / output layer).
    """
    print("initialize")

    # cap the memory TensorFlow may allocate on each GPU so the runtime
    # does not claim the whole device
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_virtual_device_configuration(
                    gpu,
                    [
                        tf.config.experimental.VirtualDeviceConfiguration(
                            memory_limit=4000)
                    ])
        except RuntimeError as e:
            print(e)

    # load network
    graph_def = tf.compat.v1.GraphDef()
    with tf.io.gfile.GFile(model_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

    model_operations = tf.compat.v1.get_default_graph().get_operations()
    input_node = '0:0'
    output_layer = model_operations[-1].name + ':0'
    print("Last layer name: ", output_layer)

    resize_image = Resize(
        384,
        384,
        resize_target=None,
        keep_aspect_ratio=False,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )

    def compose2(f1, f2):
        return lambda x: f2(f1(x))

    transform = compose2(resize_image, PrepareForNet())

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    with tf.compat.v1.Session() as sess:
        try:
            # load images
            for ind, img_name in enumerate(img_names):
                print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

                # input
                img = utils.read_image(img_name)
                img_input = transform({"image": img})["image"]

                # compute
                prob_tensor = sess.graph.get_tensor_by_name(output_layer)
                prediction, = sess.run(prob_tensor, {input_node: [img_input]})
                prediction = prediction.reshape(384, 384)
                prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]),
                                        interpolation=cv2.INTER_CUBIC)

                # output
                filename = os.path.join(
                    output_path, os.path.splitext(os.path.basename(img_name))[0])
                utils.write_depth(filename, prediction, bits=2)

        except KeyError:
            # The quotes were misplaced before, so the literal text
            # "' + input_node + '" was printed instead of the node name.
            print("Couldn't find input node: " + input_node +
                  " or output layer: " + output_layer + ".")
            # same effect as exit(-1) without relying on the site builtin
            raise SystemExit(-1)

    print("finished")
def run(input_path, output_path, model_path, model_type="large", optimize=True):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        model_type (str): ``"large"`` (MidasNet, 384x384 network input) or
            ``"small"`` (MidasNet_small, 256x256 network input)
        optimize (bool): when true, the model is replaced by a
            ``torch.jit.trace`` of itself; on CUDA it is additionally
            converted to channels-last memory format and half precision

    Raises:
        AssertionError: if ``model_type`` is neither ``"large"`` nor
            ``"small"``.
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    if model_type == "large":
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif model_type == "small":
        model = MidasNet_small(model_path, features=64,
                               backbone="efficientnet_lite3", exportable=True,
                               non_negative=True, blocks={'expand': True})
        net_w, net_h = 256, 256
    else:
        print(
            f"model_type '{model_type}' not implemented, use: --model_type large"
        )
        # Raised explicitly: a bare `assert False` is stripped under
        # `python -O`, after which execution would continue and fail later
        # with an unrelated NameError.
        raise AssertionError(f"unsupported model_type: {model_type!r}")

    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.eval()

    # model and inputs must agree on layout/precision, so decide this once
    use_half_on_cuda = bool(optimize) and device.type == "cuda"

    if optimize:
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        model = torch.jit.trace(model, rand_example)

        if use_half_on_cuda:
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            if use_half_on_cuda:
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            prediction = model.forward(sample)
            # resample the prediction to the input image's height and width
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        prediction /= 1000

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)
        print(prediction)
        print(prediction.shape)

    print("finished")
def run(input_path, output_path, model_path):
    """Compute depth maps for every file in a folder using ``Mynet``.

    Args:
        input_path (str): folder whose entries are read as images
        output_path (str): folder receiving the results (created if missing)
        model_path (str): saved model handed to ``Mynet``
    """
    print("initialize")

    # pick the GPU when one is available
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("device: %s" % device)

    # load network
    model = Mynet(model_path, non_negative=True)
    model.inference = True

    preprocessing_steps = [
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ]
    transform = Compose(preprocessing_steps)

    model.to(device)
    model.eval()

    # get input
    image_files = glob.glob(os.path.join(input_path, "*"))
    total = len(image_files)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for position, image_file in enumerate(image_files, start=1):
        print(" processing {} ({}/{})".format(image_file, position, total))

        # input
        image = utils.read_image(image_file)
        net_input = transform({"image": image})["image"]

        # compute
        with torch.no_grad():
            batch = torch.from_numpy(net_input).to(device).unsqueeze(0)
            # the model returns two values; only the first is used here
            depth, _ = model.forward(batch)
            resized = torch.nn.functional.interpolate(
                depth.unsqueeze(1),
                size=image.shape[:2],
                mode="bicubic",
                align_corners=False,
            )
            depth = resized.squeeze().cpu().numpy()

        # output
        stem, _ext = os.path.splitext(os.path.basename(image_file))
        utils.write_depth(os.path.join(output_path, stem), depth, bits=2)

    print("finished")
def run(img_paths, depth_paths, model_path, vis_input=False):
    """Run MonoDepthNN to compute depth maps for explicit path pairs.

    Images and output paths are paired by position. A failure on one image
    is printed and processing continues with the next pair.

    Args:
        img_paths (list): paths of the input images
        depth_paths (list): output paths passed to ``utils.write_depth``,
            one per entry of ``img_paths``
        model_path (str): path to saved model
        vis_input (bool): when true, the input image is passed to
            ``utils.write_depth`` as ``color``
    """
    print("initialize")

    # select device
    device = torch.device("cuda")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    num_images = len(img_paths)

    print("start processing {} images".format(num_images))

    for ind, (img_name, depth_name) in enumerate(zip(img_paths, depth_paths)):
        try:
            print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

            # input
            img = utils.read_image(img_name)
            img_input = transform({"image": img})["image"]

            # compute
            with torch.no_grad():
                sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
                prediction = model.forward(sample)
                prediction = (torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                ).squeeze().cpu().numpy())

            # create output folder; a bare file name has an empty dirname,
            # and os.makedirs("") raises, which used to make such outputs
            # fail silently through the handler below
            depth_dir = os.path.dirname(depth_name)
            if depth_dir:
                os.makedirs(depth_dir, exist_ok=True)

            # output
            utils.write_depth(depth_name, prediction, bits=1,
                              color=img if vis_input else None)
        except Exception as e:
            # deliberate best effort: report and move on to the next image
            print(e)

    print("finished")
def run(input_path, output_path, model_path, model_type="large"):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        model_type (str): ``"large"`` (384x384 network input) or ``"small"``
            (256x256 network input)

    Raises:
        AssertionError: if ``model_type`` is neither ``"large"`` nor
            ``"small"``.
    """
    print("initialize")

    # select device
    # NOTE(review): this string is only printed; it is not passed to the
    # inference session created below.
    device = "CUDA:0"
    print("device: %s" % device)

    # network resolution
    if model_type == "large":
        net_w, net_h = 384, 384
    elif model_type == "small":
        net_w, net_h = 256, 256
    else:
        print(f"model_type '{model_type}' not implemented, use: --model_type large")
        # Raised explicitly: a bare `assert False` is stripped under
        # `python -O`, after which net_w / net_h would be undefined.
        raise AssertionError(f"unsupported model_type: {model_type!r}")

    # load network
    print("loading model...")
    model = rt.InferenceSession(model_path)
    input_name = model.get_inputs()[0].name
    output_name = model.get_outputs()[0].name

    resize_image = Resize(
        net_w,
        net_h,
        resize_target=None,
        keep_aspect_ratio=False,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )

    def compose2(f1, f2):
        return lambda x: f2(f1(x))

    transform = compose2(resize_image, PrepareForNet())

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute: a single float32 batch of one image
        net_input = img_input.reshape(1, 3, net_h, net_w).astype(np.float32)
        output = model.run([output_name], {input_name: net_input})[0]
        prediction = np.array(output).reshape(net_h, net_w)
        prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]),
                                interpolation=cv2.INTER_CUBIC)

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0]
        )
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
def run(model_path):
    """Run MonoDepthNN on live camera frames and show/save the depth maps.

    Frames are read from capture device index 1. Each successfully read
    frame is saved to ``output/`` together with its depth map and both are
    shown in windows; pressing ``q`` stops the loop. Timing of every stage
    is printed.

    Args:
        model_path (str): path to saved model
    """
    import os  # local import: this block did not previously depend on os

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose(
        [
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.to(device)
    model.eval()

    cap = cv2.VideoCapture(1)
    print("is camera open", cap.isOpened())
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)

    # the frames and depth maps below are written into this folder
    os.makedirs("output", exist_ok=True)

    print("start processing")
    i = 0
    try:
        while cap.isOpened():
            start = time.time()
            ret, frame = cap.read()
            print("new frame", ret)
            p1 = time.time()
            print(f"take a picture {p1 - start}")
            if ret:
                img = utils.process_camera_img(frame)
                img_input = transform({"image": img})["image"]
                p2 = time.time()
                print(f"transoform image {p2 - p1}")

                # compute
                with torch.no_grad():
                    sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
                    p3 = time.time()
                    print(f"from numpy to cuda {p3 - p2}")
                    prediction = model.forward(sample)
                    p4 = time.time()
                    print(f"prediction {p4 - p3}")
                    prediction = (
                        torch.nn.functional.interpolate(
                            prediction.unsqueeze(1),
                            size=img.shape[:2],
                            mode="bicubic",
                            align_corners=False,
                        )
                        .squeeze()
                        .cpu()
                        .numpy()
                    )
                    p5 = time.time()
                    print(f"prediction from cuda to cpu {p5 - p4}")

                # output; the random suffix is part of each saved file name
                r = random.randint(0, 10000)
                cv2.imwrite(f"output/input-{i}-{r}.png", frame)
                utils.write_depth(f"output/depth-{i}-{r}", prediction, bits=2)
                p6 = time.time()
                print(f"save input and write depth {p6 - p5}")

                cv2.imshow('frame', frame)
                cv2.imshow('prediction', prediction)
                p7 = time.time()
                print(f"show images {p7 - p6}")
                i += 1
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                print("Camera is not recording")
            print(f"image took {time.time() - start} s")
            print("\n-----------------------\n")
    finally:
        # When everything is done (or something failed), release the
        # capture and close the preview windows.
        cap.release()
        cv2.destroyAllWindows()

    print("finished")