Example No. 1
import torch
from midas.midas_net import MidasNet  # import path as in the intel-isl/MiDaS repo

def MiDaS(pretrained=True, **kwargs):
    """ # This docstring shows up in hub.help()
    MiDaS model for monocular depth estimation
    pretrained (bool): load pretrained weights into model
    """

    model = MidasNet()

    if pretrained:
        checkpoint = (
            "https://github.com/intel-isl/MiDaS/releases/download/v2/model-f46da743.pt"
        )
        state_dict = torch.hub.load_state_dict_from_url(checkpoint,
                                                        progress=True,
                                                        check_hash=True)
        model.load_state_dict(state_dict)

    return model
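Once an entrypoint like this lives in a repository's hubconf.py, the model can be fetched directly through torch.hub. A minimal usage sketch (the repository and entrypoint name match the checkpoint URL and function name above):

import torch

# Resolve hubconf.py in the intel-isl/MiDaS repository and call the
# MiDaS() entrypoint defined above, downloading weights on first use.
model = torch.hub.load("intel-isl/MiDaS", "MiDaS", pretrained=True)
model.eval()

# hub.help() surfaces the entrypoint's docstring shown above
print(torch.hub.help("intel-isl/MiDaS", "MiDaS"))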
Example No. 2
    def __init__(self):
        '''Initialize ros publisher, ros subscriber'''
        # topic where we publish
        self.bridge = CvBridge()
        self.image_depth_pub = rospy.Publisher("/midas/depth/image_raw",
                                               Image,
                                               queue_size=1)
        self.image_rgb_pub = rospy.Publisher("/midas/rgb/image_raw",
                                             Image,
                                             queue_size=1)
        self.camera_info_pub = rospy.Publisher("/midas/camera_info",
                                               CameraInfo,
                                               queue_size=1)

        # subscribed Topic
        self.subscriber = rospy.Subscriber("/midas_rgb/image_raw",
                                           Image,
                                           self.callback,
                                           queue_size=1)

        # setup image display
        self.display_rgb = False
        self.display_depth = True

        # initialize Intel MiDaS
        self.initialized_midas = False
        rospack = rospkg.RosPack()
        ros_pkg_path = rospack.get_path('intelisl_midas_ros')
        model_path = os.path.join(ros_pkg_path, 'src/model-f6b98070.pt')

        self.model = MidasNet(model_path, non_negative=True)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()
        rospy.loginfo('Loaded Intel MiDaS')

        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
        self.transform = midas_transforms.default_transform
        rospy.loginfo('Initialized Intel MiDaS transform')
        self.initialized_midas = True
Example No. 3
def init_model(transform):  # note: the `transform` argument is ignored and rebuilt below
    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    model_path = "../MiDaS/model-f46da743.pt"

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose(
        [
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.to(device)
    model.eval()
    return (model, transform, device), None
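A minimal sketch of driving this helper end to end, assuming the float RGB input convention the other examples use (the dummy image below is illustrative and the weight file referenced above must exist):

import numpy as np
import torch

(model, transform, device), _ = init_model(None)  # the argument is ignored (see above)

img = np.random.rand(480, 640, 3).astype(np.float32)  # stand-in for a real RGB image in [0, 1]
sample = transform({"image": img})["image"]
with torch.no_grad():
    depth = model(torch.from_numpy(sample).to(device).unsqueeze(0))
print(depth.shape)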
Example No. 4
    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input data (image)

        Returns:
            tensor: depth
        """

        # ImageNet normalization, created on the input's device so the in-place
        # ops below also work for CUDA tensors (sub_/div_ modify x itself)
        mean = torch.tensor([0.485, 0.456, 0.406], device=x.device)
        std = torch.tensor([0.229, 0.224, 0.225], device=x.device)
        x.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])

        return MidasNet.forward(self, x)
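With normalization folded into forward(), a raw [0, 1] RGB tensor can be fed directly, which is convenient when tracing or exporting the module. A minimal usage sketch, assuming the method above belongs to a MidasNet subclass named NormalizedMidasNet (hypothetical; the listing omits the class line):

import torch

model = NormalizedMidasNet()  # hypothetical subclass carrying the forward() above
model.eval()
with torch.no_grad():
    depth = model(torch.rand(1, 3, 384, 384))  # raw [0, 1] input, no external normalization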
Example No. 5
    def __init__(self, model_type, model_path, optimize):
        print("initialize")

        # select device
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("device: %s" % self.device)

        # load network
        if model_type == "large":
            self.model = MidasNet(model_path, non_negative=True)
            self.net_w, self.net_h = 384, 384
        elif model_type == "small":
            self.model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True,
                                   non_negative=True, blocks={'expand': True})
            self.net_w, self.net_h = 256, 256
        else:
            print(f"model_type '{model_type}' not implemented, use: --model_type large")
            assert False

        self.transform = Compose(
            [
                Resize(
                    self.net_w,
                    self.net_h,
                    resize_target=None,
                    keep_aspect_ratio=True,
                    ensure_multiple_of=32,
                    resize_method="upper_bound",
                    image_interpolation_method=cv2.INTER_CUBIC,
                ),
                NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                PrepareForNet(),
            ]
        )

        self.model.eval()
        self.optimize = optimize
        if self.optimize:
            rand_example = torch.rand(1, 3, self.net_h, self.net_w)
            self.model(rand_example)
            traced_script_module = torch.jit.trace(self.model, rand_example)
            self.model = traced_script_module

            if self.device == torch.device("cuda"):
                self.model = self.model.to(memory_format=torch.channels_last)
                self.model = self.model.half()

        self.model.to(self.device)
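When `optimize` is set and CUDA is available, the traced model above runs in half precision with channels_last memory format, so callers must convert inputs the same way. A minimal sketch, assuming the class is named MidasInference (hypothetical; the listing omits the class line):

import torch

runner = MidasInference("large", "model-f6b98070.pt", optimize=True)  # hypothetical name
img = torch.rand(1, 3, runner.net_h, runner.net_w).to(runner.device)
if runner.optimize and runner.device == torch.device("cuda"):
    img = img.to(memory_format=torch.channels_last).half()  # match model dtype/layout
with torch.no_grad():
    depth = runner.model(img)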
Example No. 6
def convert(in_model_path, out_model_path):
    """Run MiDaS to create the torchscript model.
    https://pytorch.org/tutorials/advanced/cpp_export.html

    :param in_model_path: the path to the pre-trained model
    :param out_model_path: the output filename
    """
    # select device (note: `device` is only printed; the model is never moved,
    # so tracing below happens on CPU)
    device = torch.device("cuda")
    print("device: %s" % device)

    # load network
    example_input = torch.rand(1, 3, 288, 384, dtype=torch.float32)
    model = MidasNet(in_model_path, non_negative=True)
    sm = torch.jit.trace(model, example_input)

    # Save the torchscript model
    sm.save(out_model_path)
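The exported file can then be loaded without the Python class definition, from Python or from the C++ API described in the linked tutorial. A Python-side sketch (file name hypothetical):

import torch

sm = torch.jit.load("midas_traced.pt")  # hypothetical output path passed to convert()
sm.eval()
with torch.no_grad():
    depth = sm(torch.rand(1, 3, 288, 384))  # the shape used for tracing above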
Example No. 7
def init_model(transform):  # as in Example No. 3, the `transform` argument is ignored
    parser = argparse.ArgumentParser()
    parser.add_argument('-mw', '--model_weights', 
        default='model-f6b98070.pt',
        help='path to the trained weights of model'
    )

    parser.add_argument('-mt', '--model_type', 
        default='large',
        help='model type: large or small'
    )

    parser.add_argument('--optimize', dest='optimize', action='store_true')
    parser.add_argument('--no-optimize', dest='optimize', action='store_false')
    parser.set_defaults(optimize=True)

    args, unknown = parser.parse_known_args()    
    
    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    if args.model_type == "large":
        model_path = "../MiDaS/"+args.model_weights
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif args.model_type == "small":
        if "small" not in args.model_weights:
            args.model_weights = "model-small-70d6b9c8.pt"
        model_path = "../MiDaS/"+args.model_weights
        model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True, non_negative=True, blocks={'expand': True})
        net_w, net_h = 256, 256
    else:
        print(f"model_type '{model_type}' not implemented, use: --model_type large")
        assert False

    transform = Compose(
        [
            Resize(
                net_w,
                net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.eval()
    
    if args.optimize:
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        traced_script_module = torch.jit.trace(model, rand_example)
        model = traced_script_module
    
        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)  
            model = model.half()

    model.to(device)    
    
    return (model, transform, device, args.optimize), args
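Because parse_known_args() is used, flags this parser does not recognize are returned (and here discarded) instead of raising an error, so the helper can share a command line with a host application's own parser. A minimal sketch, assuming the weight files referenced above are present:

import sys

# a command line mixing this module's flags with a foreign one
sys.argv = ["app.py", "--model_type", "small", "--host-only-flag", "1"]
(model, transform, device, optimize), args = init_model(None)
print(args.model_type)  # "small"; --host-only-flag is silently ignored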
Example No. 8
def run(input_path,
        output_path,
        model_path,
        model_type="large",
        optimize=True):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    if model_type == "large":
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif model_type == "small":
        model = MidasNet_small(model_path,
                               features=64,
                               backbone="efficientnet_lite3",
                               exportable=True,
                               non_negative=True,
                               blocks={'expand': True})
        net_w, net_h = 256, 256
    else:
        raise ValueError(
            f"model_type '{model_type}' not implemented, use: --model_type large")

    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.eval()

    if optimize:
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        traced_script_module = torch.jit.trace(model, rand_example)
        model = traced_script_module

        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):

        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input

        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            if optimize and device == torch.device("cuda"):
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            prediction = model.forward(sample)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())
            prediction /= 1000

        # output
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)
        print(prediction)
        print(prediction.shape)

    print("finished")
Example No. 9
def run(input_path, output_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):

        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input

        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            prediction = model.forward(sample)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        # output
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
Example No. 10
def run(model_path):
    """
    Run MonoDepthNN to compute depth maps.
    """
    # set torch options
    torch.cuda.empty_cache()
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    cam = cv2.VideoCapture(0)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)
    cam.set(cv2.CAP_PROP_FPS, 30)

    while True:
        t = time.time()
        _, left_img = cam.read()
        image = cv2.cvtColor(left_img, cv2.COLOR_BGR2RGB) / 255.0

        #  Apply transforms
        image = transform({"image": image})["image"]

        #  Predict and resize to original resolution
        with torch.no_grad():
            image = torch.from_numpy(image).to(device).unsqueeze(0)
            depth = model.forward(image)
            depth = (torch.nn.functional.interpolate(
                depth.unsqueeze(1),
                size=left_img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        depth_map = write_depth(depth, bits=2, reverse=False)

        right_img = generate_stereo(left_img, depth_map)
        anaglyph = overlap(left_img, right_img)

        cv2.imshow("anaglyph", anaglyph)

        fps = 1. / (time.time() - t)
        print('\rframerate: %f fps' % fps, end='')
        cv2.waitKey(1)
Example No. 11
def main():
    global args, best_result, output_directory

    # set random seed
    torch.manual_seed(args.manual_seed)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        args.batch_size = args.batch_size * torch.cuda.device_count()
    else:
        print("Let's use GPU ", torch.cuda.current_device())

    train_loader, val_loader = create_loader(args)

    if args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)

        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        optimizer = checkpoint['optimizer']

        # model_dict = checkpoint['model'].module.state_dict()  # to load the trained model using multi-GPUs
        # model = FCRN.ResNet(output_size=train_loader.dataset.output_size, pretrained=False)
        # model.load_state_dict(model_dict)

        # solve 'out of memory'
        model = checkpoint['model']

        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))

        # clear memory
        del checkpoint
        # del model_dict
        torch.cuda.empty_cache()
    else:
        print("=> creating Model")
        model = MidasNet()
        print("=> model created.")
        start_epoch = 0

        # different modules have different learning rate
        train_params = [{
            'params': model.pretrained.parameters(),
            'lr': args.lr
        }, {
            'params': model.scratch.parameters(),
            'lr': args.lr * 10
        }]

        optimizer = torch.optim.Adam(train_params,
                                     lr=args.lr,
                                     betas=args.betas,
                                     weight_decay=args.weight_decay)

        # You can use DataParallel() whether you use Multi-GPUs or not
        model = nn.DataParallel(model).cuda()

    # when training, use reduceLROnPlateau to reduce learning rate
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=args.lr_patience)

    # loss function
    criterion = criteria.ScaleAndShiftInvariantLoss()

    # create directory path
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    best_txt = os.path.join(output_directory, 'best.txt')
    config_txt = os.path.join(output_directory, 'config.txt')

    # write training parameters to config file
    if not os.path.exists(config_txt):
        with open(config_txt, 'w') as txtfile:
            args_ = vars(args)
            args_str = ''
            for k, v in args_.items():
                args_str = args_str + str(k) + ':' + str(v) + ',\t\n'
            txtfile.write(args_str)

    # create log
    log_path = os.path.join(
        output_directory, 'logs',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    for epoch in range(start_epoch, args.epochs):

        # remember change of the learning rate
        for i, param_group in enumerate(optimizer.param_groups):
            old_lr = float(param_group['lr'])
            logger.add_scalar('Lr/lr_' + str(i), old_lr, epoch)

        train(train_loader, model, criterion, optimizer, epoch,
              logger)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch,
                                     logger)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}, rmse={:.3f}, rml={:.3f}, log10={:.3f}, d1={:.3f}, d2={:.3f}, dd31={:.3f}, "
                    "t_gpu={:.4f}".format(epoch, result.rmse, result.absrel,
                                          result.lg10, result.delta1,
                                          result.delta2, result.delta3,
                                          result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        # save checkpoint for each epoch
        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)

        # when rml doesn't fall, reduce learning rate
        scheduler.step(result.absrel)

    logger.close()
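The `criteria.ScaleAndShiftInvariantLoss` used above presumably follows the scale- and shift-invariant objective from the MiDaS paper: the prediction is first aligned to the ground truth with a closed-form least-squares scale and shift, and the error is measured afterwards. A minimal sketch of that alignment step (the project's own implementation may differ in masking and gradient-matching terms):

import torch

def scale_and_shift(prediction, target, mask):
    """Solve min over (s, b) of sum(mask * (s * prediction + b - target)**2)."""
    a00 = torch.sum(mask * prediction * prediction)
    a01 = torch.sum(mask * prediction)
    a11 = torch.sum(mask)
    b0 = torch.sum(mask * prediction * target)
    b1 = torch.sum(mask * target)
    det = a00 * a11 - a01 * a01  # determinant of the 2x2 normal equations
    scale = (a11 * b0 - a01 * b1) / det
    shift = (-a01 * b0 + a00 * b1) / det
    return scale, shift

pred, gt = torch.rand(1, 10, 10), torch.rand(1, 10, 10)
mask = torch.ones_like(gt)
s, b = scale_and_shift(pred, gt, mask)
loss = torch.mean(mask * (s * pred + b - gt) ** 2)  # scale/shift-invariant MSE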
Example No. 12
class MiDaSROS:
    def __init__(self):
        '''Initialize ros publisher, ros subscriber'''
        # topic where we publish
        self.bridge = CvBridge()
        self.image_depth_pub = rospy.Publisher("/midas/depth/image_raw",
                                               Image,
                                               queue_size=1)
        self.image_rgb_pub = rospy.Publisher("/midas/rgb/image_raw",
                                             Image,
                                             queue_size=1)
        self.camera_info_pub = rospy.Publisher("/midas/camera_info",
                                               CameraInfo,
                                               queue_size=1)

        # subscribed Topic
        self.subscriber = rospy.Subscriber("/midas_rgb/image_raw",
                                           Image,
                                           self.callback,
                                           queue_size=1)

        # setup image display
        self.display_rgb = False
        self.display_depth = True

        # initialize Intel MiDaS
        self.initialized_midas = False
        rospack = rospkg.RosPack()
        ros_pkg_path = rospack.get_path('intelisl_midas_ros')
        model_path = os.path.join(ros_pkg_path, 'src/model-f6b98070.pt')

        self.model = MidasNet(model_path, non_negative=True)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()
        rospy.loginfo('Loaded Intel MiDaS')

        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
        self.transform = midas_transforms.default_transform
        rospy.loginfo('Initialized Intel MiDaS transform')
        self.initialized_midas = True

    def show_image(self, img, window_name="Image Window"):
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.imshow(window_name, img)
        cv2.waitKey(2)

    def callback(self, img_msg):
        # conversion to OpenCV and the correct color
        img = cv2.cvtColor(
            self.bridge.imgmsg_to_cv2(img_msg, desired_encoding='passthrough'),
            cv2.COLOR_BGR2RGB)
        if self.display_rgb:
            self.show_image(img, window_name='Ground Truth RGB')

        # convert RGB to depth using MiDaS
        if self.initialized_midas:
            input_batch = self.transform(img).to(self.device)
            with torch.no_grad():
                prediction = self.model(input_batch)
                prediction = torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                ).squeeze()

            # scale pixel values to display
            omax, omin = prediction.max(), prediction.min()
            prediction = (prediction - omin) / (omax - omin)

            # convert depth prediction to numpy
            output = prediction.cpu().numpy()
            if self.display_depth:
                self.show_image(output, window_name='Estimated Depth')

            # setup message (depth)
            depth_msg = self.bridge.cv2_to_imgmsg(output,
                                                  encoding="passthrough")

            # setup message camera info
            camera_info_msg = CameraInfo()
            camera_info_msg.header.stamp = img_msg.header.stamp
            camera_info_msg.height = img.shape[0]
            camera_info_msg.width = img.shape[1]

            # publish
            self.image_depth_pub.publish(depth_msg)
            self.image_rgb_pub.publish(img_msg)
            self.camera_info_pub.publish(camera_info_msg)
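A minimal entrypoint sketch for running this node (node name hypothetical):

if __name__ == '__main__':
    rospy.init_node('midas_ros', anonymous=True)
    MiDaSROS()
    rospy.spin()  # hand control to ROS; callback() does the work per frame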
Example No. 13
def run(model_path):
    """
    Run MonoDepthNN to compute depth maps.
    """
    # input images (`args` is read from module scope; see the driver sketch below)
    img_list = os.listdir(args.input)
    img_list.sort()

    # output dir
    output_dir = './depth'
    os.makedirs(output_dir, exist_ok=True)

    # set torch options
    torch.cuda.empty_cache()
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    for idx in tqdm(range(len(img_list))):
        sample = img_list[idx]
        raw_image = cv2.imread(os.path.join(args.input, sample))
        raw_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0

        #  Apply transforms
        image = transform({"image": raw_image})["image"]

        #  Predict and resize to original resolution
        with torch.no_grad():
            image = torch.from_numpy(image).to(device).unsqueeze(0)
            prediction = model.forward(image)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=raw_image.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        depth_map = write_depth(prediction, bits=2, reverse=False)

        cv2.imwrite(
            os.path.join(output_dir,
                         'MiDaS_{}.png'.format(sample.split('.')[0])),
            depth_map)
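Note that run() reads `args.input` from module scope instead of taking it as a parameter. A sketch of the driver such a script typically pairs with it (flag names beyond --input are hypothetical):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--input', default='./input', help='folder of input images')
parser.add_argument('--model_weights', default='model-f6b98070.pt')  # hypothetical flag
args = parser.parse_args()

run(args.model_weights)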
Example No. 14
    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Setup AdaBins model
        self.adabins_nyu_infer_helper = InferenceHelper(dataset='nyu',
                                                        device=self.device)
        self.adabins_kitti_infer_helper = InferenceHelper(dataset='kitti',
                                                          device=self.device)

        # Setup DiverseDepth model
        class DiverseDepthArgs:
            def __init__(self):
                self.resume = False
                self.cfg_file = "lib/configs/resnext50_32x4d_diversedepth_regression_vircam"
                self.load_ckpt = "pretrained/DiverseDepth.pth"

        diverse_depth_args = DiverseDepthArgs()
        merge_cfg_from_file(diverse_depth_args)
        self.diverse_depth_model = RelDepthModel()
        self.diverse_depth_model.eval()
        # load checkpoint
        load_ckpt(diverse_depth_args, self.diverse_depth_model)
        # TODO: update this - see how `device` argument should be processed
        if self.device == "cuda":
            self.diverse_depth_model.cuda()
        self.diverse_depth_model = torch.nn.DataParallel(
            self.diverse_depth_model)

        # Setup MiDaS model
        self.midas_model_path = "./pretrained/MiDaS_f6b98070.pt"
        midas_model_type = "large"

        # load network
        if midas_model_type == "large":
            self.midas_model = MidasNet(self.midas_model_path,
                                        non_negative=True)
            self.midas_net_w, self.midas_net_h = 384, 384
        elif midas_model_type == "small":
            self.midas_model = MidasNet_small(self.midas_model_path,
                                              features=64,
                                              backbone="efficientnet_lite3",
                                              exportable=True,
                                              non_negative=True,
                                              blocks={'expand': True})
            self.midas_net_w, self.midas_net_h = 256, 256

        self.midas_transform = Compose([
            Resize(
                self.midas_net_w,
                self.midas_net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])

        self.midas_model.eval()

        self.midas_optimize = True
        if self.midas_optimize:
            rand_example = torch.rand(1, 3, self.midas_net_h, self.midas_net_w)
            self.midas_model(rand_example)
            traced_script_module = torch.jit.trace(self.midas_model,
                                                   rand_example)
            self.midas_model = traced_script_module

            if self.device == "cuda":
                self.midas_model = self.midas_model.to(
                    memory_format=torch.channels_last)
                self.midas_model = self.midas_model.half()

        self.midas_model.to(torch.device(self.device))

        # Setup SGDepth model
        self.sgdepth_model = InferenceEngine.SgDepthInference()

        # Setup monodepth2 model
        self.monodepth2_model_path = "pretrained/monodepth2_mono+stereo_640x192"
        monodepth2_device = torch.device(self.device)
        encoder_path = os.path.join(self.monodepth2_model_path, "encoder.pth")
        depth_decoder_path = os.path.join(self.monodepth2_model_path,
                                          "depth.pth")

        # LOADING PRETRAINED MODEL
        print("   Loading Monodepth2 pretrained encoder")
        self.monodepth2_encoder = networks.ResnetEncoder(18, False)
        loaded_dict_enc = torch.load(encoder_path,
                                     map_location=monodepth2_device)

        # extract the height and width of image that this model was trained with
        self.feed_height = loaded_dict_enc['height']
        self.feed_width = loaded_dict_enc['width']
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items()
            if k in self.monodepth2_encoder.state_dict()
        }
        self.monodepth2_encoder.load_state_dict(filtered_dict_enc)
        self.monodepth2_encoder.to(monodepth2_device)
        self.monodepth2_encoder.eval()

        print("   Loading pretrained decoder")
        self.monodepth2_depth_decoder = networks.DepthDecoder(
            num_ch_enc=self.monodepth2_encoder.num_ch_enc, scales=range(4))

        loaded_dict = torch.load(depth_decoder_path,
                                 map_location=monodepth2_device)
        self.monodepth2_depth_decoder.load_state_dict(loaded_dict)

        self.monodepth2_depth_decoder.to(monodepth2_device)
        self.monodepth2_depth_decoder.eval()
Example No. 15
        'truth_thresh': 1,
        'random': 1,
        'stride': [32, 16, 8]
    }

    plane_segmentation_cfg = {
        "meta_data_path":
        "G:/EVA5/ToGit/Planercnn/content/planercnn/test/inference/"
    }

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    midas_model = MidasNet(r"G:\EVA5\ToGit\model-f6b98070.pt",
                           non_negative=True)
    midas_model.eval()
    midas_model.to(device)
    #print(midas_model)
    print("Model Loaded")

    # model = CustomNet("model-f46da743.pt", non_negative=True, yolo_cfg=yolo_cfg)
    model = CustomNet("G:\EVA5\ToGit\yolov3-spp-ultralytics.pt",
                      non_negative=True,
                      yolo_cfg=yolo_cfg)

    model.gr = 1.0
    model.hyp = hyp
    model.to(device)

    #print(model)