Example #1
    def darknet_preprocess(self, images):
        # resize so the image fits inside the network input (neth x netw), keeping aspect ratio
        neth = netw = self.resolution
        im_h = images.shape[1]
        im_w = images.shape[2]

        if (1.0 * netw / im_w) < (1.0 * neth / im_h):
            new_w = netw
            new_h = (im_h * netw) // im_w
        else:
            new_h = neth
            new_w = (im_w * neth) // im_h
        images = resize_images(images, [new_h, new_w])
        resized = images

        # BCHW format
        images = np.transpose(images, (0, 3, 1, 2))
        # RGB to BGR
        images = images[:, ::-1, :, :]
        # to float and to 0-1
        images = images / 255.0
        # make the array contiguous in memory
        images = np.ascontiguousarray(images, dtype=np.float32)
        dark_frames = Image(images)
        return dark_frames, resized, images
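
Every example below calls a resize_images helper that is not included here; the following is a minimal sketch, assuming a cv2-backed batched resize that keeps the B*H*W*C layout and dtype (the signature and interpolation default are guesses):

import cv2
import numpy as np

def resize_images(images, size, interpolation=cv2.INTER_LINEAR):
    # images: B*H*W*C array, size: (new_h, new_w)
    new_h, new_w = size
    out = np.empty((images.shape[0], new_h, new_w, images.shape[3]),
                   dtype=images.dtype)
    for i in range(images.shape[0]):
        # cv2.resize takes the target size as (width, height)
        resized = cv2.resize(images[i], (new_w, new_h),
                             interpolation=interpolation)
        # cv2 drops a trailing singleton channel, so restore the C axis
        out[i] = resized.reshape(new_h, new_w, -1)
    return out
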
Example #2
    def compute_logits(self, image):
        image = resize_images(image, [self.height, self.width])
        image_bak = image
        # image shape B*H*W*C
        image = np.transpose(image, (0, 3, 1, 2))
        # has shape B*C*H*W
        image = image.astype(np.float32)
        image = image / 255.0

        image = torch.from_numpy(image)
        out = self.segment_func(image, self.output_downsample_factor)
        out = out.cpu().data.numpy()
        # out has shape batch*H*W*#classes

        if self.attach_lane_color:
            # compute the lane mask
            assert (out.shape[3] == 6)
            argmax = np.argmax(out, axis=3)
            # now shape B*H*W
            not_lane = (argmax != 5)
            downsampled_images = image_bak[:, ::self.output_downsample_factor,
                                           ::self.output_downsample_factor, :]
            downsampled_images[not_lane, :] = 0
            downsampled_images = downsampled_images / 255.0
            out = np.concatenate((out, downsampled_images), axis=3)

        return out
Example #3
def split_camera_middle_batch(sensor_data, sensor_names):
    id = sensor_names.index('CameraMiddle')
    rest_data = sensor_data[0:id] + sensor_data[(id + 1):]
    middle = sensor_data[id]

    # now splitting the image into two smaller ones
    middle_shape = middle.shape  # now shape is B H W C
    middle_shape = middle_shape[1:]
    middle = middle[:, middle_shape[0] // 4:middle_shape[0] * 3 // 4, :, :]
    left = middle[:, :, 0:middle_shape[1] // 2, :]
    left = resize_images(left, (middle_shape[0], middle_shape[1]))
    right = middle[:, :, middle_shape[1] // 2:, :]
    right = resize_images(right, (middle_shape[0], middle_shape[1]))
    rest_data += [left, right]

    return rest_data
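
A hypothetical call with dummy arrays, to make the expected shapes concrete (the sensor names and the 88x200 resolution below are placeholders, not values from the original code):

import numpy as np

sensor_names = ['CameraLeft', 'CameraMiddle', 'CameraRight']
sensor_data = [np.zeros((2, 88, 200, 3), dtype=np.uint8) for _ in sensor_names]

out = split_camera_middle_batch(sensor_data, sensor_names)
# out = [CameraLeft, CameraRight, left half, right half]: the middle camera is
# cropped to its central horizontal band, split into halves, and each half is
# resized back to 88x200
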
Example #4
    def compute_logits(self, images):
        # might be zoomed images
        #images = resize_images(images, [512*3//4, 512])
        images = resize_images(images, [400 * 3 // 4, 400])

        [N, H, W, C] = images.shape

        assert H >= self._resolution[0]
        assert W >= self._resolution[1]

        # get central crop of given resolution
        h0 = int(0.5 * (H - self._resolution[0]))
        hN = int(0.5 * (H + self._resolution[0]))
        w0 = int(0.5 * (W - self._resolution[1]))
        wN = int(0.5 * (W + self._resolution[1]))

        center_crop = images[:, h0:hN, w0:wN, :]
        self.last_images = center_crop
        center_crop = center_crop.transpose(0, 3, 1, 2)
        center_crop = (center_crop - self._means_marvin_tensor) / 127.0
        center_crop = torch.from_numpy(center_crop).type(torch.FloatTensor)
        input = center_crop.cuda(self._opts['gpu'], non_blocking=True)

        logits = self._model(input)
        logits = logits.squeeze()
        #print("logits shape", logits.size()) # size is batch*1
        #logits = torch.nn.functional.sigmoid(logits)

        return logits
Example #5
    def compute_logits(self, image):
        # TODO pin_memory=True,
        rgb_mean = [0.41738699, 0.45732192, 0.46886091]
        rgb_std = [0.25685097, 0.26509955, 0.29067996]

        image = resize_images(image, [self.height, self.width])
        # image shape B*H*W*C
        with torch.no_grad():
            img = torch.from_numpy(image)
            img = img.cuda(non_blocking=True)
            # from B H W C to B C H W
            img = img.permute(0, 3, 1, 2)
            # cast the uint8 tensor to float and scale to 0-1
            img = img.float()
            img = img / 255.0

            # normalize it
            img.sub_(img.new(rgb_mean).view(1, -1, 1, 1))
            img.div_(img.new(rgb_std).view(1, -1, 1, 1))

            sem_logits = self.model(img)
            # sem_logits has shape batch*nclass*H*W

            # compute the argmax visualization on the GPU
            max_value, argmax = sem_logits.max(1)
            # the output has size of batch*H*W
            self.argmax = argmax.cpu().numpy()
            sem_logits = sem_logits.permute(0, 2, 3, 1)
            sem_logits = sem_logits.cpu().numpy()

        # after the permute, sem_logits has shape batch*H*W*nclass, e.g. [1, 72, 96, 65]
        return sem_logits
Example #6
    def compute_disparity_average(self, images):
        # assume an RGB input image
        input_image = resize_images(images, [self.height, self.width])
        input_image = np.concatenate((input_image, input_image[:, :, ::-1, :]),
                                     axis=0)
        input_image = input_image.astype(np.float32) / 255

        disp = self.sess.run(self.model.disp_left_est[0],
                             feed_dict={self.left: input_image})
        disp_pp = self.post_process_disparity(disp.squeeze()).astype(
            np.float32)
        # convert the B*H*W disparity maps to B*H*W*1
        disp_pp = np.expand_dims(disp_pp, axis=3)
        return disp_pp
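
post_process_disparity is not shown; a plausible sketch in the style of the monodepth reference implementation, assuming disp stacks the predictions for the original frames followed by their horizontally flipped copies (numpy assumed as np, as in the other examples):

    def post_process_disparity(self, disp):
        # disp: (2*B, H, W); first half from the originals, second half from the flips
        n = disp.shape[0] // 2
        l_disp = disp[:n]
        r_disp = disp[n:, :, ::-1]
        m_disp = 0.5 * (l_disp + r_disp)
        _, h, w = l_disp.shape
        # edge-aware blending masks favouring each prediction away from its occluded border
        grid, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
        l_mask = 1.0 - np.clip(20 * (grid - 0.05), 0, 1)
        r_mask = l_mask[:, ::-1]
        return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp
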
Example #7
    def compute(self, images, intermediate_size=(576, 768)):
        # depth 256*512, seg: 576*768, yolo 312*416
        if intermediate_size is not None:
            images = resize_images(images, intermediate_size)

        self.images = images

        for mode in self.instances.keys():
            assert (self.num_replicates[mode] == 1)
            conn = self.instances[mode]
            conn.send(("compute", images))

        out_logits = {}

        for mode in self.instances.keys():
            conn = self.instances[mode]
            out_logits[mode] = conn.recv()

        return out_logits
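
self.instances[mode] behaves like the parent end of a multiprocessing Pipe, one per perception model; a minimal sketch of the worker loop that this send/recv protocol assumes (the function and variable names here are hypothetical):

from multiprocessing import Pipe, Process

def perception_worker(conn, model):
    # answer ("compute", images) requests until a None message arrives
    while True:
        msg = conn.recv()
        if msg is None:
            break
        cmd, images = msg
        if cmd == "compute":
            conn.send(model.compute_logits(images))

# hypothetical wiring on the parent side:
# parent_conn, child_conn = Pipe()
# Process(target=perception_worker, args=(child_conn, model), daemon=True).start()
# self.instances[mode] = parent_conn
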
Example #8
    def compute_logits(self, image):
        image = resize_images(image, [self.height, self.width])

        image = image[:, ::2, ::2, :]

        # normalize with mean and std
        image = (image / 255.0 - self.mean) / self.std
        image = image.astype(np.float32)
        image = np.transpose(image, [0, 3, 1, 2])
        image = torch.from_numpy(image)

        image_var = Variable(image, requires_grad=False, volatile=True)
        pred = self.model(image_var)[0]
        pred = pred[:, :, ::self.output_downsample_factor,
                    ::self.output_downsample_factor]
        # _, pred = torch.max(final, 1)
        pred = pred.permute([0, 2, 3, 1])
        pred = pred.cpu().data.numpy()
        return pred
Example #9
    def compute(self, images, intermediate_size=(576, 768)):
        # depth 256*512, seg: 576*768, yolo 312*416
        if intermediate_size is not None:
            if (intermediate_size[0] * images.shape[2]
                    != images.shape[1] * intermediate_size[1]):
                print("warning: the image aspect ratio is changed"
                      " in all_perceptions")
            images = resize_images(images, intermediate_size)

        self.images = images

        for mode in self.instances.keys():
            assert (self.num_replicates[mode] == 1)
            conn = self.instances[mode]
            conn.send(("compute", images))

        out_logits = {}

        for mode in self.instances.keys():
            conn = self.instances[mode]
            out_logits[mode] = conn.recv()

        return out_logits
Example #10
def camera_middle_zoom_batch(sensor_data, sensor_names, zoom_dict):
    out = {}
    for key in zoom_dict:
        id = sensor_names.index(key)
        # rest_data = sensor_data[0:id] + sensor_data[(id+1):]
        middle = sensor_data[id]

        if zoom_dict[key]:
            # zoom into the central crop and resize it back to the original size
            middle_shape = middle.shape  # now shape is B H W C
            middle_shape = middle_shape[1:]
            middle = middle[:, middle_shape[0] // 4:middle_shape[0] * 3 // 4,
                            middle_shape[1] // 4:middle_shape[1] * 3 // 4, :]
            middle = resize_images(middle, (middle_shape[0], middle_shape[1]))
        out[key] = middle

    ans = []
    for key in sensor_names:
        if key in out:
            ans.append(out[key])
        else:
            raise ValueError("zoom dict not complete")

    return ans
Example #11
    def _merge_logits_all_perception(self, logits_dict):
        res = []
        det_sz = (39, 52)
        #print(sorted(logits_dict.keys()))
        for key in sorted(logits_dict.keys()):
            if key == "seg":
                factor = 3
                size = (det_sz[0] * factor, det_sz[1] * factor)
                resized = resize_images(logits_dict[key],
                                        size,
                                        interpolation=cv2.INTER_NEAREST)
                resized *= 0.1
                resized = self._space2depth(resized, factor)
                res.append(resized)
            elif key == "depth":
                factor = 5
                size = (det_sz[0] * factor, det_sz[1] * factor)
                resized = resize_images(logits_dict[key],
                                        size,
                                        interpolation=cv2.INTER_LINEAR)
                resized *= 50
                resized = self._space2depth(resized, factor)
                res.append(resized)
            elif "det" in key:
                dB, dH, dW, dC = logits_dict[key].shape
                # compute the effective height
                eH = int(1.0 * det_sz[0] / det_sz[1] * dW)
                assert (eH == det_sz[0] and dW == det_sz[1])
                # compute the upper margin
                H_start = (dH - eH) // 2
                # crop the useful part
                cropped = logits_dict[key][:, H_start:(H_start + eH), :, :]

                # apply the amplification factor
                num_classes = dC // 9 - 5
                # we amplify the objectness score by 10
                factor = [1.0] * 4 + [10.0] + [1.0] * num_classes
                factor = np.array(factor * 9)
                factor = np.reshape(factor, newshape=(1, 1, 1, -1))
                cropped = cropped * factor

                res.append(cropped)
            elif key == "seg_abn":
                factor = 2
                size = (det_sz[0] * factor, det_sz[1] * factor)
                resized = resize_images(logits_dict[key],
                                        size,
                                        interpolation=cv2.INTER_NEAREST)
                resized *= 0.1
                resized = self._space2depth(resized, factor)
                res.append(resized)
            elif key == "0intersection":
                factor = 1
                size = (det_sz[0] * factor, det_sz[1] * factor)
                # replicate the per-image scalar over the target spatial size
                expanded = np.reshape(logits_dict[key], (-1, 1, 1, 1))
                resized = np.tile(expanded, (1, size[0], size[1], 1))

                res.append(resized)
            elif key == "drivable_area":
                factor = 3
                size = (det_sz[0] * factor, det_sz[1] * factor)
                resized = resize_images(logits_dict[key],
                                        size,
                                        interpolation=cv2.INTER_NEAREST)
                resized *= 0.1
                resized = self._space2depth(resized, factor)
                res.append(resized)
        concat = np.concatenate(res, axis=3)

        return concat
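
_space2depth is not shown; a sketch, assuming the standard space-to-depth rearrangement on B*H*W*C numpy arrays, which is what brings each resized branch down to the (39, 52) detection grid before the final concatenation:

    def _space2depth(self, x, factor):
        # (B, H, W, C) -> (B, H // factor, W // factor, C * factor * factor)
        B, H, W, C = x.shape
        x = x.reshape(B, H // factor, factor, W // factor, factor, C)
        x = x.transpose(0, 1, 3, 2, 4, 5)
        return x.reshape(B, H // factor, W // factor, factor * factor * C)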