Example #1
    def transform_screen_data(self, screen):
        # Assumes: import numpy as np; import torch;
        #          import torchvision.transforms as T
        # Convert to float, rescale to [0, 1], convert to tensor
        screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
        screen = torch.from_numpy(screen)

        # Use the torchvision package to compose image transforms
        resize = T.Compose([T.ToPILImage(), T.Resize((40, 90)), T.ToTensor()])

        # Add a batch dimension and move to the target device
        return resize(screen).unsqueeze(0).to(self.device)
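For context, a minimal self-contained sketch of the same pipeline on a dummy screen (the 3x160x360 CHW shape and the CPU device are assumptions, not part of the original method):

import numpy as np
import torch
import torchvision.transforms as T

# Dummy channel-first (CHW) screen, the layout ToPILImage expects for tensors
screen = np.random.randint(0, 256, (3, 160, 360), dtype=np.uint8)

screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
screen = torch.from_numpy(screen)
resize = T.Compose([T.ToPILImage(), T.Resize((40, 90)), T.ToTensor()])
batch = resize(screen).unsqueeze(0).to(torch.device('cpu'))
print(batch.shape)  # torch.Size([1, 3, 40, 90])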
Example #2
def image2Tensor(cv2im, resize_im=True):
    # Assumes: import cv2; import numpy as np; import torch;
    #          from torch.autograd import Variable
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    if resize_im:
        cv2im = cv2.resize(cv2im, (224, 224))
    im_as_arr = np.float32(cv2im)
    # BGR (OpenCV) -> RGB; the reversed view must be made contiguous,
    # since torch.from_numpy rejects arrays with negative strides
    im_as_arr = np.ascontiguousarray(im_as_arr[..., ::-1])
    im_as_arr = im_as_arr.transpose(2, 0, 1)  # HWC -> CHW
    # Scale to [0, 1], then normalize with ImageNet statistics
    for channel, _ in enumerate(im_as_arr):
        im_as_arr[channel] /= 255
        im_as_arr[channel] -= mean[channel]
        im_as_arr[channel] /= std[channel]

    im_as_ten = torch.from_numpy(im_as_arr).float()
    im_as_ten.unsqueeze_(0)  # add a batch dimension
    # Variable is a legacy wrapper; in modern PyTorch a plain tensor
    # with requires_grad=True behaves the same
    im_as_var = Variable(im_as_ten, requires_grad=True)
    return im_as_var
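A possible caller, assuming an image at a hypothetical path and an untrained torchvision classifier purely for illustration (neither is part of the original snippet):

import cv2
from torchvision import models

img = cv2.imread('input.jpg')        # hypothetical path; BGR, HWC, uint8
x = image2Tensor(img)                # (1, 3, 224, 224), requires_grad=True
model = models.resnet18().eval()
logits = model(x)
logits[0, logits.argmax()].backward()
print(x.grad.abs().max())            # input gradients are now available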
Example #3
    def read_feat(self,
                  video_name,
                  f_init=None,
                  duration=None,
                  return_reshaped=True):
        """Read visual encoder features and stack them into memory.

        Parameters
        ----------
        video_name : str.
            Video identifier.
        f_init : int, optional.
            Initial frame index. By default the feature is
            sliced from frame 1.
        duration : int, optional.
            Duration in terms of number of frames. By default
            it extends to the last feature.
        return_reshaped : bool.
            Return stack of features reshaped after processing.
        """
        if not self.fobj:
            raise ValueError('The object instance is not open.')
        T = self.t_size
        s = self.t_delta
        # Explicit None checks so that f_init=0 is not mistaken for "unset"
        if f_init is not None and duration is not None:
            frames_of_interest = range(f_init, f_init + duration - T + 1, s)
            feat = self.fobj[video_name][self.feat_id][frames_of_interest, :]
        elif f_init is not None:
            feat = self.fobj[video_name][self.feat_id][f_init:-T + 1:s, :]
        elif duration is not None:
            feat = self.fobj[video_name][self.feat_id][:duration - T + 1:s, :]
        else:
            feat = self.fobj[video_name][self.feat_id][:-T + 1:s, :]
        pooled_feat = self._feature_pooling(feat)

        if not return_reshaped:
            feat_dim = feat.shape[1]
            pooled_feat = pooled_feat.reshape((-1, feat_dim))
            if not pooled_feat.flags['C_CONTIGUOUS']:
                return np.ascontiguousarray(pooled_feat)
        return pooled_feat
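The slicing pattern above suggests an HDF5-backed store; here is a hedged sketch of the equivalent direct access (the file name, video id, dataset name, and window parameters are all made up for illustration):

import h5py

# Assumed layout: one group per video, each holding a
# (num_frames, feat_dim) dataset under self.feat_id
with h5py.File('features.hdf5', 'r') as fobj:
    T, s = 16, 8  # would come from self.t_size and self.t_delta
    feat = fobj['video_validation_01']['c3d_features'][:-T + 1:s, :]
    print(feat.shape, feat.flags['C_CONTIGUOUS'])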
Example #4
    def read_feat(self, video_name, f_init=None, duration=None,
                  return_reshaped=True):
        """Read C3D features and stack them into memory.
        Parameters
        ----------
        video_name : str.
            Video identifier.
        f_init : int, optional.
            Initial frame index. By default the feature is
            sliced from frame 1.
        duration : int, optional.
            Duration in terms of number of frames. By default
            it extends to the last feature.
        return_reshaped : bool.
            Return stack of features reshaped when pooling is applied.
        """
        if not self.fobj:
            raise ValueError('The object instance is not open.')
        s = self.t_stride
        t_size = self.t_size
        # Explicit None checks so that f_init=0 is not mistaken for "unset"
        if f_init is not None and duration is not None:
            frames_of_interest = range(f_init,
                                       f_init + duration - t_size + 1, s)
            feat = self.fobj[video_name][self.feat_id][frames_of_interest, :]
        elif f_init is not None:
            feat = self.fobj[video_name][self.feat_id][f_init:-t_size + 1:s, :]
        elif duration is not None:
            feat = self.fobj[video_name][self.feat_id][:duration - t_size + 1:s, :]
        else:
            feat = self.fobj[video_name][self.feat_id][:-t_size + 1:s, :]
        pooled_feat = self._feature_pooling(feat)

        if not return_reshaped:
            feat_dim = feat.shape[1]
            pooled_feat = pooled_feat.reshape((-1, feat_dim))
            if not pooled_feat.flags['C_CONTIGUOUS']:
                return np.ascontiguousarray(pooled_feat)
        return pooled_feat
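Every example on this page ends up calling np.ascontiguousarray; a small self-contained illustration of what the C_CONTIGUOUS flag and the copy actually do (the array contents are arbitrary):

import numpy as np

a = np.arange(12, dtype=np.float32).reshape(3, 4)
b = a.T                          # transposing yields a non-contiguous view
print(b.flags['C_CONTIGUOUS'])   # False
c = np.ascontiguousarray(b)      # copies into C-ordered (row-major) memory
print(c.flags['C_CONTIGUOUS'])   # True
print(np.shares_memory(b, c))    # False: a fresh buffer was allocated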