    def __init__(self,
                 paths_file,
                 loader=default_loader,
                 transform=None,
                 target_transform=None,
                 frac_train=0.8,
                 visibility=0.3,
                 min_width=0,
                 min_height=0):
        """
        inits of all file names and bounding boxes
        """
        self.loader = loader
        # compose with ToTensor; trans.Compose would fail on a None entry
        if transform is not None:
            self.transform = trans.Compose([transform, trans.ToTensor()])
        else:
            self.transform = trans.ToTensor()
        self.target_transform = target_transform
        paths = motu.parse_videos_file(paths_file)
        current_index = 0
        used_index = [0, 2, 3, 4, 5]  # keep the frame number and the four box parameters, drop the id
        self.all_gt = np.zeros([0, 5])
        self.all_imagepaths = []
        self.num_classes = 80

        for path in paths:
            gt, img, info = motu.get_gt_img_inf(path)
            modified_length = int(info.seqLength * frac_train)  # use only the first frac_train part of each video

            gt = gt[gt[:, 2] < info.imWidth]  # drop boxes whose left edge lies beyond the image
            gt = gt[gt[:, 3] < info.imHeight]  # drop boxes whose top edge lies beyond the image

            neg_ids_x = np.where(gt[:, 2] < 0)
            neg_ids_y = np.where(gt[:, 3] < 0)

            pos_ids_x = np.where(gt[:, 2] + gt[:, 4] >= info.imWidth)
            pos_ids_y = np.where(gt[:, 3] + gt[:, 5] >= info.imHeight)
            # moving the top-left corner into the image means the width must shrink accordingly
            gt[neg_ids_x, 4] += gt[neg_ids_x, 2]
            gt[neg_ids_x, 2] = 0
            gt[neg_ids_y, 5] += gt[neg_ids_y, 3]  # same for the height (unclear whether y < 0 occurs)
            gt[neg_ids_y, 3] = 0
            # a box that touches the border exactly would also be bad
            gt[pos_ids_x, 4] = info.imWidth - gt[pos_ids_x, 2] - 1
            gt[pos_ids_y, 5] = info.imHeight - gt[pos_ids_y, 3] - 1
            # filter only after all index-based updates: the np.where indices above
            # refer to the unfiltered array and would be stale after a row filter
            gt = gt[gt[:, 4] > 0]  # make sure the width stayed positive
            gt = gt[gt[:, 5] > 0]  # make sure the height stayed positive
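            # illustrative: a box with x = -5, w = 20 becomes x = 0, w = 15;
            # a box with x + w >= imWidth is shrunk so that x + w == imWidth - 1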

            # keep only sufficiently visible and sufficiently large boxes
            gt = gt[gt[:, 8] > visibility, :]
            gt = gt[gt[:, 5] > min_height, :]
            gt = gt[gt[:, 4] > min_width, :]

            gt = motu.transform_bb_to_centered(gt)
            gt = motu.filter_person(gt)
            gt = motu.filter_frames(gt, 0, modified_length)
            gt = gt[:, used_index]
            gt[:, 0] += current_index  # shift frame numbers into one global index across all videos
            current_index += modified_length
            self.all_gt = np.concatenate((self.all_gt, gt), 0)
            self.all_imagepaths += img[:modified_length]

    def __getitem__(self, index):
        """
        load the image itself
        and choose the right bb frame
        """
        target = motu.filter_gt(self.all_gt, index)
        target = torch.from_numpy(target[:, 1:])
        sample = self.loader(self.all_imagepaths[index])
        width, height = sample.size
        if self.transform is not None:
            sample = self.transform(sample)
        # after ToTensor the sample is a CHW tensor: shape[1] is height, shape[2] is width
        if height != sample.shape[1] or width != sample.shape[2]:
            height_scale = height / sample.shape[1]
            width_scale = width / sample.shape[2]
            if self.target_transform is not None:
                target = trans.Compose([
                    trans.Lambda(lambda x: motu.resize_bb(
                        x, height_scale, width_scale)), self.target_transform
                ])(target)
            else:
                target = motu.resize_bb(target, height_scale, width_scale)
        elif self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target
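
A minimal usage sketch (the class name is truncated out of this snippet, so MotDetectionDataset is a stand-in; with a variable number of boxes per frame, batch_size=1 avoids needing a custom collate_fn):

import torch
import torchvision.transforms as trans

# hypothetical name for the dataset class defined above
dataset = MotDetectionDataset("Mot17_test_single.txt",
                              transform=trans.Resize((416, 416)))
loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
for sample, target in loader:
    pass  # sample: [1, 3, 416, 416]; target: [1, num_boxes, 4]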
Example #3
if not os.path.isdir(log_path):
    os.makedirs(log_path)

batch_size = 1  # the script only works with batch size 1
cuda = False  # careful: can take up a lot of GPU memory
save_every_image = False  # if True, every image is additionally saved as .png
anchors = [(0.43, 1.715), (0.745625, 3.645), (1.24375, 5.9325), (2.5, 12.24),
           (6.1225, 22.41375)]
net = YoloLSTM(batch_size, image_size=832, anchors=anchors)
net.load_snapshot(snapshot_path)
net.eval()
if cuda:
    net.cuda()
net.reinit_lstm(batch_size)
imgs = []
_, img_paths, _ = motu.get_gt_img_inf(path)
a = 0
with torch.no_grad():
    for img_path in img_paths[int(len(img_paths) * .8):]:  # last 20% of the video (validation split)
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (net.image_size, net.image_size))
        img = image.copy()
        image = torch.from_numpy(
            np.transpose(image.astype(np.float32), (2, 0, 1))).unsqueeze(0)
        if cuda:
            image = image.cuda()
        logits = net(image)
        if cuda:
            logits = logits.cpu()
        boxes = post_processing(logits, net.image_size, net.anchors, 0.25, 0.5)
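        # possible continuation (illustrative): draw and collect the detections;
        # the box layout [x1, y1, x2, y2, ...] returned by post_processing is an
        # assumption, as is using `a` as the frame counter
        for box in boxes[0]:
            x1, y1, x2, y2 = (int(v) for v in box[:4])
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        imgs.append(img)
        if save_every_image:
            cv2.imwrite(os.path.join(log_path, "frame_{:06d}.png".format(a)),
                        cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        a += 1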
Example #4
    def __init__(self, paths_file, seq_length=20, new_height=416, new_width=416, step=5,
                 valid_ratio=0.2, use_only_first_video=False):
        """

        :param paths_file: path to the file which includes the paths of the images and labels
        :param seq_length: length of the sequences to be generated
        :param new_height: resize images to this height
        :param new_width: resize images to this width
        :param step: steps between each sequence
        :param valid_ratio: fraction of images used for validation, i.e. with 0.2 the last 20% of each video is used for validation
        :param use_only_first_video: for debugging: only the first video is used for dataset construction
        """
        # the frame rate is not constant: 05 SDP has 14 fps vs. 30 fps for 02 SDP -> could be a problem
        # the movement is also a problem, especially if it changes within the prediction horizon
        # negative boxes are cropped below (possibly not necessary)
        self.seq_length = seq_length
        self.im_size = (new_height, new_width)

        paths = motu.parse_videos_file(paths_file)

        #                      dict     list     list
        # intermediate format: video -> frame -> boxes with id

        videos_train = {i: {"gt": [], "path": ""} for i in range(len(paths))}
        videos_valid = {i: {"gt": [], "path": ""} for i in range(len(paths))}
        frame_offsets = []
        # stats containers (index 0 is a dummy element, stripped before printing)
        all_traj_lengths = np.zeros(1)
        all_displacements = np.zeros(1)

        for i, path in enumerate(paths):
            if i != 0 and use_only_first_video:
                break
            # gt format: [frame id 4 box parameter]
            # get ground truth (gt)
            gt, info = motu.get_gt_info(path)
            num_frames = info.seqLength

            # remove everything that is not a pedestrian
            cond = np.vstack([(gt[:, 7] == 1).reshape(1, -1), (gt[:, 7] == 7).reshape(1, -1)]).T
            gt = gt[np.where(np.any(cond, axis=1))]
            # the pedestrian ids are now discontinuous, which would complicate the stats
            # calculation -> compute a correction and apply it
            ids = gt[:, 1]
            ids_diff = ids[1:] - ids[:-1]  # consecutive id differences
            ids_diff[ids_diff != 0] -= 1  # remove the natural +1 step between successive ids
            correct = np.cumsum(ids_diff)  # cumulative correction per row
            gt[1:, 1] -= correct  # assume the first row already has the correct id
            gt[:, 1] -= gt[0, 1] - 1  # the first id may be n != 1, so shift ids to start at 1
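            # worked example: ids [3, 3, 5, 5, 9] -> ids_diff [0, 2, 0, 4]
            # -> minus 1 on nonzero entries: [0, 1, 0, 3] -> correct = [0, 1, 1, 4]
            # -> ids become [3, 3, 4, 4, 5], shifted by gt[0, 1] - 1 = 2 to [1, 1, 2, 2, 3]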

            # clip negative boxes to the image (possibly not needed; remove once confirmed)
            # boxes can also extend past the image borders, so those are clipped as well

            neg_ids_x = np.where(gt[:, 2] < 0)
            neg_ids_y = np.where(gt[:, 3] < 0)
            pos_ids_x = np.where(gt[:, 2] + gt[:, 4] >= info.imWidth)
            pos_ids_y = np.where(gt[:, 3] + gt[:, 5] >= info.imHeight)
            # moving the top-left corner into the image means the width must shrink accordingly
            gt[neg_ids_x, 4] += gt[neg_ids_x, 2]
            gt[neg_ids_x, 2] = 0
            gt[neg_ids_y, 5] += gt[neg_ids_y, 3]  # same for the height (unclear whether y < 0 occurs)
            gt[neg_ids_y, 3] = 0
            # a box that touches the border exactly would also be bad
            gt[pos_ids_x, 4] = info.imWidth - gt[pos_ids_x, 2] - 1
            gt[pos_ids_y, 5] = info.imHeight - gt[pos_ids_y, 3] - 1

            # resize boxes and normalize data
            height_scale = info.imHeight / new_height
            width_scale = info.imWidth / new_width
            gt = motu.transform_bb_to_centered(gt)
            gt[:, [2, 4]] /= width_scale
            gt[:, [3, 5]] /= height_scale
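            # e.g. imWidth = 1920, new_width = 416 -> width_scale ~ 4.615,
            # so a 100 px wide box ends up ~ 21.7 px wide in network coordinates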

            # calc stats, i.e. trajectory length (mean, std, max, min)
            max_ped_id = int(np.max(gt[:, 1]))  # ids start at 1
            # for each id, collect the trajectory length and start-to-end displacement
            traj_lengths = np.zeros(max_ped_id)
            displacements = np.zeros(max_ped_id)
            for ped_id in range(max_ped_id):
                ids = np.where(gt[:, 1] == ped_id + 1)[0]
                # trajectory length
                traj_lengths[ped_id] = ids.shape[0]
                # displacement from begin to end
                start = gt[ids[0], 2:4]
                end = gt[ids[-1], 2:4]
                # if np.sqrt(np.sum(np.square(start-end))) > 416:
                #    print(start, end)
                displacements[ped_id] = np.sqrt(np.sum(np.square(start - end)))
            all_traj_lengths = np.concatenate([all_traj_lengths, traj_lengths])
            all_displacements = np.concatenate([all_displacements, displacements])

            # build the per-frame structure
            # leave a 10-frame gap between train and valid data
            train_test_split_idx = int(num_frames * (1.0 - valid_ratio))
            frame_offsets.append(train_test_split_idx)
            num_train_frames = train_test_split_idx - 10
            num_valid_frames = num_frames - train_test_split_idx
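            # e.g. num_frames = 600, valid_ratio = 0.2 -> split index 480:
            # train covers frames 0..469 (10-frame gap), valid covers 480..599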

            videos_train[i]["gt"] = [None for j in range(num_train_frames)]
            videos_train[i]["path"] = path + "img1/"
            for j in range(num_train_frames):
                videos_train[i]["gt"][j] = gt[np.where(gt[:, 0] == j)]
            videos_valid[i]["gt"] = [None for j in range(num_valid_frames)]
            videos_valid[i]["path"] = path + "img1/"
            for j in range(num_valid_frames):
                videos_valid[i]["gt"][j] = gt[np.where(gt[:, 0] == j + train_test_split_idx)]

        # goal format: [batch_size, seq_index, (id bb)]
        # self.sequences contains train + valid
        # each element of sequences is [seq_length, 120, 5]
        # for debugging, frame_paths contains the source path of the corresponding image
        self.sequences, self.frame_paths = self._intermediate_to_final(videos_train, self.seq_length, step,
                                                                       remaining_peds_idx=self.seq_length//2)
        self.valid_begin = len(self.sequences)  # needed to split the data with torch.utils.data.Subset
        valid_seq, valid_frames = self._intermediate_to_final(videos_valid, self.seq_length, step, frame_offsets,
                                                              remaining_peds_idx=self.seq_length//2)
        self.sequences += valid_seq
        self.frame_paths += valid_frames
        # print stats
        all_traj_lengths = all_traj_lengths[1:]  # first element is dummy
        print("Mean trajectory length: {}".format(np.mean(all_traj_lengths)))
        print("Deviation of trajectory length: {}".format(np.std(all_traj_lengths)))
        print("Max trajectory length: {}".format(np.max(all_traj_lengths)))
        print("Min trajectory length: {}".format(np.min(all_traj_lengths)))
        all_displacements = all_displacements[1:]
        print("Mean displacement: {}".format(np.mean(all_displacements)))
        print("Deviation of displacements: {}".format(np.std(all_displacements)))
        print("Max displacement: {}".format(np.max(all_displacements)))
        print("Min displacement: {}".format(np.min(all_displacements)))
Example #5
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('-filelist',
                        default='Mot17_test_single.txt',
                        help='path to filelist\n')
    parser.add_argument('-output_dir',
                        default='generated_anchors/anchors',
                        type=str,
                        help='Output anchor directory\n')
    parser.add_argument('-num_clusters',
                        default=0,
                        type=int,
                        help='number of clusters\n')
    args = parser.parse_args(argv)  # argv is expected to be sys.argv[1:]
    filelist = args.filelist
    output_dir = args.output_dir
    arg_num_clusters = args.num_clusters

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    paths_file = filelist
    paths = motu.parse_videos_file(paths_file)  # todo
    all_boxes = np.zeros([0, 2])

    for i, path in enumerate(paths):
        # gt format: [frame id 4 box parameter]
        # temporary: prepend '../' until the dataset is available locally
        path = '../' + path
        # get ground truth (gt)
        gt, info = motu.get_gt_info(path)
        num_frames = info.seqLength

        # remove everything that is not a pedestrian
        cond = np.vstack([(gt[:, 7] == 1).reshape(1, -1),
                          (gt[:, 7] == 7).reshape(1, -1)]).T
        gt = gt[np.where(np.any(cond, axis=1))]

        # clip negative boxes to the image (possibly not needed; remove once confirmed)
        # boxes can also extend past the image borders, so those are clipped as well
        neg_ids_x = np.where(gt[:, 2] < 0)
        neg_ids_y = np.where(gt[:, 3] < 0)
        pos_ids_x = np.where(gt[:, 2] + gt[:, 4] >= info.imWidth)
        pos_ids_y = np.where(gt[:, 3] + gt[:, 5] >= info.imHeight)
        # moving the top-left corner into the image means the width must shrink accordingly
        gt[neg_ids_x, 4] += gt[neg_ids_x, 2]
        gt[neg_ids_x, 2] = 0
        gt[neg_ids_y, 5] += gt[neg_ids_y, 3]  # same for the height (unclear whether y < 0 occurs)
        gt[neg_ids_y, 3] = 0
        # a box that touches the border exactly would also be bad
        gt[pos_ids_x, 4] = info.imWidth - gt[pos_ids_x, 2] - 1
        gt[pos_ids_y, 5] = info.imHeight - gt[pos_ids_y, 3] - 1

        # normalise boxes
        height_scale = info.imHeight / height_in_cfg_file
        width_scale = info.imWidth / width_in_cfg_file
        gt[:, [2, 4]] /= width_scale
        gt[:, [3, 5]] /= height_scale

        all_boxes = np.concatenate([all_boxes, gt[:, [4, 5]]])

    annotation_dims = all_boxes

    eps = 0.005

    if arg_num_clusters == 0:
        for num_clusters in range(1, 11):  # we make 1 through 10 clusters
            anchor_file = join(output_dir, 'anchors%d.txt' % (num_clusters))

            indices = [
                random.randrange(annotation_dims.shape[0])
                for i in range(num_clusters)
            ]
            centroids = annotation_dims[indices]
            kmeans(annotation_dims, centroids, eps, anchor_file)
            print('centroids.shape', centroids.shape)
    else:
        anchor_file = join(output_dir, 'anchors%d.txt' % (arg_num_clusters))
        indices = [
            random.randrange(annotation_dims.shape[0])
            for i in range(arg_num_clusters)
        ]
        centroids = annotation_dims[indices]
        kmeans(annotation_dims, centroids, eps, anchor_file)
        print('centroids.shape', centroids.shape)
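
The kmeans helper is defined elsewhere in the project; for reference, YOLO-style anchor clustering usually measures distance as 1 - IoU between (w, h) pairs, as if all boxes shared a common corner. A minimal sketch of that metric (illustrative, not necessarily the project's implementation):

import numpy as np

def iou_wh(boxes, centroids):
    """IoU between (w, h) pairs, treating all boxes as anchored at the same corner."""
    inter = np.minimum(boxes[:, None, 0], centroids[None, :, 0]) * \
            np.minimum(boxes[:, None, 1], centroids[None, :, 1])
    union = boxes[:, 0:1] * boxes[:, 1:2] + \
            (centroids[:, 0] * centroids[:, 1])[None, :] - inter
    return inter / union  # shape: [num_boxes, num_centroids]

# assignment step: each box joins the centroid with the smallest 1 - IoU distance
# assignments = np.argmin(1.0 - iou_wh(annotation_dims, centroids), axis=1)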