    def __getitem__(self, index):
        minibatch_db = [self._roidb[index]]
        blobs = get_minibatch(minibatch_db, self._num_classes)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        rois = torch.from_numpy(blobs['rois'])
        # we need to randomly shuffle the bounding boxes.
        data_height, data_width = data.size(1), data.size(2)
        if self.training:
            np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])

            # an image where both sides exceed the trim size should not occur
            if data_height > self.trim_height and data_width > self.trim_width:
                raise ValueError(
                    "width > trim_width and height > trim_height, this should not happen!"
                )
            elif data_height > self.trim_height:
                # data_width <= self.trim_width < data_height: crop the height
                min_y = int(torch.min(gt_boxes[:, 1]))
                max_y = int(torch.max(gt_boxes[:, 3]))
                trim_size = self.trim_height
                box_region = max_y - min_y + 1

                if min_y == 0:
                    y_s = 0
                else:
                    if (box_region - trim_size) < 0:
                        y_s_min = max(max_y - trim_size, 0)
                        y_s_max = min(min_y, data_height - trim_size)
                        if y_s_min == y_s_max:
                            y_s = y_s_min
                        else:
                            y_s = np.random.choice(range(y_s_min, y_s_max))

                    else:
                        y_s_add = int((box_region - trim_size) / 2)
                        if y_s_add == 0:
                            y_s = min_y
                        else:
                            y_s = np.random.choice(
                                range(min_y, min_y + y_s_add))

                # crop the image
                data = data[:, y_s:(y_s + trim_size), :, :]

                # shift y coordinate of gt_boxes
                gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                # clamp gt boxes to the trimmed region
                gt_boxes[:, 1].clamp_(0, trim_size - 1)
                gt_boxes[:, 3].clamp_(0, trim_size - 1)

                # shift y coordinate of rois
                rois[:, 2] = rois[:, 2] - float(y_s)
                rois[:, 4] = rois[:, 4] - float(y_s)

                # clamp rois to the trimmed region
                rois[:, 2].clamp_(0, trim_size - 1)
                rois[:, 4].clamp_(0, trim_size - 1)

            elif data_width > self.trim_width:
                # this means that data_width > data_height, we need to crop the
                # data_width
                min_x = int(torch.min(gt_boxes[:, 0]))
                max_x = int(torch.max(gt_boxes[:, 2]))
                trim_size = self.trim_width
                box_region = max_x - min_x + 1
                if min_x == 0:
                    x_s = 0
                else:
                    if (box_region - trim_size) < 0:
                        x_s_min = max(max_x - trim_size, 0)
                        x_s_max = min(min_x, data_width - trim_size)
                        if x_s_min == x_s_max:
                            x_s = x_s_min
                        else:
                            x_s = np.random.choice(range(x_s_min, x_s_max))
                    else:
                        x_s_add = int((box_region - trim_size) / 2)
                        if x_s_add == 0:
                            x_s = min_x
                        else:
                            x_s = np.random.choice(
                                range(min_x, min_x + x_s_add))

                # crop the image
                data = data[:, :, x_s:(x_s + trim_size), :]

                # shift x coordinate of gt_boxes
                gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                # clamp gt boxes to the trimmed region
                gt_boxes[:, 0].clamp_(0, trim_size - 1)
                gt_boxes[:, 2].clamp_(0, trim_size - 1)

                # shift x coordinate of rois
                rois[:, 1] = rois[:, 1] - float(x_s)
                rois[:, 3] = rois[:, 3] - float(x_s)
                # clamp rois to the trimmed region
                rois[:, 1].clamp_(0, trim_size - 1)
                rois[:, 3].clamp_(0, trim_size - 1)
            else:
                # data_height <= self.trim_height and data_width <= self.trim_width
                trim_size = min(data_width, data_height)

            # zero-pad the cropped image to a trim_size x trim_size square
            padding_data = torch.FloatTensor(trim_size, trim_size, 3).zero_()
            real_height = min(trim_size, data.size(1))
            real_width = min(trim_size, data.size(2))
            padding_data[:real_height, :real_width, :] = data[0][:real_height, :real_width, :]
            gt_boxes[:, :4].clamp_(0, trim_size)
            rois[:, 1:5].clamp_(0, trim_size)
            im_info[0, 0] = trim_size
            im_info[0, 1] = trim_size

            # check the bounding box:
            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            keep = torch.nonzero(not_keep == 0).view(-1)

            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
            else:
                num_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            rois_not_keep = (rois[:, 1] == rois[:, 3]) | (rois[:, 2]
                                                          == rois[:, 4])
            rois_keep = torch.nonzero(rois_not_keep == 0).view(-1)
            rois = rois[rois_keep]

            max_num_rois = 2000
            num_rois = min(rois.size(0), max_num_rois)
            rois_padding = torch.FloatTensor(max_num_rois,
                                             rois.size(1)).zero_()
            rois_padding[:num_rois, :] = rois[:num_rois]

            return padding_data, im_info, gt_boxes_padding, num_boxes, rois_padding
        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0

            return data, im_info, gt_boxes, num_boxes, rois
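
The crop-start selection above is duplicated verbatim for both axes. A minimal sketch of that rule as one helper (the name sample_crop_start is ours, not the repo's):

import numpy as np

def sample_crop_start(min_c, max_c, trim_size, data_size):
    """Pick a crop start so the gt-box span [min_c, max_c] stays visible.

    Mirrors the y_s / x_s logic above: if the box region fits inside the
    crop window, sample a start that keeps it fully inside; otherwise
    start somewhere in the first half of the overflowing region.
    """
    if min_c == 0:
        return 0
    box_region = max_c - min_c + 1
    if box_region < trim_size:
        s_min = max(max_c - trim_size, 0)
        s_max = min(min_c, data_size - trim_size)
        return s_min if s_min == s_max else int(np.random.choice(range(s_min, s_max)))
    s_add = (box_region - trim_size) // 2
    return min_c if s_add == 0 else int(np.random.choice(range(min_c, min_c + s_add)))

# usage, matching the height branch above:
# y_s = sample_crop_start(min_y, max_y, self.trim_height, data_height)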
Example #2
    def __getitem__(self, index):
        if self.training:
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index

        # get the anchor index for current sample index
        # here we set the anchor index to the last
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]
        blobs = get_minibatch(minibatch_db, self._num_classes)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        # we need to randomly shuffle the bounding boxes.
        data_height, data_width = data.size(1), data.size(2)
        if self.training:
            np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image needs cropping, crop to the target size.
            ratio = self.ratio_list_batch[index]

            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    # clamp gt boxes to the trimmed region
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    # clamp gt boxes to the trimmed region
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the ratio, pad the image.
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                padding_data = torch.FloatTensor(
                    int(np.ceil(data_width / ratio)), data_width, 3).zero_()

                padding_data[:data_height, :, :] = data[0]
                # update im_info
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            elif ratio > 1:
                # this means that data_width > data_height,
                # so pad the width up to ceil(data_height * ratio)
                padding_data = torch.FloatTensor(
                    data_height, int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                # square group: the slice is already trim_size x trim_size,
                # so no extra zero padding is needed
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            # check the bounding box:
            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            keep = torch.nonzero(not_keep == 0).view(-1)

            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
            else:
                num_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            return padding_data, im_info, gt_boxes_padding, num_boxes
        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0

            return data, im_info, gt_boxes, num_boxes
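
Example #2 pads instead of warping: each batch group shares a target aspect ratio (width / height), and images are zero-padded on the short side to reach it. A self-contained sketch of just that padding step (assumes an H x W x 3 float image, as produced by get_minibatch above):

import numpy as np
import torch

def pad_to_ratio(img, ratio):
    """Zero-pad an (H, W, 3) tensor so that W / H matches the group ratio."""
    h, w = img.size(0), img.size(1)
    if ratio < 1:    # taller than the group ratio: pad rows
        out = torch.zeros(int(np.ceil(w / ratio)), w, 3)
        out[:h, :, :] = img
    elif ratio > 1:  # wider than the group ratio: pad columns
        out = torch.zeros(h, int(np.ceil(h * ratio)), 3)
        out[:, :w, :] = img
    else:            # square group: crop to the short side instead
        s = min(h, w)
        out = img[:s, :s, :]
    return out

# e.g. pad_to_ratio(torch.ones(600, 400, 3), 400 / 900).shape -> (900, 400, 3)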
Example #3
    def __getitem__(self, index):
        index_ratio = int(self.ratio_index[index])

        # get the anchor index for current sample index
        # here we set the anchor index to the last
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]

        blobs = get_minibatch(minibatch_db, self._num_classes)
        # print(self.list_ind)

        blobs['gt_boxes'] = [
            x for x in blobs['gt_boxes'] if x[-1] in self.list_ind
        ]
        blobs['gt_boxes'] = np.array(blobs['gt_boxes'])

        if self.training:
            # Randomly choose the query category
            try:
                category = blobs['gt_boxes'][:, -1]
            except IndexError:
                print(blobs['gt_boxes'])
                exit(0)
            cand = np.unique(category)
            if len(cand) == 1:
                choice = cand[0]

                cla = self.class2cat[int(choice)]  #--------------->
                sketch_array = self.cat2sketch[cla]
                # print(sketch_array)
                sketch = random.choices(sketch_array, k=4)
                sketch_array = []
                for sk in sketch:  # ------>  Uncomment for sketches
                    sk = pickle.load(open(sk, 'rb'))
                    key = list(sk.keys())[0]
                    sk = convert_to_np_raw(sk[key])
                    sk = np.stack((sk, sk, sk), axis=0) / 255.0
                    sketch_array.append(sk)
                sketch_array = np.stack(sketch_array, axis=0)  #------------->

                # sketch = random.choice(sketch_array)
                # sketch = pickle.load(open(sketch, 'rb'))
                # key = list(sketch.keys())[0]
                # sketch = convert_to_np_raw(sketch[key])
                # sketch = np.stack((sketch, sketch, sketch), axis=0)/255.0

            else:
                p = []
                for i in cand:
                    p.append(self.show_time[i])
                p = np.array(p)
                p /= p.sum()
                choice = np.random.choice(cand, 1, p=p)[0]

                cla = self.class2cat[int(choice)]  # -------------->
                sketch_array = self.cat2sketch[cla]
                sketch = random.choices(sketch_array, k=4)
                sketch_array = []
                for sk in sketch:
                    sk = pickle.load(open(
                        sk, 'rb'))  # ------> Uncomment for sketches
                    key = list(sk.keys())[0]
                    sk = convert_to_np_raw(sk[key])
                    sk = np.stack((sk, sk, sk), axis=0) / 255.0
                    sketch_array.append(sk)
                sketch_array = np.stack(sketch_array,
                                        axis=0)  # --------------->

            # Relabel classes: 1 for the chosen query category, 0 otherwise
            blobs['gt_boxes'][:, -1] = np.where(
                blobs['gt_boxes'][:, -1] == choice, 1, 0)
            # Get query image
            # print(sketch.shape)
            # query = self.load_query(choice) # Uncomment for images
            # print(query.shape)
            # exit(0)
            query = sketch_array  # Uncomment for sketches

        else:
            # query = self.load_query(index, minibatch_db[0]['img_id']) # Comment for sketches
            # ''' # Uncomment for sketches
            category = self.cat_list[index]
            # list all the candidate image
            # all_data = self._query[catgory]

            # Use image_id to seed the RNG so the candidate sketch order
            # is reproducible for a given image
            # print(catgory)
            # exit()
            img_id = minibatch_db[0]['img_id']
            random.seed(img_id)
            # l = list(range(len(all_data)))
            # random.shuffle(l)
            cla = self.class2cat[int(category)]
            # print(cla)
            sketch_array = list(self.cat2sketch[cla])  # copy so the cached list is not shuffled in place
            sketch_data_array = []
            random.shuffle(sketch_array)
            #print(sketch_array)
            for sketch in sketch_array[0:20]:
                sketch = pickle.load(open(sketch, 'rb'))
                key = list(sketch.keys())[0]
                sketch = convert_to_np_raw(sketch[key])
                # intrim_sketch = self.toTensor(sketch)
                # save_image(intrim_sketch, 'outfile.jpg')
                sketch = np.stack((sketch, sketch, sketch), axis=0) / 255.0
                # print(sketch.shape)
                # im = Image.fromarray(sketch)
                # im.save('outfile'+str(sketch_num)+'.jpg')

                # exit(0)
                sketch_data_array.append(sketch)
            query = np.stack(sketch_data_array)

            # choose the candidate sequence and take out the data information
            # position=l[self.query_position%len(l)]
            # data     = all_data[position]
            # '''

        data = torch.from_numpy(blobs['data'])
        # query = torch.from_numpy(query)
        query = torch.from_numpy(query).contiguous()  # Uncomment for sketches
        # query = torch.from_numpy(query) # Comment for sketches
        # query = query.permute(0, 3, 1, 2).contiguous().squeeze(0) # Comment for the case of sketches
        im_info = torch.from_numpy(blobs['im_info'])

        # we need to randomly shuffle the bounding boxes.
        data_height, data_width = data.size(1), data.size(2)
        if self.training:
            np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image needs cropping, crop to the target size.
            ratio = self.ratio_list_batch[index]

            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    # clamp gt boxes to the trimmed region
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    # clamp gt boxes to the trimmed region
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the ratio, pad the image.
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                                 data_width, 3).zero_()

                padding_data[:data_height, :, :] = data[0]
                # update im_info
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            elif ratio > 1:
                # this means that data_width > data_height,
                # so pad the width up to ceil(data_height * ratio)
                padding_data = torch.FloatTensor(data_height, \
                                                 int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                # square group: the slice is already trim_size x trim_size,
                # so no extra zero padding is needed
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            # check the bounding box:
            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            # not_keep = (gt_boxes[:,2] - gt_boxes[:,0]) < 10
            # print(not_keep)
            # not_keep = (gt_boxes[:,2] - gt_boxes[:,0]) < torch.FloatTensor([10]) | (gt_boxes[:,3] - gt_boxes[:,1]) < torch.FloatTensor([10])

            keep = torch.nonzero(not_keep == 0).view(-1)

            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
            else:
                num_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            return padding_data, query, im_info, gt_boxes_padding, num_boxes
        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)

            # gt_boxes = torch.FloatTensor([1,1,1,1,1])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])
            choice = self.cat_list[index]

            return data, query, im_info, gt_boxes, choice
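
When several categories are present, Example #3 samples the query class with probabilities proportional to per-class weights (self.show_time). The weighted draw in isolation, with toy values:

import numpy as np

show_time = {1: 4.0, 3: 1.0}   # hypothetical per-category weights
cand = np.array([1, 3])        # categories present in the image
p = np.array([show_time[c] for c in cand])
p = p / p.sum()                # -> [0.8, 0.2]
choice = np.random.choice(cand, 1, p=p)[0]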
    def __getitem__(self, index):
        if self.training:
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index

        # get the anchor index for current sample index
        # here we set the anchor index to the last
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]
        blobs = get_minibatch(minibatch_db, self._num_classes)
        data = torch.from_numpy(blobs['data'])

        # flag images that arrive with no gt boxes so num_boxes can be
        # zeroed in the test branch below
        Flag = False
        if blobs['gt_boxes'].size == 0:
            gt_boxes = torch.FloatTensor([0, 0, 10, 10, 0]).view(1, -1)
            Flag = True
        else:
            # np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])

        if blobs['gt_relations'].size == 0:
            gt_relations = torch.LongTensor([0, 0, 0]).view(1, -1)
        else:
            gt_relations = torch.from_numpy(blobs['gt_relations']).long()

        # append gt_attributes to gt_boxes
        gt_att_mat = gt_boxes.new(gt_boxes.size(0), 16).zero_()
        gt_boxes = torch.cat((gt_boxes, gt_att_mat), 1)

        # append gt_relations to gt_boxes
        gt_rels_mat = gt_boxes.new(gt_boxes.size(0), gt_boxes.size(0)).zero_()
        gt_rels_mat[gt_relations[:, 0],
                    gt_relations[:, 1]] = gt_relations[:, 2].float()
        gt_boxes = torch.cat((gt_boxes, gt_rels_mat), 1)

        im_info = torch.from_numpy(blobs['im_info'])
        data_height, data_width = data.size(1), data.size(2)

        if self.training:
            # we need to randomly shuffle the bounding boxes.
            ratio = self.ratio_list_batch[index]
            # np.random.shuffle(blobs['gt_boxes'])

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE: need to cope with vanished gt boxes after cropping

            # get the index range
            # if the image needs cropping, crop to the target size.

            if self.need_crop[index_ratio] > 0:
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    y_s = 0
                    box_region = max_y - min_y + 1
                    # if min_y == 0:
                    #     y_s = 0
                    # else:
                    #     if (box_region-trim_size) < 0:
                    #         y_s_min = max(max_y-trim_size, 0)
                    #         y_s_max = min(min_y, data_height-trim_size)
                    #         if y_s_min == y_s_max:
                    #             y_s = y_s_min
                    #         else:
                    #             y_s = np.random.choice(range(y_s_min, y_s_max))
                    #     else:
                    #         y_s_add = int((box_region-trim_size)/2)
                    #         if y_s_add == 0:
                    #             y_s = min_y
                    #         else:
                    #             y_s = np.random.choice(range(min_y, min_y+y_s_add))
                    # crop the image
                    assert trim_size > 0, 'trim_size must be positive after cropping'
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - y_s
                    gt_boxes[:, 3] = gt_boxes[:, 3] - y_s

                    # clamp gt boxes to the trimmed region
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.floor(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    x_s = 0
                    # box_region = max_x - min_x + 1
                    # if min_x == 0:
                    #     x_s = 0
                    # else:
                    #     if (box_region-trim_size) < 0:
                    #         x_s_min = max(max_x-trim_size, 0)
                    #         x_s_max = min(min_x, data_width-trim_size)
                    #         if x_s_min == x_s_max:
                    #             x_s = x_s_min
                    #         else:
                    #             x_s = np.random.choice(range(x_s_min, x_s_max))
                    #     else:
                    #         x_s_add = int((box_region-trim_size)/2)
                    #         if x_s_add == 0:
                    #             x_s = min_x
                    #         else:
                    #             x_s = np.random.choice(range(min_x, min_x+x_s_add))
                    # crop the image
                    assert trim_size > 0, 'trim_size must be positive after cropping'
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - x_s
                    gt_boxes[:, 2] = gt_boxes[:, 2] - x_s
                    # clamp gt boxes to the trimmed region
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the ratio, pad the image.
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                                 data_width, 3).zero_()
                data_height = data[0].size(0)
                padding_data[:data_height, :, :] = data[0]
                # update im_info
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            elif ratio > 1:
                # this means that data_width > data_height,
                # so pad the width up to ceil(data_height * ratio)
                padding_data = torch.FloatTensor(data_height, \
                                                 int(np.ceil(data_height * ratio)), 3).zero_()
                data_width = data[0].size(1)
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                # square group: the slice is already trim_size x trim_size
                padding_data = data[0][:trim_size, :trim_size, :]
                # clamp only the coordinate columns; the class, attribute and
                # relation columns must stay untouched
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            if gt_boxes.size(0) > self.max_num_box:
                if not cfg.HAS_RELATIONS:
                    gt_boxes = gt_boxes[:self.max_num_box]
                else:
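                    # 21 = 5 box columns + 16 attribute columns; the relation
                    # columns are truncated along with the boxes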
                    gt_boxes = gt_boxes[:self.max_num_box, :(self.max_num_box +
                                                             21)]

            # check the bounding box:
            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            keep = torch.nonzero(not_keep == 0).view(-1)

            gt_boxes_padding = torch.FloatTensor(
                self.max_num_box, self.max_num_box + 21).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                if cfg.HAS_RELATIONS:
                    gt_boxes = gt_boxes[:,
                                        torch.cat((torch.arange(0, 21).long(),
                                                   keep + 21), 0)]

                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :gt_boxes.size(1)] = gt_boxes[:num_boxes]
            else:
                num_boxes = 0
            # take the top num_boxes
            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            if self.normalize:
                padding_data = padding_data / 255.0
                padding_data = self.normalize(padding_data)
            return padding_data, im_info, gt_boxes_padding, num_boxes
        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            num_boxes = gt_boxes.size(0)
            im_info = im_info.view(3)

            if self.normalize:
                data = data / 255.0
                data = self.normalize(data)

            if Flag:
                num_boxes = 0
            else:
                num_boxes = min(gt_boxes.size(0), self.max_num_box)

            return data, im_info, gt_boxes, num_boxes
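
Example #4 packs the scene graph into the box tensor itself: 16 attribute columns, then an N x N relation matrix whose (i, j) entry holds the predicate label for box i -> box j. A toy-sized sketch of that packing:

import torch

gt_boxes = torch.zeros(3, 5)                  # 3 boxes: x1, y1, x2, y2, class
gt_relations = torch.LongTensor([[0, 2, 7]])  # box 0 -> box 2, predicate 7

gt_boxes = torch.cat((gt_boxes, torch.zeros(3, 16)), 1)   # attribute slots
rels = torch.zeros(3, 3)
rels[gt_relations[:, 0], gt_relations[:, 1]] = gt_relations[:, 2].float()
gt_boxes = torch.cat((gt_boxes, rels), 1)     # final shape: (3, 5 + 16 + 3)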
    def __getitem__(self, index):
        if self.training:
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index

        # get the anchor index for current sample index
        # here we set the anchor index to the last
        # sample in this group
        minibatch_db = self._roidb[index_ratio]
        blobs = get_minibatch_allinone(minibatch_db)
        blobs = self._imagePreprocess(blobs)

        data = torch.from_numpy(blobs['data'].copy())
        im_info = torch.from_numpy(blobs['im_info'])
        # we need to randomly shuffle the bounding boxes.
        data_height, data_width = data.size(0), data.size(1)
        if self.training:
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])
            gt_grasps = torch.from_numpy(blobs['gt_grasps'])
            gt_grasp_inds = torch.from_numpy(blobs['gt_grasp_inds'])

            # shuffle boxes
            shuffle_inds_b = list(range(blobs['gt_boxes'].shape[0]))
            np.random.shuffle(shuffle_inds_b)
            shuffle_inds_b = torch.LongTensor(shuffle_inds_b)
            gt_boxes = gt_boxes[shuffle_inds_b]
            gt_grasp_inds = self._graspIndsPostProcess(
                gt_grasp_inds, shuffle_inds_b.data.numpy(), blobs['node_inds'])

            # shuffle grasps
            shuffle_inds_g = list(range(blobs['gt_grasps'].shape[0]))
            np.random.shuffle(shuffle_inds_g)
            shuffle_inds_g = torch.LongTensor(shuffle_inds_g)
            gt_grasps = gt_grasps[shuffle_inds_g]
            gt_grasp_inds = gt_grasp_inds[shuffle_inds_g]

            # if batch_size > 1, all images need to be processed to have the same size
            if self.batch_size > 1:
                ratio = self.ratio_list_batch[index]
                # if the image need to crop, crop to the target size.
                coord_s = (0, 0)
                if self._roidb[index_ratio]['need_crop']:
                    # here image cropping is according to both gt_boxes and gt_grasps
                    data, coord_s = self._cropImage(
                        data, torch.cat((gt_grasps, gt_boxes), dim=-1), ratio)
                # based on the ratio, padding the image.
                data, im_info = self._paddingImage(data, im_info, ratio)
                # crop boxes according to the cropped image
                gt_boxes = self._cropBox(data, coord_s, gt_boxes)
                gt_grasps, _, gt_grasp_inds = self._cropGrasp(
                    data, coord_s, gt_grasps, gt_grasp_inds)

            gt_boxes, keep = self._boxPostProcess(gt_boxes)
            gt_grasps, num_grasps, gt_grasp_inds = self._graspPostProcess(
                gt_grasps, gt_grasp_inds)

            shuffle_inds_b = shuffle_inds_b[keep]
            rel_mat = self._genRelMat(shuffle_inds_b, blobs['node_inds'],
                                      blobs['child_lists'],
                                      blobs['parent_lists'])

            # permute trim_data to adapt to downstream processing
            data = data.permute(2, 0, 1).contiguous()
            assert data.size(1) == im_info[0] and data.size(2) == im_info[1]
            return data, im_info, gt_boxes, gt_grasps, keep.size(
                0), num_grasps, rel_mat, gt_grasp_inds
        else:
            data = data.permute(2, 0, 1).contiguous()
            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            gt_grasps = torch.FloatTensor([1, 1, 1, 1, 1, 1, 1, 1])
            gt_grasp_inds = torch.LongTensor([0])
            num_boxes = 0
            num_grasps = 0
            rel_mat = torch.FloatTensor([0])
            return data, im_info, gt_boxes, gt_grasps, num_boxes, num_grasps, rel_mat, gt_grasp_inds
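
Example #5 shuffles through an explicit index tensor so the boxes and the grasp-to-box mapping stay aligned (note the list(range(...)) materialization fixed above: np.random.shuffle cannot shuffle a Python 3 range). The same idea reads more compactly with np.random.permutation; a small sketch:

import numpy as np
import torch

boxes = torch.arange(20.0).view(4, 5)              # 4 toy boxes
perm = torch.from_numpy(np.random.permutation(4))  # shared shuffle order
boxes = boxes[perm]
# any companion tensor indexed by box (e.g. grasp -> box indices) is
# remapped with the same perm, so the association survives the shuffle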
    def __getitem__(self, index):
        self.trim_size = min(self.trim_height, self.trim_width)
        minibatch_db = [self._roidb[index]]
        blobs = get_minibatch(minibatch_db, self._num_classes)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        rois = torch.from_numpy(blobs['rois'])
        image_classes = torch.from_numpy(blobs['image_classes'])
        # we need to randomly shuffle the bounding boxes.
        data_height, data_width = data.size(1), data.size(2)

        if self.training:
            # np.random.shuffle(blobs['gt_boxes'])
            # np.random.shuffle(blobs['weak_gt_boxes'])

            gt_boxes = torch.from_numpy(blobs['gt_boxes'])
            num_boxes = gt_boxes.size(0)
            wgt_boxes = torch.from_numpy(blobs['weak_gt_boxes'])
            wnum_boxes = wgt_boxes.size(0)

            available_boxes = torch.from_numpy(
                np.vstack((blobs['gt_boxes'], blobs['weak_gt_boxes'])))

            # available_boxes = torch.from_numpy(blobs['gt_boxes'])

            if data_height > self.trim_height:
                # this means that data_width < data_height, we need to crop the
                # data_height
                min_y = int(torch.min(available_boxes[:, 1]))
                max_y = int(torch.max(available_boxes[:, 3]))
                trim_size = self.trim_height
                box_region = max_y - min_y + 1

                if min_y == 0:
                    y_s = 0
                else:
                    if (box_region - trim_size) < 0:
                        y_s_min = max(max_y - trim_size, 0)
                        y_s_max = min(min_y, data_height - trim_size)
                        if y_s_min == y_s_max:
                            y_s = y_s_min
                        else:
                            y_s = np.random.choice(range(y_s_min, y_s_max))

                    else:
                        y_s_add = int((box_region - trim_size) / 2)
                        if y_s_add == 0:
                            y_s = min_y
                        else:
                            y_s = np.random.choice(range(min_y, min_y + y_s_add))

                # crop the image
                data = data[:, y_s:(y_s + trim_size), :, :]

                # shift y coordinate of gt_boxes
                if num_boxes > 0:
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                # shift y coordinate of weak gt_boxes
                if wnum_boxes > 0:
                    wgt_boxes[:, 1] = wgt_boxes[:, 1] - float(y_s)
                    wgt_boxes[:, 3] = wgt_boxes[:, 3] - float(y_s)
                    wgt_boxes[:, 1].clamp_(0, trim_size - 1)
                    wgt_boxes[:, 3].clamp_(0, trim_size - 1)

                # shift y coordinate of rois
                rois[:, 2] = rois[:, 2] - float(y_s)
                rois[:, 4] = rois[:, 4] - float(y_s)
                rois[:, 2].clamp_(0, trim_size - 1)
                rois[:, 4].clamp_(0, trim_size - 1)

            if data_width > self.trim_width:
                # this means that data_width > data_height, we need to crop the
                # data_width
                min_x = int(torch.min(available_boxes[:, 0]))
                max_x = int(torch.max(available_boxes[:, 2]))
                trim_size = self.trim_width
                box_region = max_x - min_x + 1
                if min_x == 0:
                    x_s = 0
                else:
                    if (box_region - trim_size) < 0:
                        x_s_min = max(max_x - trim_size, 0)
                        x_s_max = min(min_x, data_width - trim_size)
                        if x_s_min == x_s_max:
                            x_s = x_s_min
                        else:
                            x_s = np.random.choice(range(x_s_min, x_s_max))
                    else:
                        x_s_add = int((box_region - trim_size) / 2)
                        if x_s_add == 0:
                            x_s = min_x
                        else:
                            x_s = np.random.choice(range(min_x, min_x + x_s_add))

                # crop the image
                data = data[:, :, x_s:(x_s + trim_size), :]

                # shift x coordinate of gt_boxes
                if num_boxes > 0:
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

                # shift x coordinate of weak gt_boxes
                if wnum_boxes > 0:
                    wgt_boxes[:, 0] = wgt_boxes[:, 0] - float(x_s)
                    wgt_boxes[:, 2] = wgt_boxes[:, 2] - float(x_s)
                    wgt_boxes[:, 0].clamp_(0, trim_size - 1)
                    wgt_boxes[:, 2].clamp_(0, trim_size - 1)

                # shift x coordinate of rois
                rois[:, 1] = rois[:, 1] - float(x_s)
                rois[:, 3] = rois[:, 3] - float(x_s)
                rois[:, 1].clamp_(0, trim_size - 1)
                rois[:, 3].clamp_(0, trim_size - 1)

            trim_size = min(self.trim_width, self.trim_height)
            padding_data = torch.FloatTensor(trim_size, trim_size, 3).zero_()
            real_height = min(trim_size, data_height)
            real_width = min(trim_size, data_width)
            padding_data[:real_height, :real_width, :] = data[0][:real_height, :real_width, :]

            rois[:, 1:5].clamp_(0, trim_size)
            im_info[0, 0] = trim_size
            im_info[0, 1] = trim_size

            # check the bounding box:
            if num_boxes > 0:
                gt_boxes[:, :4].clamp_(0, trim_size)
                not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
                    gt_boxes[:, 1] == gt_boxes[:, 3])
                keep = torch.nonzero(not_keep == 0).view(-1)
                gt_boxes_padding = torch.FloatTensor(
                    self.max_num_box, gt_boxes.size(1)).zero_()
                if keep.numel() != 0:
                    gt_boxes = gt_boxes[keep]
                    num_boxes = min(gt_boxes.size(0), self.max_num_box)
                    gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
                else:
                    num_boxes = 0
            else:
                gt_boxes_padding = torch.FloatTensor(
                    self.max_num_box, 5).zero_()
                num_boxes = 0

            # check the weak bounding box:
            if wnum_boxes > 0:
                wgt_boxes[:, :4].clamp_(0, trim_size)
                wnot_keep = (wgt_boxes[:, 0] == wgt_boxes[:, 2]) | (
                    wgt_boxes[:, 1] == wgt_boxes[:, 3])
                wkeep = torch.nonzero(wnot_keep == 0).view(-1)
                wgt_boxes_padding = torch.FloatTensor(
                    self.max_num_box, wgt_boxes.size(1)).zero_()
                if wkeep.numel() != 0:
                    wgt_boxes = wgt_boxes[wkeep]
                    wnum_boxes = min(wgt_boxes.size(0), self.max_num_box)
                    wgt_boxes_padding[:wnum_boxes, :] = wgt_boxes[:wnum_boxes]
                else:
                    wnum_boxes = 0
            else:
                wgt_boxes_padding = torch.FloatTensor(
                    self.max_num_box, 5).zero_()
                wnum_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            # padding rois
            rois_not_keep = (rois[:, 1] == rois[:, 3]) | (
                rois[:, 2] == rois[:, 4])
            rois_keep = torch.nonzero(rois_not_keep == 0).view(-1)
            rois = rois[rois_keep]
            max_num_rois = 2000
            num_rois = min(rois.size(0), max_num_rois)
            rois_padding = torch.FloatTensor(
                max_num_rois, 5).zero_()
            rois_padding[:num_rois, :] = rois[:num_rois]

            return padding_data, im_info, gt_boxes_padding, num_boxes, wgt_boxes_padding, wnum_boxes, rois_padding, image_classes
        else:
            data = data.permute(0, 3, 1, 2).contiguous().view(
                3, data_height, data_width)
            im_info = im_info.view(3)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0

            return data, im_info, gt_boxes, num_boxes, rois, image_classes
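
The degenerate-box filter followed by fixed-size padding recurs in every training branch of these loaders. One possible extraction of that pattern (the helper name is ours):

import torch

def pad_boxes(boxes, max_num_box):
    """Drop zero-area boxes, then pad/truncate to (max_num_box, C)."""
    keep = ((boxes[:, 0] != boxes[:, 2]) &
            (boxes[:, 1] != boxes[:, 3])).nonzero().view(-1)
    padded = torch.zeros(max_num_box, boxes.size(1))
    num = 0
    if keep.numel() != 0:
        kept = boxes[keep]
        num = min(kept.size(0), max_num_box)
        padded[:num, :] = kept[:num]
    return padded, num

# gt_boxes_padding, num_boxes = pad_boxes(gt_boxes, self.max_num_box)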
Example #7
    answer = []
    nets = []

    for model_file in model_file_list:
        cnn = torch.load(model_file, map_location=device)
        cnn.eval()  # switch the model to eval mode
        nets.append(cnn)

    with torch.set_grad_enabled(False):
        i = 0
        tick = time.time()
        for data, hash in test_generator:
            combined_classes = torch.zeros(6, device=device)
            for net in nets:
                # Here is the trick: the datagen yields batches of 1, but the dataloader
                # actually returns data in batches of variable length, so we permute the
                # dims to get a properly shaped tensor
                outputs = net(data.permute((1, 0, 2)).to(device))
                classes = torch.softmax(outputs, 1).mean(0)
                combined_classes += classes
            winner = combined_classes.argmax().item()
            answer.append({'hash': hash[0], 'class': class_list[winner]})
            # print(winner)
            i += 1
            if i % 100 == 0:
                tock = time.time()
                time_to_go = (len(test_generator) - i) * (tock - tick) / i
                print('Batch {:d} / {:d}, {:.1f} sec, to go: {:.0f}'.format(
                    i,
                    len(test_generator),
                    tock - tick,
                    time_to_go)
                )
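
The ensemble in Example #7 simply averages the per-model softmax distributions before taking the argmax. The voting step in isolation:

import torch

logits_per_model = [torch.randn(4, 6) for _ in range(3)]  # 3 models, 4 crops, 6 classes
combined = torch.zeros(6)
for logits in logits_per_model:
    combined += torch.softmax(logits, 1).mean(0)  # average over the crops
winner = combined.argmax().item()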
Example #8
    def __getitem__(self, index):
        here = self._image_set[index]
        im = imread(here['img_full_path'])
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)

        raw_img = im.copy()
        # rgb -> bgr
        im = im[:, :, ::-1]
        gt_boxes = here['object_set'].copy()
        # random flip
        if self.training and np.random.rand() > 0.5:
            im = im[:, ::-1, :]
            raw_img = raw_img[:, ::-1, :].copy()
            flipped_gt_boxes = gt_boxes.copy()
            flipped_gt_boxes[:, 0] = im.shape[1] - gt_boxes[:, 2]
            flipped_gt_boxes[:, 2] = im.shape[1] - gt_boxes[:, 0]
            gt_boxes = flipped_gt_boxes

        if self.rotation:
            gt_boxes = to_center_form(gt_boxes)
            rotated_gt_boxes = gt_boxes.copy()
            h, w = im.shape[0], im.shape[1]
            angle = np.random.choice([0, 90, 180, 270])
            #im = rotate(im, angle)
            #raw_img = rotate(raw_img, angle)

            if angle == 90:
                im = im.transpose([1, 0, 2])[::-1, :, :].copy()
                raw_img = raw_img.transpose([1, 0, 2])[::-1, :, :].copy()

                rotated_gt_boxes[:, 0], rotated_gt_boxes[:, 1] = gt_boxes[:, 1], w - gt_boxes[:, 0]
                rotated_gt_boxes[:, 2], rotated_gt_boxes[:, 3] = gt_boxes[:, 3], gt_boxes[:, 2]
            elif angle == 180:
                im = im[::-1, ::-1, :].copy()
                raw_img = raw_img[::-1, ::-1, :].copy()

                rotated_gt_boxes[:, 0], rotated_gt_boxes[:, 1] = w - gt_boxes[:, 0], h - gt_boxes[:, 1]
            elif angle == 270:
                im = im.transpose([1, 0, 2])[:, ::-1, :].copy()
                raw_img = raw_img.transpose([1, 0, 2])[:, ::-1, :].copy()

                rotated_gt_boxes[:, 0], rotated_gt_boxes[:, 1] = h - gt_boxes[:, 1], gt_boxes[:, 0]
                rotated_gt_boxes[:, 2], rotated_gt_boxes[:, 3] = gt_boxes[:, 3], gt_boxes[:, 2]
            gt_boxes = to_point_form(rotated_gt_boxes)

        im = im.astype(np.float32, copy=False)
        if self.pd is not None:
            im = self.pd(im)
        im -= np.array([[[102.9801, 115.9465, 122.7717]]])
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])

        if self.multi_scale:
            im_scale = np.random.choice([416, 500, 600, 720, 864]) / float(im_size_min)
        else:
            im_scale = 600 / float(im_size_min)
        im = cv2.resize(im,
                        None,
                        None,
                        fx=im_scale,
                        fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)

        data = torch.from_numpy(im)
        data_height, data_width = data.size(0), data.size(1)
        data = data.permute(2, 0, 1).contiguous()

        if self.training:
            np.random.shuffle(gt_boxes)

        box_categories = gt_boxes[:, 4].astype(np.int64)
        for i in range(len(box_categories)):
            box_categories[i] = self._id_to_index[box_categories[i]]

        gt_boxes = gt_boxes[:, :4]
        gt_boxes *= im_scale

        gt_boxes = torch.from_numpy(gt_boxes)
        box_categories = torch.from_numpy(box_categories)
        #print(data, gt_boxes, data_height, data_width, im_scale, raw_img)
        return data, gt_boxes, box_categories, data_height, data_width, im_scale, raw_img, here['id']
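
For center-form boxes, the 90-degree case above maps (cx, cy) to (cy, w - cx) and swaps the box extents. A quick numpy check of that mapping:

import numpy as np

boxes = np.array([[30.0, 10.0, 8.0, 4.0]])  # center form: cx, cy, w, h
w = 100                                      # image width before rotation
rot = boxes.copy()
rot[:, 0], rot[:, 1] = boxes[:, 1], w - boxes[:, 0]  # new center
rot[:, 2], rot[:, 3] = boxes[:, 3], boxes[:, 2]      # swapped extents
print(rot)  # [[10. 70. 4. 8.]]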
def run(modelcheckpoint, normalizeData, simfile):
    """
    """

    model = wresnet34x2().cpu()

    if os.path.isfile(modelcheckpoint):
        print("=> Loading checkpoint '{}'".format(modelcheckpoint))
        checkpoint = torch.load(modelcheckpoint,
                                map_location=lambda storage, loc: storage)
        best_acc = checkpoint['best_acc']
        print("This model had an accuracy of %.2f on the validation set." %
              (best_acc, ))
        keys = list(checkpoint['state_dict'].keys())
        for old_key in keys:
            new_key = old_key.replace('module.', '')
            checkpoint['state_dict'][new_key] = checkpoint['state_dict'].pop(
                old_key)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> Loaded checkpoint '{}' (epoch {})".format(
            modelcheckpoint, checkpoint['epoch']))
    else:
        print("=> No model checkpoint found. Exiting")
        return None

    cudnn.benchmark = False

    # Load the Normalizer function
    h = h5py.File(normalizeData, 'r')
    mean = torch.FloatTensor(h['mean'][:])
    mean = mean.permute(2, 0, 1)
    std_dev = torch.FloatTensor(h['std_dev'][:])
    std_dev = std_dev.permute(2, 0, 1)
    h.close()
    normalize = transforms.Normalize(mean=mean, std=std_dev)

    # Load simulation data
    time_freq_resolution = (384, 512)
    aca = ibmseti.compamp.SimCompamp(open(simfile, 'rb').read())
    complex_data = aca.complex_data()
    complex_data = complex_data.reshape(time_freq_resolution[0],
                                        time_freq_resolution[1])
    complex_data = complex_data * np.hanning(complex_data.shape[1])
    cpfft = np.fft.fftshift(np.fft.fft(complex_data), 1)
    spectrogram = np.abs(cpfft)
    # two feature channels: log power spectrum and spectral phase
    features = np.stack(
        (np.log(spectrogram**2), np.arctan(cpfft.imag / cpfft.real)), -1)

    # create FloatTensor, permute to proper dimensional order, and normalize
    data = torch.FloatTensor(features)
    data = data.permute(2, 0, 1)
    data = normalize(data)

    # The model expects a 4D tensor
    s = data.size()
    data = data.contiguous().view(1, s[0], s[1], s[2])

    model.eval()

    # Variable(..., volatile=True) was the pre-0.4 way to disable autograd;
    # torch.no_grad() is the modern equivalent. Softmax needs an explicit dim.
    softmax = torch.nn.Softmax(dim=1)
    with torch.no_grad():
        output = model(data)
        probs = softmax(output).view(7).tolist()

    return probs
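A hypothetical call, just to show the contract (file names are placeholders; run() returns a 7-way probability list, or None when the checkpoint is missing):

probs = run('checkpoints/model_best.pth.tar', 'mean_std.hdf5', 'sample_signal.dat')
if probs is not None:
    print('predicted class:', probs.index(max(probs)))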
Example #10
    def __getitem__(self, index):
        im, gt_boxes, gt_categories, proposals, prop_scores, id, loader_index = self.get_raw_data(
            index)
        raw_img = im.copy()
        proposals, prop_scores = self.select_proposals(proposals, prop_scores)

        if self.warping and np.random.rand() > 0.8:
            src, dst = make_transform(im, gt_boxes)
            tform = PiecewiseAffineTransform()
            tform.estimate(src, dst)
            im = warp(im, tform, output_shape=(im.shape[0], im.shape[1]))
            raw_img = im.copy()

        # rgb -> bgr
        im = im[:, :, ::-1]

        # random flip
        # if self.training and np.random.rand() > 0.5:
        #     im = im[:, ::-1, :]
        #     raw_img = raw_img[:, ::-1, :].copy()
        #
        #     flipped_gt_boxes = gt_boxes.copy()
        #     flipped_gt_boxes[:, 0] = im.shape[1] - gt_boxes[:, 2]
        #     flipped_gt_boxes[:, 2] = im.shape[1] - gt_boxes[:, 0]
        #     gt_boxes = flipped_gt_boxes
        #
        #     flipped_xmin = im.shape[1] - proposals[:, 2]
        #     flipped_xmax = im.shape[1] - proposals[:, 0]
        #     proposals[:, 0] = flipped_xmin
        #     proposals[:, 2] = flipped_xmax

        if self.training and self.rotation:
            gt_boxes = to_center_form(gt_boxes)
            rotated_gt_boxes = gt_boxes.copy()
            h, w = im.shape[0], im.shape[1]
            angle = np.random.choice([0, 90, 180, 270])
            #im = rotate(im, angle)
            #raw_img = rotate(raw_img, angle)

            if angle == 90:
                im = im.transpose([1, 0, 2])[::-1, :, :].copy()
                raw_img = raw_img.transpose([1, 0, 2])[::-1, :, :].copy()

                rotated_gt_boxes[:, 0], rotated_gt_boxes[:, 1] = gt_boxes[:, 1], w - gt_boxes[:, 0]
                rotated_gt_boxes[:, 2], rotated_gt_boxes[:, 3] = gt_boxes[:, 3], gt_boxes[:, 2]
            elif angle == 180:
                im = im[::-1, ::-1, :].copy()
                raw_img = raw_img[::-1, ::-1, :].copy()

                rotated_gt_boxes[:, 0], rotated_gt_boxes[:, 1] = w - gt_boxes[:, 0], h - gt_boxes[:, 1]
            elif angle == 270:
                im = im.transpose([1, 0, 2])[:, ::-1, :].copy()
                raw_img = raw_img.transpose([1, 0, 2])[:, ::-1, :].copy()

                rotated_gt_boxes[:, 0], rotated_gt_boxes[:, 1] = h - gt_boxes[:, 1], gt_boxes[:, 0]
                rotated_gt_boxes[:, 2], rotated_gt_boxes[:, 3] = gt_boxes[:, 3], gt_boxes[:, 2]
            gt_boxes = to_point_form(rotated_gt_boxes)

        # cast to float type and mean subtraction
        im = im.astype(np.float32, copy=False)
        if self.pd is not None:
            im = self.pd(im)
            raw_img = self.pd(raw_img.astype(np.float32,
                                             copy=False)).astype(np.uint8)
        im -= np.array([[[102.9801, 115.9465, 122.7717]]])

        # image rescale
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])

        if self.multi_scale:
            im_scale = np.random.choice([416, 500, 600, 720, 864]) / float(im_size_min)
            if im_size_max * im_scale > 1200:
                im_scale = 1200 / im_size_max
        else:
            im_scale = 600 / float(im_size_min)
        im = cv2.resize(im,
                        None,
                        None,
                        fx=im_scale,
                        fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)

        gt_boxes = gt_boxes * im_scale
        proposals = proposals * im_scale

        # to tensor
        data = torch.from_numpy(im)
        data = data.permute(2, 0, 1).contiguous()
        gt_boxes = torch.from_numpy(gt_boxes)
        proposals = torch.from_numpy(proposals)
        prop_scores = torch.from_numpy(prop_scores)
        gt_categories = torch.from_numpy(gt_categories)

        image_level_label = torch.zeros(80)  # one flag per object class
        for label in gt_categories:
            image_level_label[label] = 1.0
        return data, gt_boxes, gt_categories, proposals, prop_scores, image_level_label, im_scale, raw_img, id, loader_index
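to_center_form and to_point_form are called above but not shown. A plausible sketch consistent with how the rotation math uses them (center form [cx, cy, w, h], point form [x1, y1, x2, y2]; this is an assumption, not the original helpers):

import numpy as np

def to_center_form(boxes):
    # [x1, y1, x2, y2] -> [cx, cy, w, h]
    out = boxes.copy()
    out[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
    out[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
    out[:, 2] = boxes[:, 2] - boxes[:, 0]
    out[:, 3] = boxes[:, 3] - boxes[:, 1]
    return out

def to_point_form(boxes):
    # [cx, cy, w, h] -> [x1, y1, x2, y2]
    out = boxes.copy()
    out[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
    out[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
    out[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
    out[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
    return out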
Example #11
    def __getitem__(self, index):
        if self.training:
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index

        minibatch_db = [self._roidb[index_ratio]]
        blobs = self.get_minibatch(minibatch_db, self._num_classes)

        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])

        data_height, data_width = data.size(1), data.size(2)
        if self.training:
            np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])
            ratio = self.ratio_list_batch[index]
            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height

                    boxes_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (boxes_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((boxes_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:

                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))

                    data = data[:, :, x_s:(x_s + trim_size), :]

                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)

                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            if ratio < 1:
                trim_size = int(np.floor(data_width / ratio))
                padding_data = torch.FloatTensor(
                    int(np.ceil(data_width / ratio)), data_width, 3).zero_()
                padding_data[:data_height, :, :] = data[0]
                im_info[0, 0] = padding_data.size(0)
            elif ratio > 1:
                # this means that data_width > data_height
                # if the image need to crop.
                padding_data = torch.FloatTensor(data_height, \
                                                 int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                # no padding needed: the square crop already has the target size
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            keep = torch.nonzero(not_keep == 0).view(-1)
            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)

                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes, :]
            else:
                num_boxes = 0

            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            return padding_data, im_info, gt_boxes_padding, num_boxes

        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0

            return data, im_info, gt_boxes, num_boxes
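For intuition about the crop/pad arithmetic above: ratio is the target width/height for the batch group, so for ratio < 1 the height is trimmed toward data_width / ratio. A quick check with hypothetical sizes:

import numpy as np

data_width, data_height, ratio = 600, 1000, 0.75      # width / height target
trim_size = int(np.floor(data_width / ratio))         # 800
assert trim_size < data_height                        # height cropped 1000 -> 800
assert int(np.ceil(data_width / ratio)) == trim_size  # padded canvas is also 800 rows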
Example #12
def train_ccblock(model_options):
    # load datasets
    train_file_paths = ["/hhd12306-2/langruimin/ActivityNet1.3/resnet50_V2/conv5/video_{}.npy".format(i) for i in range(9654)]
    videoset = VideoDataset(train_file_paths)
    print(len(videoset))

    # create model
    model = RCCAModule_3d(2048,2)

    model_quan = Quantization(16, model_options.subCenters, 2048)

    params_path = os.path.join(model_options.model_save_path, model_options.params_filename)
    params_path_Q = os.path.join(model_options.model_save_path, model_options.Qparams_filename)
    if model_options.reload_params:
        print('Loading model params...')
        model.load_state_dict(torch.load(params_path))
        print('Done.')

    model = model.cuda()
    model_quan = model_quan.cuda()
    # optimizer
    optimizer = RAdam(
        model.parameters(),
        lr=1e-4,
        betas=(0.9, 0.999),
        weight_decay=1e-4
    )
    optimizer2 = RAdam(
        model_quan.parameters(),
        lr=1e-2,
        betas=(0.9, 0.999),
        weight_decay=1e-4
    )

    lr_C = ''
    lr_Q = ''

    # load the similarity matrix
    print("+++++++++loading similarity+++++++++")
    f = open("/home/langruimin/BLSTM_pytorch/data/activitynet-v1.3/Sim_K1_10_K2_5activitynet_V2.pkl", "rb")
    similarity = pkl.load(f)
    similarity = torch.ByteTensor(similarity.astype(np.uint8))
    f.close()
    print("++++++++++similarity loaded+++++++")

    batch_idx = 1
    train_loss_rec = open(os.path.join(model_options.records_save_path, model_options.train_loss_filename), 'w')
    error_ = 0.
    loss_ = 0.
    num = 0
    neighbor = True
    neighbor_freq = 2
    print("##########start train############")
    trainloader = torch.utils.data.DataLoader(videoset, batch_size=12, shuffle=True,num_workers=4, pin_memory=True)
    model.train()
    model_quan.train()

    neighbor_loss = 0.0
    for l in range(60):

        if neighbor:
            # training
            for i, (data, index) in enumerate(trainloader):
                data = data.to(model_options.default_dtype)
                # data = data.unsqueeze(1)
                data = data.cuda()
                data = data.permute(0,2,1,3,4)

                output_ccblock_mean = torch.tanh(model(data))

                # quantization block
                Qhard, Qsoft, SoftDistortion, HardDistortion, JointCenter, error,_ = model_quan(output_ccblock_mean)
                Q_loss = 0.1 * SoftDistortion + HardDistortion + 0.1 * JointCenter

                optimizer2.zero_grad()
                Q_loss.backward(retain_graph=True)
                optimizer2.step()

                if l % neighbor_freq == 0:
                    # neighbor loss
                    similarity_select = torch.index_select(similarity, 0, index)
                    similarity_select = torch.index_select(similarity_select, 1, index).float().cuda()
                    neighbor_loss = torch.sum((torch.mm(output_ccblock_mean, output_ccblock_mean.transpose(0,1)) / output_ccblock_mean.shape[-1] - similarity_select).pow(2))

                    optimizer.zero_grad()
                    neighbor_loss.backward()
                    optimizer.step()

                error_ += error.item()
                # when the neighbor loss was not recomputed this epoch, this
                # accumulates the stale value from the previous epoch
                loss_ += neighbor_loss.item()
                num += 1
                if batch_idx % model_options.disp_freq == 0:
                    info = "epoch{0} Batch {1} loss:{2:.3f}  distortion:{3:.3f} " \
                        .format(l, batch_idx, loss_/ num, error_ / num)
                    print(info)
                    train_loss_rec.write(info + '\n')

                batch_idx += 1
            batch_idx = 0
            error_ = 0.
            loss_ = 0.
            num = 0

        if (l+1) % model_options.save_freq == 0:
            print('epoch:', l, 'New best model. Saving model ...')
            torch.save(model.state_dict(), params_path)
            torch.save(model_quan.state_dict(), params_path_Q)

            for param_group in optimizer.param_groups:
                lr_C = param_group['lr']
            for param_group in optimizer2.param_groups:
                lr_Q = param_group['lr']
            record_inf = "saved model at epoch {0} lr_C:{1} lr_Q:{2}".format(l, lr_C, lr_Q)
            train_loss_rec.write(record_inf + '\n')
        print("##########epoch done##########")

    print('train done. Saving model ...')
    torch.save(model.state_dict(), params_path)
    torch.save(model_quan.state_dict(), params_path_Q)
    print("##########train done##########")
Example #13
    def __getitem__(self, index):
        index_ratio = int(self.ratio_index[index])

        # get the anchor index for current sample index
        # here we set the anchor index to the last one
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]

        blobs = get_minibatch(minibatch_db, self._num_classes)

        # rajath
        blobs['gt_boxes'] = [
            x for x in blobs['gt_boxes'] if x[-1] in self.list_ind
        ]
        # blobs['gt_boxes'] = [x for x in blobs['gt_boxes'] if int(x[-1]) in self.sketchy_classes]
        blobs['gt_boxes'] = np.array(blobs['gt_boxes'])

        if self.training:
            # Randomly choose a query category
            category = blobs['gt_boxes'][:, -1]

            cand = np.unique(category).astype(np.uint8)
            # cand = np.intersect1d(cand, self.sketchy_classes)
            # print ("index:", index, "\nindex_ratio:", index_ratio, "\ncatgory:", catgory, "\ncand:", cand, "\nsketchy_classes:", self.sketchy_classes)
            if len(cand) == 1:
                choice = cand[0]

            else:
                p = []
                for i in cand:
                    p.append(self.show_time[i])
                p = np.array(p)
                p /= p.sum()
                choice = np.random.choice(cand, 1, p=p)[0]

            # Relabel: 1 for the chosen query category, 0 for everything else
            blobs['gt_boxes'][:, -1] = np.where(
                blobs['gt_boxes'][:, -1] == choice, 1, 0)
            # Get query image
            query = self.load_query(choice)
        else:
            query = self.load_query(index, minibatch_db[0]['img_id'])

        data = torch.from_numpy(blobs['data'])
        query = torch.from_numpy(query)
        query = query.permute(0, 3, 1, 2).contiguous().squeeze(0)
        im_info = torch.from_numpy(blobs['im_info'])

        # we need to random shuffle the bounding box.
        data_height, data_width = data.size(1), data.size(2)
        if self.training:
            np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image need to crop, crop to the target size.
            ratio = self.ratio_list_batch[index]

            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    # update gt bounding boxes according to the crop
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    # update gt bounding boxes according to the crop
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the ratio, padding the image.
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                                 data_width, 3).zero_()

                padding_data[:data_height, :, :] = data[0]
                # update im_info
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            elif ratio > 1:
                # this means that data_width > data_height
                # if the image need to crop.
                padding_data = torch.FloatTensor(data_height, \
                                                 int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                # no padding needed: the square crop already has the target size
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            # check the bounding box:
            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            # not_keep = (gt_boxes[:,2] - gt_boxes[:,0]) < 10
            # print(not_keep)
            # not_keep = (gt_boxes[:,2] - gt_boxes[:,0]) < torch.FloatTensor([10]) | (gt_boxes[:,3] - gt_boxes[:,1]) < torch.FloatTensor([10])

            keep = torch.nonzero(not_keep == 0).view(-1)

            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
            else:
                num_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            return padding_data, query, im_info, gt_boxes_padding, num_boxes
        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)

            # gt_boxes = torch.FloatTensor([1,1,1,1,1])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])
            choice = self.cat_list[index]

            return data, query, im_info, gt_boxes, choice
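The query category above is drawn with probability proportional to self.show_time. A toy sketch of that weighting (values are made up):

import numpy as np

show_time = {3: 10.0, 7: 30.0}              # hypothetical per-class counts
cand = np.array([3, 7], dtype=np.uint8)
p = np.array([show_time[i] for i in cand])
p /= p.sum()                                # [0.25, 0.75]
choice = np.random.choice(cand, 1, p=p)[0]  # class 7 is drawn 3x as often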
Example #14
    def __getitem__(self, index):
        if self.training:
            index_ratio = int(self.ratio_index[index])
            # Note: this does not pick the image at `index` directly; it picks
            # the index-th image in aspect-ratio order (ratio_index stores the
            # image indices sorted by aspect ratio)
        else:
            index_ratio = index

        # get the anchor index for current sample index
        # here we set the anchor index to the last one
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]
        '''
        minibatch_db = [{}]: a list holding a single dict with these fields:

        boxes.shape = (n, 4)         n is the number of objects in this image
        gt_classes.shape = (n,)      e.g. array([12, 15]): object 1 is class 12
                                     (cat), object 2 is class 15 (person)
        gt_overlaps.shape = (n, 21)  values 0 or 1, default 0; [2, 20] = 1 means
                                     object 2 on this image belongs to class 20
        flipped = False              (False for the first 5011 entries, True for
                                     the 5011 flipped copies)
        seg_areas.shape = (n,)       e.g. array([19536., 168015.]): the box area
                                     of object 1 is 19536, of object 2 is 168015
        max_classes = [12, 15]       object 1 is class 12, object 2 is class 15
        max_overlaps = [1, 1, ...]   n ones for an image with n objects
        need_crop = 0 or 1           1: the aspect ratio is too large or too
                                     small and the image needs cropping; 0: no
                                     crop needed
        '''

        blobs = get_minibatch(minibatch_db, self._num_classes)
        '''
        blobs: a dict with

        data    : im_blob.shape = (1, H, W, 3), the resized image
        gt_boxes: shape = (n, 5), 5 = [x1, y1, x2, y2, class]
        im_info : shape = (1, 3), 3 = (h, w, scale)
        img_id  : e.g. 00026
        '''

        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        # we need to random shuffle the bounding box.
        data_height, data_width = data.size(1), data.size(2)
        if self.training:
            np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image need to crop, crop to the target size.
            ratio = self.ratio_list_batch[index]

            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    # update gt bounding boxes according to the crop
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    # update gt bounding boxes according to the crop
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the ratio, padding the image.
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                                 data_width, 3).zero_()

                padding_data[:data_height, :, :] = data[0]
                # update im_info
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            elif ratio > 1:
                # this means that data_width > data_height
                # if the image need to crop.
                padding_data = torch.FloatTensor(data_height, \
                                                 int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                # no padding needed: the square crop already has the target size
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            # check the bounding box:
            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            keep = torch.nonzero(not_keep == 0).view(-1)

            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
            else:
                num_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)

            return padding_data, im_info, gt_boxes_padding, num_boxes

        # '''train
        #   im_data.shape  = (b, 3, 512, 512)
        #   im_info.shape  = (b, 3)
        #   gt_boxes.shape = (b, 20, 5): up to 20 gt boxes per image, 5 = 4
        #                    coords + 1 class; the first n rows are real gts,
        #                    the remaining 20 - n rows are zeros
        #   num_boxes = (n)
        # '''

        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0

            return data, im_info, gt_boxes, num_boxes
Example #15
    def __getitem__(self, index):
        """Get sample."""

        # Load image
        id = self.ids[index]
        if self.coco:
            image = self.coco.loadImgs(id)[0]['file_name']
        im = Image.open('{}/{}'.format(self.path, image)).convert("RGB")

        # Randomly sample scale for resize during training
        resize = self.resize
        if isinstance(resize, list):
            resize = random.randint(self.resize[0], self.resize[-1])

        ratio = resize / min(im.size)
        if ratio * max(im.size) > self.max_size:
            ratio = self.max_size / max(im.size)
        im = im.resize((int(ratio * d) for d in im.size), Image.BILINEAR)

        if self.training:
            # Get annotations
            boxes, categories = self._get_target(id)
            boxes *= ratio

            # Random rotation, if self.rotate_augment
            random_angle = random.randint(0, 3) * 90
            if self.rotate_augment and random_angle != 0:
                # rotate by random_angle degrees.
                im = im.rotate(random_angle)
                x, y, w, h = boxes[:, 0].clone(), boxes[:, 1].clone(
                ), boxes[:, 2].clone(), boxes[:, 3].clone()
                if random_angle == 90:
                    boxes[:, 0] = y
                    boxes[:, 1] = im.size[1] - x - w
                    boxes[:, 2] = h
                    boxes[:, 3] = w
                elif random_angle == 180:
                    boxes[:, 0] = im.size[0] - x - w
                    boxes[:, 1] = im.size[1] - y - h
                elif random_angle == 270:
                    boxes[:, 0] = im.size[0] - y - h
                    boxes[:, 1] = x
                    boxes[:, 2] = h
                    boxes[:, 3] = w

            # Random horizontal flip
            if random.randint(0, 1):
                im = im.transpose(Image.FLIP_LEFT_RIGHT)
                boxes[:, 0] = im.size[0] - boxes[:, 0] - boxes[:, 2]

            # Apply image brightness, contrast etc augmentation
            if self.augment_brightness:
                brightness_factor = random.normalvariate(
                    1, self.augment_brightness)
                brightness_factor = max(0, brightness_factor)
                im = adjust_brightness(im, brightness_factor)
            if self.augment_contrast:
                contrast_factor = random.normalvariate(1,
                                                       self.augment_contrast)
                contrast_factor = max(0, contrast_factor)
                im = adjust_contrast(im, contrast_factor)
            if self.augment_hue:
                hue_factor = random.normalvariate(0, self.augment_hue)
                hue_factor = max(-0.5, hue_factor)
                hue_factor = min(0.5, hue_factor)
                im = adjust_hue(im, hue_factor)
            if self.augment_saturation:
                saturation_factor = random.normalvariate(
                    1, self.augment_saturation)
                saturation_factor = max(0, saturation_factor)
                im = adjust_saturation(im, saturation_factor)

            target = torch.cat([boxes, categories], dim=1)

        # Convert to tensor and normalize
        data = torch.ByteTensor(torch.ByteStorage.from_buffer(im.tobytes()))
        data = data.float().div(255).view(*im.size[::-1], len(im.mode))
        data = data.permute(2, 0, 1)

        for t, mean, std in zip(data, self.mean, self.std):
            t.sub_(mean).div_(std)

        # Apply padding
        pw, ph = ((self.stride - d % self.stride) % self.stride
                  for d in im.size)
        data = F.pad(data, (0, pw, 0, ph))

        if self.training:
            return data, target

        return data, id, ratio
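The final F.pad call rounds the image up to a multiple of self.stride. For example, with stride = 32 and a 500 x 375 image (PIL's im.size is (width, height)):

stride = 32
w, h = 500, 375
pw = (stride - w % stride) % stride   # 12 -> padded width 512
ph = (stride - h % stride) % stride   # 9  -> padded height 384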
Example #16
    def get_data(self,
                 index,
                 h_flip=False,
                 target_im_size=688,
                 square_img=False):
        im, gt_boxes, gt_categories, proposals, prop_scores, id, loader_index = self.get_raw_data(
            index)
        raw_img = im.copy()
        proposals, prop_scores = self.select_proposals(proposals, prop_scores)
        # rgb -> bgr
        im = im[:, :, ::-1]

        # horizontal flip
        if h_flip:
            im = im[:, ::-1, :]
            raw_img = raw_img[:, ::-1, :].copy()

            flipped_xmin = im.shape[1] - gt_boxes[:, 2]
            flipped_xmax = im.shape[1] - gt_boxes[:, 0]
            gt_boxes[:, 0] = flipped_xmin
            gt_boxes[:, 2] = flipped_xmax

            flipped_xmin = im.shape[1] - proposals[:, 2]
            flipped_xmax = im.shape[1] - proposals[:, 0]
            proposals[:, 0] = flipped_xmin
            proposals[:, 2] = flipped_xmax

        # cast to float type and mean subtraction
        im = im.astype(np.float32, copy=False)
        im -= np.array([[[102.9801, 115.9465, 122.7717]]])

        # image rescale
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])

        if square_img:
            x_scale = target_im_size / im_shape[1]
            y_scale = target_im_size / im_shape[0]
            im = cv2.resize(im,
                            None,
                            None,
                            fx=x_scale,
                            fy=y_scale,
                            interpolation=cv2.INTER_LINEAR)

            gt_boxes = gt_boxes * np.array(
                [x_scale, y_scale, x_scale, y_scale])
            proposals = proposals * np.array(
                [x_scale, y_scale, x_scale, y_scale])
            im_scale = [x_scale, y_scale]
        else:
            im_scale = target_im_size / float(im_size_max)

            if im_size_max * im_scale > 2000:
                im_scale = 2000 / im_size_max
            im = cv2.resize(im,
                            None,
                            None,
                            fx=im_scale,
                            fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)

            gt_boxes = gt_boxes * im_scale
            proposals = proposals * im_scale

        # to tensor
        data = torch.tensor(im, dtype=torch.float32)
        data = data.permute(2, 0, 1).contiguous()
        gt_boxes = torch.tensor(gt_boxes, dtype=torch.float32)
        proposals = torch.tensor(proposals, dtype=torch.float32)
        prop_scores = torch.tensor(prop_scores, dtype=torch.float32)
        gt_categories = torch.tensor(gt_categories, dtype=torch.long)

        image_level_label = torch.zeros(self.num_classes, dtype=torch.uint8)
        for label in gt_categories:
            image_level_label[label] = 1
        return {
            'im_data': data,
            'gt_boxes': gt_boxes,
            'gt_labels': gt_categories,
            'proposals': proposals,
            'prop_scores': prop_scores,
            'image_level_label': image_level_label,
            'im_scale': im_scale,
            'raw_img': raw_img,
            'id': id
        }
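Because get_data returns a dict and takes h_flip explicitly, flip-based test-time augmentation is straightforward. A hypothetical usage (dataset stands for an instance of this loader):

sample = dataset.get_data(0, h_flip=False, target_im_size=688)
flipped = dataset.get_data(0, h_flip=True, target_im_size=688)
print(sample['im_data'].shape, sample['im_scale'])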
Example #17
# initialize Generator & Discriminator
netG = Generator().to(device)
weights_init(netG)
print(netG)

netD = Discriminator().to(device)
weights_init(netD)
print(netD)

# load ".off" files
volumes = d.getAll(obj=obj, train=True, is_local=is_local, obj_ratio=obj_ratio)
print('Using ' + obj + ' Data')
volumes = volumes[..., np.newaxis].astype(np.float64)  # np.float alias was removed from NumPy
data = torch.from_numpy(volumes)
data = data.permute(0, 4, 1, 2, 3)
data = data.type(torch.FloatTensor)


# choose loss function
criterion = nn.BCELoss()
criterion2 = nn.MSELoss()

# fake/real labels
real_label = 1
fake_label = 0

# setup optimizers
optG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, opt.beta2))
optD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, opt.beta2))
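weights_init is referenced but not shown here; the common DCGAN-style initializer, which snippets like this usually wrap or resemble (an assumption, not code from this example; typically applied with net.apply(weights_init)), looks like:

import torch.nn as nn

def weights_init(m):
    # DCGAN-style init: N(0, 0.02) for conv weights, N(1, 0.02) for BatchNorm scales.
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)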
Example #18
    def __getitem__(self, index):
        index_ratio = index

        # get the anchor index for current sample index
        # here we set the anchor index to the last one
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]
        blobs = get_minibatch(minibatch_db)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
        data_height, data_width = data.size(1), data.size(2)

        data = data.permute(0, 3, 1,
                            2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)

        # gt_boxes = torch.FloatTensor([1,1,1,1,1])
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])

        all_cls_gt_boxes = gt_boxes.clone()

        cur_cls_id_list = []
        for i in range(gt_boxes.size(0)):
            if gt_boxes[i, 4] not in cur_cls_id_list:
                cur_cls_id_list.append(gt_boxes[i, 4])
        # fixed seed: the chosen class is deterministic for a given image
        random.seed(0)
        chosen_cls = random.sample(cur_cls_id_list, k=1)[0]

        new_gt_boxes = []
        for i in range(gt_boxes.size(0)):
            if gt_boxes[i, 4] == chosen_cls:
                new_gt_boxes.append([
                    gt_boxes[i, 0], gt_boxes[i, 1], gt_boxes[i, 2],
                    gt_boxes[i, 3], chosen_cls
                ])
        gt_boxes = torch.from_numpy(np.asarray(new_gt_boxes))

        num_boxes = 0

        # get supports
        support_data_all = np.zeros(
            (self.testing_shot, 3, self.support_im_size, self.support_im_size),
            dtype=np.float32)
        current_gt_class_id = int(gt_boxes[0][4])
        pool = self.support_pool[current_gt_class_id]

        random.seed(index)
        selected_supports = random.sample(pool, k=self.testing_shot)

        for i, _path in enumerate(selected_supports):
            support_im = imread(_path)[:, :, ::-1]  # rgb -> bgr
            target_size = np.min(
                support_im.shape[0:2])  # don't change the size
            support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS,
                                             target_size, cfg.TRAIN.MAX_SIZE)
            _h, _w = support_im.shape[0], support_im.shape[1]
            if _h > _w:
                resize_scale = float(self.support_im_size) / float(_h)
                unfit_size = int(_w * resize_scale)
                support_im = cv2.resize(support_im,
                                        (unfit_size, self.support_im_size),
                                        interpolation=cv2.INTER_LINEAR)
            else:
                resize_scale = float(self.support_im_size) / float(_w)
                unfit_size = int(_h * resize_scale)
                support_im = cv2.resize(support_im,
                                        (self.support_im_size, unfit_size),
                                        interpolation=cv2.INTER_LINEAR)
            h, w = support_im.shape[0], support_im.shape[1]
            support_data_all[i, :, :h, :w] = np.transpose(
                support_im, (2, 0, 1))
        supports = torch.from_numpy(support_data_all)

        return data, im_info, gt_boxes, num_boxes, supports, all_cls_gt_boxes
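The support branch above resizes the longer side to self.support_im_size, keeps the aspect ratio, and zero-pads the short side. Worked through with support_im_size = 320 and a 480 x 240 crop:

support_im_size = 320
_h, _w = 480, 240                    # taller than wide
resize_scale = support_im_size / _h  # 2/3
unfit_size = int(_w * resize_scale)  # 160
# resized to width 160, height 320, then placed into a zeroed 320 x 320 canvas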
Example #19
  def __getitem__(self, index):
    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # get the anchor index for current sample index
    # here we set the anchor index to the last one
    # sample in this group
    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_action_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    # we need to random shuffle the bounding box.
    data_height, data_width = data.size(1), data.size(2)
    if self.training:
        # blobs['gt_boxes'], blobs['key_points'] = self.unison_shuffle(blobs['gt_boxes'], blobs['key_points']) 
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])
        sec_roi_boxes = torch.from_numpy(blobs['sec_roi_boxes'])
        key_points = torch.from_numpy(blobs['key_points'])
        img_id = blobs['img_id']
        img_name = blobs['img_name']

        ########################################################
        # padding the input image to fixed size for each group #
        ########################################################

        # NOTE1: need to cope with the case where a group covers both conditions. (done)
        # NOTE2: need to consider the situation for the tail samples. (no worry)
        # NOTE3: need to implement a parallel data loader. (no worry)
        # get the index range

        # if the image need to crop, crop to the target size.
        ratio = self.ratio_list_batch[index]

        if self._roidb[index_ratio]['need_crop']:
            if ratio < 1:
                # this means that data_width << data_height, we need to crop the
                # data_height
                min_y = int(torch.min(gt_boxes[:,1]))
                max_y = int(torch.max(gt_boxes[:,3]))
                trim_size = int(np.floor(data_width / ratio))
                if trim_size > data_height:
                    trim_size = data_height                
                box_region = max_y - min_y + 1
                if min_y == 0:
                    y_s = 0
                else:
                    if (box_region-trim_size) < 0:
                        y_s_min = max(max_y-trim_size, 0)
                        y_s_max = min(min_y, data_height-trim_size)
                        if y_s_min == y_s_max:
                            y_s = y_s_min
                        else:
                            y_s = np.random.choice(range(y_s_min, y_s_max))
                    else:
                        y_s_add = int((box_region-trim_size)/2)
                        if y_s_add == 0:
                            y_s = min_y
                        else:
                            y_s = np.random.choice(range(min_y, min_y+y_s_add))
                # crop the image
                data = data[:, y_s:(y_s + trim_size), :, :]

                # shift y coordinate of gt_boxes
                gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
                sec_roi_boxes[:,1] = sec_roi_boxes[:,1] - float(y_s)
                sec_roi_boxes[:,3] = sec_roi_boxes[:,3] - float(y_s)
                key_points[:,1,:] = key_points[:,1,:] - float(y_s)

                # update gt bounding boxes according to the crop
                gt_boxes[:, 1].clamp_(0, trim_size - 1)
                gt_boxes[:, 3].clamp_(0, trim_size - 1)
                sec_roi_boxes[:,1].clamp_(0, trim_size-1)
                sec_roi_boxes[:,3].clamp_(0, trim_size-1)
                key_points[:,1,:].clamp_(0, trim_size - 1)

            else:
                # this means that data_width >> data_height, we need to crop the
                # data_width
                min_x = int(torch.min(gt_boxes[:,0]))
                max_x = int(torch.max(gt_boxes[:,2]))
                trim_size = int(np.ceil(data_height * ratio))
                if trim_size > data_width:
                    trim_size = data_width                
                box_region = max_x - min_x + 1
                if min_x == 0:
                    x_s = 0
                else:
                    if (box_region-trim_size) < 0:
                        x_s_min = max(max_x-trim_size, 0)
                        x_s_max = min(min_x, data_width-trim_size)
                        if x_s_min == x_s_max:
                            x_s = x_s_min
                        else:
                            x_s = np.random.choice(range(x_s_min, x_s_max))
                    else:
                        x_s_add = int((box_region-trim_size)/2)
                        if x_s_add == 0:
                            x_s = min_x
                        else:
                            x_s = np.random.choice(range(min_x, min_x+x_s_add))
                # crop the image
                data = data[:, :, x_s:(x_s + trim_size), :]

                # shift x coordinate of gt_boxes
                gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                sec_roi_boxes[:,0] = sec_roi_boxes[:,0] - float(x_s)
                sec_roi_boxes[:,2] = sec_roi_boxes[:,2] - float(x_s)
                key_points[:,0,:] = key_points[:,0,:] - float(x_s)
                # update gt bounding boxes according to the crop
                gt_boxes[:, 0].clamp_(0, trim_size - 1)
                gt_boxes[:, 2].clamp_(0, trim_size - 1)
                sec_roi_boxes[:,0].clamp_(0, trim_size-1)
                sec_roi_boxes[:,2].clamp_(0, trim_size-1)
                key_points[:,0,:].clamp_(0, trim_size - 1)

        # based on the ratio, padding the image.
        if ratio < 1:
            # this means that data_width < data_height
            trim_size = int(np.floor(data_width / ratio))

            padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                             data_width, 3).zero_()

            padding_data[:data_height, :, :] = data[0]
            # update im_info
            im_info[0, 0] = padding_data.size(0)
            # print("height %d %d \n" %(index, anchor_idx))
        elif ratio > 1:
            # this means that data_width > data_height
            # if the image need to crop.
            padding_data = torch.FloatTensor(data_height, \
                                             int(np.ceil(data_height * ratio)), 3).zero_()
            padding_data[:, :data_width, :] = data[0]
            im_info[0, 1] = padding_data.size(1)
        else:
            trim_size = min(data_height, data_width)
            # no padding needed: the square crop already has the target size
            padding_data = data[0][:trim_size, :trim_size, :]
            # gt_boxes.clamp_(0, trim_size)
            gt_boxes[:, :4].clamp_(0, trim_size)
            sec_roi_boxes[:, :4].clamp_(0, trim_size)
            key_points.clamp_(0, trim_size)
            im_info[0, 0] = trim_size
            im_info[0, 1] = trim_size


        # # check the bounding box:
        # not_keep = (gt_boxes[:,0] == gt_boxes[:,2]) | (gt_boxes[:,1] == gt_boxes[:,3])
        # keep = torch.nonzero(not_keep == 0).view(-1)

        # assert gt_boxes [action_clses,5], key_points [NUM_GT_BOX,2], SEC_BOX_ROI [CONTEXT_NUM_ROIS,5]
        padding_gt_boxes = torch.FloatTensor(self.max_num_box, gt_boxes.size(1)).zero_()
        padding_kp = torch.FloatTensor(self.max_num_box, key_points.size(1), 17).zero_()
        padding_sec_roi_boxes = torch.FloatTensor(self.max_num_sec_box, sec_roi_boxes.size(1)).zero_()
        num_sec_boxes = min(sec_roi_boxes.size(0), self.max_num_sec_box)
        num_kp = min(key_points.size(0), self.max_num_box)

        # random sampling or padding the sec_roi_boxes (assumes at least one
        # secondary RoI; cinds would be undefined if sec_roi_boxes were empty)
        if sec_roi_boxes.size(0) > self.max_num_sec_box:
            cinds = npr.choice(np.arange(sec_roi_boxes.size(0)),
                               size=self.max_num_sec_box, replace=False)
        elif sec_roi_boxes.size(0) > 0:
            cinds = npr.choice(np.arange(sec_roi_boxes.size(0)),
                               size=self.max_num_sec_box, replace=True)
        assert cinds.size == self.max_num_sec_box, "Secondary RoIs are not of correct size"

        # random sampling or padding the key_points
        if key_points.size(0) > self.max_num_box:
            kinds = npr.choice(np.arange(key_points.size(0)),
                               size=self.max_num_box, replace=False)
        elif key_points.size(0) > 0:
            kinds = npr.choice(np.arange(key_points.size(0)),
                               size=self.max_num_box, replace=True)
        assert kinds.size == self.max_num_box, "Key_points are not of correct size"

        if gt_boxes.size(0) > self.max_num_box:
            ginds = npr.choice(np.arange(gt_boxes.size(0)),
                               size=self.max_num_box, replace=False)
        elif gt_boxes.size(0) > 0:
            ginds = npr.choice(np.arange(gt_boxes.size(0)),
                               size=self.max_num_box, replace=True)
        assert ginds.size == self.max_num_box, "Gt_boxes are not of correct size"

        # permute trim_data to adapt to downstream processing
        padding_data = padding_data.permute(2, 0, 1).contiguous()
        im_info = im_info.view(3)
        padding_sec_roi_boxes = sec_roi_boxes[cinds]
        padding_kp = key_points[kinds]
        padding_gt_boxes = gt_boxes[ginds]

        return padding_data, im_info, padding_gt_boxes, padding_sec_roi_boxes, padding_kp, num_sec_boxes, num_kp, img_name
    else:
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])
        sec_roi_boxes = torch.from_numpy(blobs['sec_roi_boxes'])
        key_points = torch.from_numpy(blobs['key_points'])
        img_id = blobs['img_id']
        img_name = blobs['img_name']
        data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)
        num_sec_boxes = sec_roi_boxes.size(1)  # note: size(0) (the box count) may be intended
        num_kp = 0

        return data, im_info, gt_boxes, sec_roi_boxes, key_points, num_sec_boxes, num_kp, img_name
Example #20
    def __getitem__(self, index):
        """
        Given an index of one image, take out corresponding dataset & labels
        subtract mean, rescale, crop, padding the image
        :param index: a number (23321 / 2134 / 455 / 1...)
        :return data: image pixels, 4D tensor (1, 3, h, w)
                im_info: 2D tensor [[h, w, scale_factor]]
                gt_boxes: 2D tensor [[x1, y1, x2, y2, cls], [], ...]
                num_boxes:
                box_info: link gt label, 2D tensor [[contactstate, handside, magnitude, unitdx, unitdy], [], ...]]
        """
        def unison_shuffled_copies(a, b):
            assert len(a) == len(b)
            p = np.random.permutation(len(a))
            return a[p], b[p]

        if self.training:
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index

        # get one roidb, e.g. [{}]
        minibatch_db = [self._roidb[index_ratio]]

        # blobs: a dict contains infos of an image (already subtracted pixel mean and resized to 600)
        #    {'data': 4D array (1, 3, h, w),
        #     'gt_boxes': 2D array [[x1, y1, x2, y2, cls], [], ...],
        #     'im_info':2D array [[h, w, scale_factor]],
        #     'img_id':xx,
        #     'box_info': 2D array [[contactstate, handside, magnitude, unitdx, unitdy], [], ...]]
        blobs = get_minibatch(minibatch_db, self._num_classes)
        data = torch.from_numpy(blobs['data'])  # 4D array (1, 3, h, w)
        im_info = torch.from_numpy(
            blobs['im_info'])  # 2D array [[h, w, scale_factor]]
        data_height, data_width = data.size(1), data.size(2)

        if self.training:
            # shuffle the bounding box.
            blobs['gt_boxes'], blobs['box_info'] = unison_shuffled_copies(
                blobs['gt_boxes'], blobs['box_info'])
            # np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])
            box_info = torch.from_numpy(blobs['box_info'])

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image needs cropping, crop it to the target size.
            ratio = self.ratio_list_batch[index]
            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    # if width < height, we need to crop the height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    # update gt bounding box according to the trim
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:
                    # if width > height, we need to crop the width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    # update gt bounding box according to the trim
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the width/height ratio, padding the image.
            # if width < height
            if ratio < 1:
                trim_size = int(np.floor(data_width / ratio))
                padding_data = torch.FloatTensor(
                    int(np.ceil(data_width / ratio)), data_width, 3).zero_()
                padding_data[:data_height, :, :] = data[0]
                im_info[0, 0] = padding_data.size(0)  # update im_info

            # if width > height
            elif ratio > 1:
                padding_data = torch.FloatTensor(
                    data_height, int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                padding_data = torch.FloatTensor(trim_size, trim_size,
                                                 3).zero_()
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            # check the bounding box:
            not_keep = ((gt_boxes[:, 2] - gt_boxes[:, 0]) < 10) * (
                (gt_boxes[:, 3] - gt_boxes[:, 1]) < 10)
            # not_keep = ((gt_boxes[:,2] - gt_boxes[:,0]) < 10) and ((gt_boxes[:,3] - gt_boxes[:,1]) < 10)
            keep = torch.nonzero(not_keep == 0).view(-1)

            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            box_info_padding = torch.FloatTensor(self.max_num_box,
                                                 box_info.size(1)).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                box_info = box_info[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
                box_info_padding[:num_boxes, :] = box_info[:num_boxes]
            else:
                num_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)
            return padding_data, im_info, gt_boxes_padding, num_boxes, box_info_padding

        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)
            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            box_info = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0
            return data, im_info, gt_boxes, num_boxes, box_info
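Every loader on this page picks its crop start with the same three-way rule: start at 0 when the boxes touch the top/left edge, sample a start that still covers all boxes when the window is larger than the box span, and otherwise jitter within half the overshoot. A standalone sketch of that rule (the function name is ours):

import numpy as np

def choose_crop_start(min_c, max_c, dim_size, trim_size):
    box_region = max_c - min_c + 1
    if min_c == 0:
        return 0
    if box_region < trim_size:
        # window larger than the box span: pick any start that still covers all boxes
        s_min = max(max_c - trim_size, 0)
        s_max = min(min_c, dim_size - trim_size)
        return s_min if s_min == s_max else np.random.choice(range(s_min, s_max))
    # window smaller than the box span: start somewhere in the first half of the overshoot
    s_add = (box_region - trim_size) // 2
    return min_c if s_add == 0 else np.random.choice(range(min_c, min_c + s_add))

print(choose_crop_start(min_c=50, max_c=400, dim_size=800, trim_size=600))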
Example #21
    def __getitem__(self, index):
        if self.training:
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index
        # get the anchor index for current sample index
        # here we set the anchor index to the last one
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]
        # blobs: {'data': image array, 'gt_boxes': [:, :5] = x1, y1, x2, y2, cls, 'im_info': [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 'img_id'}
        blobs = get_minibatch(minibatch_db, self._num_classes)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        # we need to random shuffle the bounding box.
        data_height, data_width = data.size(1), data.size(2)

        # prepare the labels for the training phase
        if self.training:
            # reshuffle gt_boxes
            # state = np.random.get_state()
            # np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])
            # crowdsourced_classes must stay in the same order as gt_boxes
            if cfg.LABEL_SOURCE == 2:
                # keep gt_boxes and crowdsourced_classes shuffled in the same order:
                # np.random.set_state(state)
                # np.random.shuffle(blobs['crowdsourced_classes'])
                crowdsourced_classes = torch.from_numpy(
                    blobs['crowdsourced_classes'])
            else:
                crowdsourced_classes = None
            # print('cc: ', crowdsourced_classes)
            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image needs cropping, crop it to the target size.
            ratio = self.ratio_list_batch[index]

            # crop if the image's aspect ratio falls outside the [0.5, 2] range
            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    # update gt bounding box according to the trim
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)

                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    # update gt bounding box according to the trim
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the ratio, padding the image.
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                                 data_width, 3).zero_()

                padding_data[:data_height, :, :] = data[0]
                # update im_info
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            elif ratio > 1:
                # this means that data_width > data_height
                # if the image need to crop.
                padding_data = torch.FloatTensor(data_height, \
                                                 int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                padding_data = torch.FloatTensor(trim_size, trim_size,
                                                 3).zero_()
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            # check the bounding box:
            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            keep = torch.nonzero(not_keep == 0).view(-1)

            gt_boxes_padding = torch.LongTensor(self.max_num_box,
                                                gt_boxes.size(1)).zero_()
            # pad crowdsourced_classes
            if cfg.LABEL_SOURCE == 2:
                padding_crowdsourced_classes = torch.FloatTensor(
                    self.max_num_box, cfg.NUM_ANNOTATOR).zero_()
            if keep.numel() != 0:
                gt_boxes = gt_boxes[keep]
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
                if cfg.LABEL_SOURCE == 2:
                    padding_crowdsourced_classes[:num_boxes] = crowdsourced_classes[:num_boxes]
            else:
                num_boxes = 0

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)
            # example shapes/values returned:
            #   padding_data: torch.Size([3, 600, 800])
            #   im_info: tensor([600.0000, 800.0000, 1.6000])
            #   gt_boxes_padding: tensor([[108.8000, 33.6000, 566.4000, 286.4000, 8.0000],
            #                             [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
            #                             ...])  # zero rows pad up to max_num_box
            #   num_boxes: 1
            if cfg.LABEL_SOURCE == 2:
                return padding_data, im_info, gt_boxes_padding, num_boxes, padding_crowdsourced_classes
            else:
                return padding_data, im_info, gt_boxes_padding, num_boxes
        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(3)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0

            return data, im_info, gt_boxes, num_boxes
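The not_keep/keep pair above filters out boxes whose width or height collapsed to zero after cropping and clamping; a compact, runnable illustration:

import torch

gt_boxes = torch.tensor([[10., 10., 50., 60., 1.],   # valid box
                         [30., 20., 30., 80., 2.]])  # zero width -> dropped
not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1] == gt_boxes[:, 3])
keep = torch.nonzero(not_keep == 0).view(-1)
print(gt_boxes[keep])   # only the first box survives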
Example #22
  def __getitem__(self, index):
    if self.training:
        index_ratio = int(self.ratio_index[index]) # during training, samples are taken in aspect-ratio order
    else:
        index_ratio = index

    # get the anchor index for current sample index
    # here we set the anchor index to the last one
    # sample in this group
    minibatch_db = [self._roidb[index_ratio]] # really a list holding a single roidb dict
    blobs = get_minibatch(minibatch_db, self._num_classes)
    # since the input is just one random index, this minibatch has batch size 1
    # convert the roi into a batch of size 1
    # blobs is a dict holding, in order: the image data, gt_boxes, image info (height/width and the scale that makes the short side 600), and the image id
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    # we need to random shuffle the bounding box.
    data_height, data_width = data.size(1), data.size(2)
    if self.training:
        np.random.shuffle(blobs['gt_boxes']) # shuffle the boxes
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])

        ########################################################
        # padding the input image to fixed size for each group #
        ########################################################

        # NOTE1: need to cope with the case where a group covers both conditions. (done)
        # NOTE2: need to consider the situation for the tail samples. (no worry)
        # NOTE3: need to implement a parallel data loader. (no worry)
        # get the index range

        # if the image needs cropping, crop it to the target size.
        ratio = self.ratio_list_batch[index] # taking this ratio keeps the final aspect ratio consistent across the batch

        # when the image's own aspect ratio is outside the allowed min/max, crop it first
        if self._roidb[index_ratio]['need_crop']:
            if ratio < 1:
                # this means that data_width << data_height, we need to crop the
                # data_height
                min_y = int(torch.min(gt_boxes[:,1]))
                max_y = int(torch.max(gt_boxes[:,3]))
                trim_size = int(np.floor(data_width / ratio)) # this ratio is shared across the whole batch
                if trim_size > data_height:
                    trim_size = data_height                
                box_region = max_y - min_y + 1
                if min_y == 0:
                    y_s = 0
                else:
                    if (box_region-trim_size) < 0:
                        y_s_min = max(max_y-trim_size, 0)
                        y_s_max = min(min_y, data_height-trim_size)
                        if y_s_min == y_s_max:
                            y_s = y_s_min
                        else:
                            y_s = np.random.choice(range(y_s_min, y_s_max))
                    else:
                        y_s_add = int((box_region-trim_size)/2)
                        if y_s_add == 0:
                            y_s = min_y
                        else:
                            y_s = np.random.choice(range(min_y, min_y+y_s_add))
                # crop the image
                data = data[:, y_s:(y_s + trim_size), :, :]

                # shift y coordinate of gt_boxes
                gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                # update gt bounding box according to the trim
                gt_boxes[:, 1].clamp_(0, trim_size - 1)
                gt_boxes[:, 3].clamp_(0, trim_size - 1)

            else:
                # this means that data_width >> data_height, we need to crop the
                # data_width
                min_x = int(torch.min(gt_boxes[:,0]))
                max_x = int(torch.max(gt_boxes[:,2]))
                trim_size = int(np.ceil(data_height * ratio))
                if trim_size > data_width:
                    trim_size = data_width                
                box_region = max_x - min_x + 1
                if min_x == 0:
                    x_s = 0
                else:
                    if (box_region-trim_size) < 0:
                        x_s_min = max(max_x-trim_size, 0)
                        x_s_max = min(min_x, data_width-trim_size)
                        if x_s_min == x_s_max:
                            x_s = x_s_min
                        else:
                            x_s = np.random.choice(range(x_s_min, x_s_max))
                    else:
                        x_s_add = int((box_region-trim_size)/2)
                        if x_s_add == 0:
                            x_s = min_x
                        else:
                            x_s = np.random.choice(range(min_x, min_x+x_s_add))
                # crop the image
                data = data[:, :, x_s:(x_s + trim_size), :]

                # shift x coordinate of gt_boxes
                gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                # update gt bounding box according to the trim
                gt_boxes[:, 0].clamp_(0, trim_size - 1)
                gt_boxes[:, 2].clamp_(0, trim_size - 1)
        # the crop above only handled images that needed it; the image may still not satisfy the target ratio

        # based on the ratio, padding the image.
        if ratio < 1:
            # this means that data_width < data_height
            trim_size = int(np.floor(data_width / ratio))

            # if this image's width/height < 1, its target ratio is even smaller, so the height must be padded
            padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                             data_width, 3).zero_()
            # pad the height: the input image sits at the top, so gt_boxes need no coordinate shift; the zero rows go at the very bottom
            padding_data[:data_height, :, :] = data[0]
            # update im_info
            im_info[0, 0] = padding_data.size(0)
            # print("height %d %d \n" %(index, anchor_idx))
        elif ratio > 1:
            # this means that data_width > data_height
            # if the image need to crop.
            # a target width/height > 1 means the original image is short and wide, and its target ratio is even larger
            # to make it wider still, pad the width: paste the original image on the left and put the zero columns on the far right
            padding_data = torch.FloatTensor(data_height, \
                                             int(np.ceil(data_height * ratio)), 3).zero_()
            padding_data[:, :data_width, :] = data[0]
            im_info[0, 1] = padding_data.size(1)
        else:
            trim_size = min(data_height, data_width)
            padding_data = torch.FloatTensor(trim_size, trim_size, 3).zero_()
            padding_data = data[0][:trim_size, :trim_size, :]
            # gt_boxes.clamp_(0, trim_size)
            gt_boxes[:, :4].clamp_(0, trim_size)
            im_info[0, 0] = trim_size
            im_info[0, 1] = trim_size

        # note that neither the crop nor the adjustment to the shared batch ratio resizes the image:
        # the crop cuts directly from the image, and matching the batch ratio just appends zeros,
        # so the scale factor never changes and the whole pipeline resizes only once.
        # every image in a batch thus shares the same aspect ratio (the batch ratio) and a short side of 600,
        # which makes all inputs in a batch exactly the same resolution,
        # so no custom collate_fn is needed to assemble a batch.

        # check the bounding box:
        not_keep = (gt_boxes[:,0] == gt_boxes[:,2]) | (gt_boxes[:,1] == gt_boxes[:,3])
        keep = torch.nonzero(not_keep == 0).view(-1)

        gt_boxes_padding = torch.FloatTensor(self.max_num_box, gt_boxes.size(1)).zero_() # [50, 5] buffer for the labels
        if keep.numel() != 0:
            gt_boxes = gt_boxes[keep]
            num_boxes = min(gt_boxes.size(0), self.max_num_box)
            gt_boxes_padding[:num_boxes,:] = gt_boxes[:num_boxes]
        else:
            num_boxes = 0

        # permute trim_data to adapt to downstream processing: [H, W, 3] -> [3, H, W]
        padding_data = padding_data.permute(2, 0, 1).contiguous()
        im_info = im_info.view(3)
        # training-phase return: im_info holds the final height/width after crop and padding, plus the scale
        # factor of the single initial resize (afterwards only zero-padding or cropping is applied)
        # gt_boxes_padding is [50, 5]; num_boxes is just an int
        return padding_data, im_info, gt_boxes_padding, num_boxes
    else:
        data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)

        gt_boxes = torch.FloatTensor([1,1,1,1,1])
        num_boxes = 0

        return data, im_info, gt_boxes, num_boxes
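The padding branch can be read in isolation: given the batch target ratio (width/height), the image is zero-padded along exactly one side so every image in the batch ends up with that ratio. A sketch under that reading (the helper name is ours):

import numpy as np
import torch

def pad_to_ratio(img, ratio):
    # img: (H, W, 3) float tensor; ratio: target width/height for the batch
    h, w = img.size(0), img.size(1)
    if ratio < 1:                               # target is taller than wide: pad the bottom
        out = torch.zeros(int(np.ceil(w / ratio)), w, 3)
        out[:h, :, :] = img
    elif ratio > 1:                             # target is wider than tall: pad the right
        out = torch.zeros(h, int(np.ceil(h * ratio)), 3)
        out[:, :w, :] = img
    else:                                       # square target: crop to the short side
        s = min(h, w)
        out = img[:s, :s, :]
    return out

print(pad_to_ratio(torch.rand(600, 800, 3), 1.5).shape)  # torch.Size([600, 900, 3])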
Example #23
    def train_epoch(self, epoch):
        self.model.train()
        if not os.path.exists(self.csv_path):
            os.mkdir(self.csv_path)
        train_csv = os.path.join(self.csv_path, 'train.csv')
        pred_list, target_list, loss_list, pos_list = [],[],[],[]
        print ('epoch: ', epoch)

        
        
        for batch_idx, item in enumerate(self.train_loader):
            if self.cfig['model_name'] in ['disrnn']:
                data, target, dist = item
                data, target, dist = data.to(self.device), target.to(self.device), dist.to(self.device)
            else:
                data, target, ID = item
                data, target = data.to(self.device), target.to(self.device)
            

            if self.cfig['model_name'][-3:] == 'rnn':
                data = data.permute([1,0,2,3,4])
            
            self.optim.zero_grad()
            #print ('=================',data.shape)
            if self.cfig['model_name'] in ['disrnn']:
                pred = self.model(data, dist)
            else:
                pred = self.model(data)             # here should be careful
            pred_prob = F.softmax(pred, dim=1)
            
            if batch_idx == 0:
                print ('data.shape',data.shape)
                print ('pred.shape', pred.shape)
                print('Epoch: ', epoch)
            loss = nn.CrossEntropyLoss()(pred, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 4)
            self.optim.step()
            print_str = 'train epoch=%d, batch_idx=%d/%d, loss=%.4f\n' % (
                epoch, batch_idx, len(self.train_loader), loss.item())
            #print(print_str)
            pred_cls = pred.data.max(1)[1]
            pos_list += pred_prob[:, 1].data.cpu().numpy().tolist()
            pred_list += pred_cls.data.cpu().numpy().tolist()
            target_list += target.data.cpu().numpy().tolist()
            loss_list.append(loss.data.cpu().numpy().tolist())
        try: 
            print (1000 * self.model.dislstmcell.a.grad, ' a grad')
            
            print (self.model.dislstmcell.a.data, self.model.dislstmcell.c.data)
            print (1000 * self.model.dislstmcell.c.grad, 'c grad')
            
        except:
            print ('a.grad none')    
        print (confusion_matrix(target_list, pred_list))
        accuracy=accuracy_score(target_list,pred_list)
        fpr, tpr, threshold = metrics.roc_curve(target_list, pos_list)
        roc_auc = metrics.auc(fpr, tpr)
        #-------------------------save to csv -----------------------#
        if not os.path.exists(train_csv):
            csv_info = ['epoch', 'loss', 'auc', 'accuracy']
            init_csv = pd.DataFrame()
            for key in csv_info:
                init_csv[key] = []
            init_csv.to_csv(train_csv)
        df = pd.read_csv(train_csv)
        data = pd.DataFrame()
        tmp_epoch = df['epoch'].tolist()
        tmp_epoch.append(epoch)
        tmp_auc = df['auc'].tolist()
        tmp_auc.append(roc_auc)
        #print('------------------', tmp_epoch)
        tmp_loss = df['loss'].tolist()
        tmp_loss.append(np.mean(loss_list))
        
        tmp_acc = df['accuracy'].tolist()
        tmp_acc.append(accuracy)
        
        data['epoch'], data['loss'], data['auc'], data['accuracy'] = tmp_epoch, tmp_loss, tmp_auc, tmp_acc
        print ('train accuracy: ', accuracy, 'train auc: ', roc_auc)
        data.to_csv(train_csv)
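The CSV logging above re-reads the whole file every epoch just to append a single row. A leaner equivalent sketch (path and column names are illustrative) appends in 'a' mode and writes the header only on first creation:

import os
import pandas as pd

def log_row(csv_path, epoch, loss, auc, accuracy):
    row = pd.DataFrame([{'epoch': epoch, 'loss': loss, 'auc': auc, 'accuracy': accuracy}])
    header = not os.path.exists(csv_path)        # write the header only once
    row.to_csv(csv_path, mode='a', header=header, index=False)

log_row('train.csv', epoch=0, loss=0.7, auc=0.5, accuracy=0.5)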
Example #24
    X=torch.zeros((1,)).cpu(),
    Y=torch.ones((1, 2)).cpu(),
    opts=dict(
        xlabel='Epoch',
        ylabel='Gradient Norm',
        title='Gradient Norm - Step',
        legend=['D GN', 'G GN']
    )
)


for epoch in range(args.niter):
    for i, (data, target)  in enumerate(dataloader, 0):

        # permute B X D X C x H x W ==> B X C X D x H x W
        data = data.permute(0,2,1,3,4)
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu = data.to(device)
        batch_size = real_cpu.size(0)
        label = torch.full((batch_size,), real_label, device=device)
        # print('input ', real_cpu.shape)
        # print('label', label.shape)
        output = netD(real_cpu)
        # print('pred', output.shape)

        errD_real = criterion(output, label)
        errD_real.backward()
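The excerpt ends after the real half of the discriminator update. Under the standard DCGAN recipe, the fake half and the generator step would follow roughly as below; netG, nz, fake_label, optimizerD and optimizerG are not shown in the excerpt, so their names and the noise shape are assumptions here:

        # assumed continuation, not part of the excerpt: train D on fakes, then update G
        noise = torch.randn(batch_size, nz, 1, 1, device=device)  # shape assumes a DCGAN-style G; a 3D G may differ
        fake = netG(noise)
        label.fill_(fake_label)
        output = netD(fake.detach())          # detach: no gradient flows into G during the D step
        errD_fake = criterion(output, label)
        errD_fake.backward()
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)               # the generator wants fakes scored as real
        output = netD(fake)
        errG = criterion(output, label)
        errG.backward()
        optimizerG.step()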
Example #25
    def eval_epoch(self, epoch):  
        
        self.model.eval()
        if not os.path.exists(self.csv_path):
            os.mkdir(self.csv_path)
        eval_csv = os.path.join(self.csv_path, 'eval.csv')
        pred_list, target_list, loss_list, pos_list = [],[],[],[]
        for batch_idx, item in enumerate(self.val_loader):
            if self.cfig['model_name'] in ['disrnn']:
                data, target, dist = item
                data, target, dist = data.to(self.device), target.to(self.device), dist.to(self.device)
                if batch_idx == 0: print (dist.shape)
            else:
                data, target, ID = item
                data, target = data.to(self.device), target.to(self.device)
            
            if self.cfig['model_name'][-3:] == 'rnn':
                data = data.permute([1,0,2,3,4])
            
            self.optim.zero_grad()
            if self.cfig['model_name'] in ['disrnn']:
                pred = self.model(data, dist)
            else:
                pred = self.model(data)   
            pred_prob = F.softmax(pred, dim=1)
            #loss = self.criterion(pred, target)
            loss = nn.CrossEntropyLoss()(pred, target)
            pred_cls = pred.data.max(1)[1]  
            pos_list += pred_prob[:, 1].data.cpu().numpy().tolist()
            pred_list += pred_cls.data.cpu().numpy().tolist()
            target_list += target.data.cpu().numpy().tolist()
            loss_list.append(loss.data.cpu().numpy().tolist())
            
        
        accuracy=accuracy_score(target_list,pred_list)
        print (confusion_matrix(target_list, pred_list))
        fpr, tpr, threshold = metrics.roc_curve(target_list, pos_list)
        roc_auc = metrics.auc(fpr, tpr) 
        #-------------------------save to csv -----------------------#
        if not os.path.exists(eval_csv):
            csv_info = ['epoch', 'loss', 'auc', 'accuracy']
            init_csv = pd.DataFrame()
            for key in csv_info:
                init_csv[key] = []
            init_csv.to_csv(eval_csv)
        df = pd.read_csv(eval_csv)
        data = pd.DataFrame()
        tmp_epoch = df['epoch'].tolist()
        tmp_epoch.append(epoch)

        #print ('------------------', tmp_epoch)
        tmp_loss = df['loss'].tolist()
        tmp_loss.append(np.mean(loss_list))
        tmp_auc = df['auc'].tolist()
        tmp_auc.append(roc_auc)
        tmp_acc = df['accuracy'].tolist()
        tmp_acc.append(accuracy)
        
        data['epoch'], data['loss'], data['auc'], data['accuracy'] = tmp_epoch, tmp_loss, tmp_auc, tmp_acc
        data.to_csv(eval_csv)
        print ('val accuracy: ', accuracy  , 'val auc: ', roc_auc)
        print ('max val auc at: ', max(tmp_auc), tmp_auc.index(max(tmp_auc)))
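One thing worth noting in eval_epoch: it calls self.optim.zero_grad() even though no backward pass ever runs, and activations are kept for a gradient that is never computed. Wrapping inference in torch.no_grad() is the usual pattern and also saves memory; a minimal sketch with a stand-in model and loader:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)                          # stand-in for self.model
val_loader = [(torch.rand(8, 4), torch.randint(0, 2, (8,)))]

model.eval()
with torch.no_grad():                            # no autograd bookkeeping during eval
    for data, target in val_loader:
        prob = torch.softmax(model(data), dim=1) # explicit dim, no deprecation warning
        pred_cls = prob.argmax(dim=1)
print(pred_cls)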
Example #26
 def format(self, rgb_array):
     data = torch.from_numpy(rgb_array).float().to(device='cuda')
     data /= 255
     data = data.permute([2, 0, 1])
     data = data.reshape([-1, 3, img_size, img_size])
     return data.reshape(-1, img_size * img_size * 3)
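A usage sketch for format() above; img_size and the CUDA device are module-level assumptions here, so the actual call is left commented:

import numpy as np

img_size = 64                            # assumption: defined at module level
frame = np.random.randint(0, 256, (img_size, img_size, 3)).astype(np.float32)
# out = formatter.format(frame)          # hypothetical instance; requires a CUDA device
# out.shape == (1, img_size * img_size * 3), with pixel values scaled into [0, 1]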
Example #27
    def _getitem_unfixed_size(self, index):
        if self.training:
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index

        # get the anchor index for current sample index
        # here we set the anchor index to the last one
        # sample in this group
        minibatch_db = [self._roidb[index_ratio]]
        blobs = get_minibatch(minibatch_db, self._num_classes, self.training)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        # we need to random shuffle the bounding box.
        data_height, data_width = data.size(1), data.size(2)
        if self.training:

            gt_grasps = None

            if 'gt_grasps' in blobs:
                # Python 3 range objects cannot be shuffled in place; use an index array
                shuffle_inds_gr = np.arange(blobs['gt_grasps'].shape[0])
                np.random.shuffle(shuffle_inds_gr)
                shuffle_inds_gr = torch.from_numpy(shuffle_inds_gr).long()

                gt_grasps = torch.from_numpy(blobs['gt_grasps'])
                gt_grasps = gt_grasps[shuffle_inds_gr]

                if 'gt_grasp_inds' in blobs:
                    gt_grasps_inds = torch.from_numpy(blobs['gt_grasp_inds'])
                    gt_grasps_inds = gt_grasps_inds[shuffle_inds_gr]

            gt_boxes = None
            if 'gt_boxes' in blobs:
                shuffle_inds_bb = np.arange(blobs['gt_boxes'].shape[0])
                np.random.shuffle(shuffle_inds_bb)
                shuffle_inds_bb = torch.from_numpy(shuffle_inds_bb).long()

                gt_boxes = torch.from_numpy(blobs['gt_boxes'])
                gt_boxes = gt_boxes[shuffle_inds_bb]

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image needs cropping, crop it to the target size.
            ratio = self.ratio_list_batch[index]

            if self._roidb[index_ratio]['need_crop']:
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            if y_s_add == 0:
                                y_s = min_y
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    data = data[:, y_s:(y_s + trim_size), :, :]
                    if gt_boxes is not None:
                        # shift y coordinate of gt_boxes
                        gt_boxes[:, :(gt_boxes.size(1) -
                                      1)][:, 1::2] -= float(y_s)
                        # update gt bounding box according to the trim
                        gt_boxes[:, :(gt_boxes.size(1) - 1)][:, 1::2].clamp_(
                            0, trim_size - 1)

                    if gt_grasps is not None:
                        gt_grasps[:, 1::2] -= float(y_s)
                        keep = (
                            ((gt_grasps[:, 1::2] > 0) &
                             (gt_grasps[:, 1::2] < trim_size - 1)).sum(1) == 4)
                        gt_grasps = gt_grasps[keep]
                        shuffle_inds_gr = shuffle_inds_gr[keep]
                        if 'gt_grasp_inds' in blobs:
                            gt_grasps_inds = gt_grasps_inds[keep]

                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    if gt_boxes is not None:
                        # shift x coordinate of gt_boxes
                        gt_boxes[:, :(gt_boxes.size(1) -
                                      1)][:, 0::2] -= float(x_s)
                        # update gt bounding box according to the trim
                        gt_boxes[:, :(gt_boxes.size(1) - 1)][:, 0::2].clamp_(
                            0, trim_size - 1)

                    if gt_grasps is not None:
                        gt_grasps[:, 0::2] -= float(x_s)
                        keep = (
                            ((gt_grasps[:, 0::2] > 0) &
                             (gt_grasps[:, 0::2] < trim_size - 1)).sum(1) == 4)
                        gt_grasps = gt_grasps[keep]
                        shuffle_inds_gr = shuffle_inds_gr[keep]
                        if 'gt_grasp_inds' in blobs:
                            gt_grasps_inds = gt_grasps_inds[keep]

            # based on the ratio, padding the image.
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                               data_width, 3).zero_()

                padding_data[:data_height, :, :] = data[0]
                # update im_info
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            elif ratio > 1:
                # this means that data_width > data_height
                # if the image need to crop.
                padding_data = torch.FloatTensor(data_height, \
                                               int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                padding_data = torch.FloatTensor(trim_size, trim_size,
                                                 3).zero_()
                padding_data = data[0][:trim_size, :trim_size, :]
                if gt_boxes is not None:
                    # gt_boxes.clamp_(0, trim_size)
                    gt_boxes[:, :(gt_boxes.size(1) - 1)].clamp_(0, trim_size)
                if gt_grasps is not None:
                    keep = (((gt_grasps > 0) &
                             (gt_grasps < trim_size)).sum(1) == 8)
                    gt_grasps = gt_grasps[keep]
                    shuffle_inds_gr = shuffle_inds_gr[keep]
                    if 'gt_grasp_inds' in blobs:
                        gt_grasps_inds = gt_grasps_inds[keep]
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size

            # grasp data
            num_grasps = 0
            gt_grasps_padding = torch.FloatTensor(self.max_num_grasp,
                                                  8).zero_()
            gt_grasp_inds_padding = torch.FloatTensor(
                self.max_num_grasp).zero_()

            if 'gt_grasps' in blobs:
                num_grasps = min(gt_grasps.size(0), self.max_num_grasp)
                gt_grasps_padding[:num_grasps, :] = gt_grasps[:num_grasps]
                if 'gt_grasp_inds' in blobs:
                    gt_grasp_inds_padding[:num_grasps] = gt_grasps_inds[:num_grasps]

            # object detection data
            # 4 coordinates (xmin, ymin, xmax, ymax) and 1 label
            num_boxes = 0
            gt_boxes_padding = torch.FloatTensor(self.max_num_box, 5).zero_()
            rel_mat = torch.FloatTensor(self.max_num_box,
                                        self.max_num_box).zero_()

            if 'gt_boxes' in blobs:
                # check the bounding box:
                not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
                    gt_boxes[:, 1] == gt_boxes[:, 3])
                keep = torch.nonzero(not_keep == 0).view(-1)

                gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                     gt_boxes.size(1)).zero_()
                rel_mat = torch.FloatTensor(self.max_num_box,
                                            self.max_num_box).zero_()

                if keep.numel() != 0:
                    gt_boxes = gt_boxes[keep]
                    shuffle_inds_bb = shuffle_inds_bb[keep]

                    num_boxes = min(gt_boxes.size(0), self.max_num_box)
                    gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]

                    # get relationship matrix
                    if 'nodeinds' in blobs:
                        for o1 in range(num_boxes):
                            for o2 in range(num_boxes):
                                ind_o1 = blobs['nodeinds'][
                                    shuffle_inds_bb[o1].item()]
                                ind_o2 = blobs['nodeinds'][
                                    shuffle_inds_bb[o2].item()]
                                if ind_o2 == ind_o1 or rel_mat[o1,
                                                               o2].item() != 0:
                                    continue
                                o1_children = blobs['children'][
                                    shuffle_inds_bb[o1].item()]
                                o1_fathers = blobs['fathers'][
                                    shuffle_inds_bb[o1].item()]
                                if ind_o2 in o1_children:
                                    # o1 is o2's father
                                    rel_mat[o1, o2] = cfg.VMRN.FATHER
                                elif ind_o2 in o1_fathers:
                                    # o1 is o2's child
                                    rel_mat[o1, o2] = cfg.VMRN.CHILD
                                else:
                                    # o1 and o2 have no relationship
                                    rel_mat[o1, o2] = cfg.VMRN.NOREL

            # map each raw node index to the sequence number of the returned box, and filter out grasps that belong to dropped boxes.
            if 'gt_grasp_inds' in blobs:
                gt_grasp_inds_padding_ori = gt_grasp_inds_padding.clone()
                order2inds = dict(enumerate(blobs['nodeinds']))
                inds2order = dict(zip(order2inds.values(), order2inds.keys()))
                shuffle2order = dict(enumerate(shuffle_inds_bb.data.numpy()))
                order2shuffle = dict(
                    zip(shuffle2order.values(), shuffle2order.keys()))

                # make box indices begin with 1
                for key in order2shuffle.keys():
                    order2shuffle[key] += 1

                for ind in blobs['nodeinds']:
                    gt_grasp_inds_padding[gt_grasp_inds_padding_ori == \
                                          float(ind)] = float(order2shuffle[inds2order[ind]])

            # permute trim_data to adapt to downstream processing
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(4)
            '''
            im2show = padding_data.clone().squeeze().permute(1, 2, 0).cpu().numpy()
            grasps2show = gt_grasps.clone().cpu().numpy()
            box2show = gt_boxes.clone().cpu().numpy()
            label2show = box2show[:, -1].astype(np.int32)
            box2show = box2show[:,:-1]
            inds2show = range(1, box2show.shape[0]+1)
            graspinds2show = gt_grasp_inds_padding.clone().cpu().numpy()
            print(blobs['img_id'])
            self._show_object_label(im2show, box2show, label2show, grasps2show, inds2show, graspinds2show)
            '''

            return padding_data, im_info, gt_boxes_padding, gt_grasps_padding, num_boxes, \
                   num_grasps, rel_mat, gt_grasp_inds_padding

        else:
            data = data.permute(0, 3, 1,
                                2).contiguous().view(3, data_height,
                                                     data_width)
            im_info = im_info.view(4)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            gt_grasps = torch.FloatTensor([1, 1, 1, 1, 1, 1, 1, 1])
            gt_grasp_inds = torch.FloatTensor([0])
            num_boxes = 0
            num_grasps = 0
            rel_mat = torch.FloatTensor([0])

            return data, im_info, gt_boxes, gt_grasps, num_boxes, num_grasps, rel_mat, gt_grasp_inds
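The rel_mat double loop above encodes pairwise manipulation-order relations. A self-contained sketch with the cfg constants replaced by illustrative values:

import torch

FATHER, CHILD, NOREL = 1, 2, 3                 # stand-ins for cfg.VMRN.*
children = {0: [1], 1: [], 2: []}              # node 0 must be moved before node 1
fathers  = {0: [],  1: [0], 2: []}

n = 3
rel_mat = torch.zeros(n, n)
for o1 in range(n):
    for o2 in range(n):
        if o1 == o2 or rel_mat[o1, o2].item() != 0:
            continue
        if o2 in children[o1]:
            rel_mat[o1, o2] = FATHER           # o1 is o2's father
        elif o2 in fathers[o1]:
            rel_mat[o1, o2] = CHILD            # o1 is o2's child
        else:
            rel_mat[o1, o2] = NOREL
print(rel_mat)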
Example #28
if args.cuda:
    if args.gpu != -1:
        torch.cuda.set_device(args.gpu)
        model = model.cuda()
    else:
        device_id = [0, 1, 2, 3]
        torch.cuda.set_device(device_id[0])
        model = nn.DataParallel(model, device_ids=device_id).cuda()

optimizer = optim.Adam(model.parameters(), lr=args.lr)
scheduler = StepLR(optimizer, step_size=30, gamma=0.5)

data = torch.rand(2, 3, 5).float().to(args.gpu)
print("data", data, data.shape)
data = data.permute(0, 2, 1)
print("data_p", data, data.shape)
model.train()
output = model(data)
print("output", output)

# def train(model, loader, epoch):
#     scheduler.step()
#     model.train()
#     torch.set_grad_enabled(True)
#     correct = 0
#     dataset_size = 0
#     for batch_idx, (data, target) in enumerate(loader):
#         # print("data", data, data.shape, "target", target)
#         dataset_size += data.shape[0]
#         data, target = data.float(), target.long().squeeze()
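The permute in this snippet is about memory layout, not content: modules such as nn.Conv1d expect channels-first (B, C, N) input, so data stored as (B, N, C) has to be permuted first (which direction applies depends on what the model expects). A runnable check:

import torch
import torch.nn as nn

conv = nn.Conv1d(in_channels=3, out_channels=8, kernel_size=1)
points = torch.rand(2, 5, 3)          # (batch, num_points, xyz)
out = conv(points.permute(0, 2, 1))   # -> (batch, channels, num_points)
print(out.shape)                      # torch.Size([2, 8, 5])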
Example #29
    def __getitem__(self, index):  # only one sample
        # during training
        if self.training:
            # s_ratio_list -> the aspect-ratio list sorted in ascending order
            index_ratio = int(self.ratio_index[index])
        else:
            index_ratio = index

        # get the anchor index for current sample index
        # here we set the anchor index to the last one
        # sample in this group
        '''
        Fetch the image's roi-parameter dict, ordered by aspect ratio (ascending):
        {'boxes': boxes,
         'gt_classes': gt_classes,
         'gt_ishard': ishards,
         'gt_overlaps': overlaps,
         'flipped': False,  # not flipped
         'seg_areas': seg_areas}
        '''
        # minibatch_db is a list [] holding one image's roi dict
        minibatch_db = [self._roidb[index_ratio]]
        '''
        Key step -> the resulting blobs dict contains:
        'data': the image (a 4D np array that really holds just one 3D image)
        'need_backprop': 1D np array [1.] meaning backprop is required
        'gt_boxes': 2D np array for regression + classification, one entry per object
        'im_info': 2D np array of image info (height, width, scale); only one image
        'img_id': int image id
        '''
        blobs = get_minibatch(minibatch_db, self._num_classes)
        # load the data into torch tensors
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        # we need to random shuffle the bounding box.
        # take the image's H and W
        data_height, data_width = data.size(1), data.size(2)
        # during the training phase
        if self.training:
            """
            da-faster-rcnn layer............
            """
            # shuffle the bbox order and move the boxes to torch
            np.random.shuffle(blobs['gt_boxes'])
            gt_boxes = torch.from_numpy(blobs['gt_boxes'])
            need_backprop = blobs['need_backprop'][0]

            ########################################################
            # padding the input image to fixed size for each group #
            ########################################################

            # NOTE1: need to cope with the case where a group covers both conditions. (done)
            # NOTE2: need to consider the situation for the tail samples. (no worry)
            # NOTE3: need to implement a parallel data loader. (no worry)
            # get the index range

            # if the image needs cropping, crop it to the target size.
            # read the target aspect ratio for this batch
            ratio = self.ratio_list_batch[index]

            # crop the image if needed: cut `data` and shift the gt_boxes coordinates
            if self._roidb[index_ratio]['need_crop']:
                # tall image
                if ratio < 1:
                    # this means that data_width << data_height, we need to crop the
                    # data_height
                    # read the highest and lowest extents of the bboxes
                    min_y = int(torch.min(gt_boxes[:, 1]))
                    max_y = int(torch.max(gt_boxes[:, 3]))
                    # the size the long side (height) must be trimmed to
                    trim_size = int(np.floor(data_width / ratio))
                    if trim_size > data_height:
                        trim_size = data_height
                    # maximum extent of the bboxes
                    box_region = max_y - min_y + 1
                    if min_y == 0:
                        y_s = 0
                    else:
                        # bbox extent < crop window
                        if (box_region - trim_size) < 0:
                            # bound the range of valid crop starts and sample within it
                            y_s_min = max(max_y - trim_size, 0)
                            y_s_max = min(min_y, data_height - trim_size)
                            if y_s_min == y_s_max:
                                y_s = y_s_min
                            else:
                                y_s = np.random.choice(range(y_s_min, y_s_max))
                        # bbox extent >= crop window
                        else:
                            y_s_add = int((box_region - trim_size) / 2)
                            # extents exactly equal
                            if y_s_add == 0:
                                y_s = min_y
                            # bbox extent > crop window
                            else:
                                y_s = np.random.choice(
                                    range(min_y, min_y + y_s_add))
                    # crop the image
                    # following the rule above: keep the aspect ratio fixed while covering as much bbox area as possible
                    data = data[:, y_s:(y_s + trim_size), :, :]

                    # the bbox coordinates move along with the crop
                    # shift y coordinate of gt_boxes
                    gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                    gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                    # update gt bounding box according to the trim
                    # keep boxes inside the image border (for the bbox extent > crop window case)
                    gt_boxes[:, 1].clamp_(0, trim_size - 1)
                    gt_boxes[:, 3].clamp_(0, trim_size - 1)
                # wide image: analogous handling
                else:
                    # this means that data_width >> data_height, we need to crop the
                    # data_width
                    min_x = int(torch.min(gt_boxes[:, 0]))
                    max_x = int(torch.max(gt_boxes[:, 2]))
                    trim_size = int(np.ceil(data_height * ratio))
                    if trim_size > data_width:
                        trim_size = data_width
                    box_region = max_x - min_x + 1
                    if min_x == 0:
                        x_s = 0
                    else:
                        if (box_region - trim_size) < 0:
                            x_s_min = max(max_x - trim_size, 0)
                            x_s_max = min(min_x, data_width - trim_size)
                            if x_s_min == x_s_max:
                                x_s = x_s_min
                            else:
                                x_s = np.random.choice(range(x_s_min, x_s_max))
                        else:
                            x_s_add = int((box_region - trim_size) / 2)
                            if x_s_add == 0:
                                x_s = min_x
                            else:
                                x_s = np.random.choice(
                                    range(min_x, min_x + x_s_add))
                    # crop the image
                    data = data[:, :, x_s:(x_s + trim_size), :]

                    # shift x coordinate of gt_boxes
                    gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                    gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                    # update gt bounding box according to the trim
                    gt_boxes[:, 0].clamp_(0, trim_size - 1)
                    gt_boxes[:, 2].clamp_(0, trim_size - 1)

            # based on the ratio, padding the image.
            # stretch the image by zero-padding
            # tall image
            if ratio < 1:
                # this means that data_width < data_height
                trim_size = int(np.floor(data_width / ratio))

                # allocate a (target_height x width x 3) buffer; the crop above may already have shortened the image
                padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                                 data_width, 3).zero_()
                # data_height can be smaller than np.ceil(data_width / ratio); the rows below stay zero
                padding_data[:data_height, :, :] = data[0]
                # update im_info
                # 更改图片信息
                im_info[0, 0] = padding_data.size(0)
                # print("height %d %d \n" %(index, anchor_idx))
            # wide image
            elif ratio > 1:
                # this means that data_width > data_height
                # if the image need to crop.
                padding_data = torch.FloatTensor(data_height, \
                                                 int(np.ceil(data_height * ratio)), 3).zero_()
                padding_data[:, :data_width, :] = data[0]
                im_info[0, 1] = padding_data.size(1)
            else:
                trim_size = min(data_height, data_width)
                padding_data = torch.FloatTensor(trim_size, trim_size,
                                                 3).zero_()
                padding_data = data[0][:trim_size, :trim_size, :]
                # gt_boxes.clamp_(0, trim_size)
                gt_boxes[:, :4].clamp_(0, trim_size)
                im_info[0, 0] = trim_size
                im_info[0, 1] = trim_size
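            # illustrative numbers: with data_width = 600 and ratio = 0.5 the
            # ratio < 1 branch above allocates a ceil(600 / 0.5) = 1200 x 600 x 3
            # buffer and copies the original rows in from the top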

            # check the bounding boxes:
            # keep only boxes with nonzero area
            not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                             == gt_boxes[:, 3])
            keep = torch.nonzero(not_keep == 0).view(-1)

            # allocate a (max_num_box x box dim) tensor initialized to zero
            gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                                 gt_boxes.size(1)).zero_()
            # if keep holds at least one index
            if keep.numel() != 0:
                # gather the surviving boxes
                gt_boxes = gt_boxes[keep]
                # cap the box count at max_num_box
                num_boxes = min(gt_boxes.size(0), self.max_num_box)
                # copy them into the padded tensor
                gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
            else:
                num_boxes = 0

            # permute padding_data to adapt to downstream processing
            # move the channel dimension to the front (HWC -> CHW)
            # view() only works on contiguous tensors; after transpose or
            # permute, call contiguous() first to get a contiguous copy
            padding_data = padding_data.permute(2, 0, 1).contiguous()
            im_info = im_info.view(3)
            # returns:
            #   padding_data     -> the image data
            #   im_info          -> image size info (height, width, scale)
            #   gt_boxes_padding -> the 5 annotated values per bbox
            #   num_boxes        -> the number of bboxes
            #   need_backprop    -> whether this sample needs backprop
            return padding_data, im_info, gt_boxes_padding, num_boxes, \
                   need_backprop
        # not training -> do not load the ground truth
        else:
            data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height,
                                                              data_width)
            im_info = im_info.view(3)

            gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
            num_boxes = 0
            need_backprop = 0

            return data, im_info, gt_boxes, num_boxes, need_backprop
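
The start-index selection above is written out twice, once per axis. A minimal
standalone sketch of the same three-way rule (choose_crop_start is a made-up
name for illustration, not part of the loader):

import numpy as np

def choose_crop_start(lo, hi, extent, trim_size):
    # mirror of the y_s / x_s logic: where should the trim window start?
    box_region = hi - lo + 1
    if lo == 0:
        return 0  # boxes already touch the top/left edge
    if box_region < trim_size:
        # every box fits inside the window: sample a start that keeps them in
        s_min = max(hi - trim_size, 0)
        s_max = min(lo, extent - trim_size)
        return s_min if s_min == s_max else np.random.choice(range(s_min, s_max))
    # the box span exceeds the window: start at the span, shifted at random
    add = (box_region - trim_size) // 2
    return lo if add == 0 else np.random.choice(range(lo, lo + add))

For example, choose_crop_start(50, 120, 300, 100) samples from range(20, 50),
exactly as the y_s branch above would.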
Example #30
  def __getitem__(self, index):
    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # get the anchor index for current sample index
    # here we set the anchor index to the last one
    # sample in this group
    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    # we need to random shuffle the bounding box.
    data_height, data_width = data.size(1), data.size(2)
    if self.training:
        np.random.shuffle(blobs['gt_boxes'])
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])

        ########################################################
        # padding the input image to fixed size for each group #
        ########################################################

        # NOTE1: need to cope with the case where a group covers both conditions. (done)
        # NOTE2: need to consider the situation for the tail samples. (no worry)
        # NOTE3: need to implement a parallel data loader. (no worry)
        # get the index range

        # if the image need to crop, crop to the target size.
        ratio = self.ratio_list_batch[index]

        if self._roidb[index_ratio]['need_crop']:
            if ratio < 1:
                # this means that data_width << data_height, we need to crop the
                # data_height
                min_y = int(torch.min(gt_boxes[:, 1]))
                max_y = int(torch.max(gt_boxes[:, 3]))
                trim_size = int(np.floor(data_width / ratio))
                if trim_size > data_height:
                    trim_size = data_height
                box_region = max_y - min_y + 1
                if min_y == 0:
                    y_s = 0
                else:
                    if (box_region - trim_size) < 0:
                        y_s_min = max(max_y - trim_size, 0)
                        y_s_max = min(min_y, data_height - trim_size)
                        if y_s_min == y_s_max:
                            y_s = y_s_min
                        else:
                            y_s = np.random.choice(range(y_s_min, y_s_max))
                    else:
                        y_s_add = int((box_region - trim_size) / 2)
                        if y_s_add == 0:
                            y_s = min_y
                        else:
                            y_s = np.random.choice(range(min_y, min_y + y_s_add))
                # crop the image
                data = data[:, y_s:(y_s + trim_size), :, :]

                # shift y coordinate of gt_boxes
                gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
                gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)

                # update gt bounding boxes according to the trim
                gt_boxes[:, 1].clamp_(0, trim_size - 1)
                gt_boxes[:, 3].clamp_(0, trim_size - 1)

            else:
                # this means that data_width >> data_height, we need to crop the
                # data_width
                min_x = int(torch.min(gt_boxes[:, 0]))
                max_x = int(torch.max(gt_boxes[:, 2]))
                trim_size = int(np.ceil(data_height * ratio))
                if trim_size > data_width:
                    trim_size = data_width
                box_region = max_x - min_x + 1
                if min_x == 0:
                    x_s = 0
                else:
                    if (box_region - trim_size) < 0:
                        x_s_min = max(max_x - trim_size, 0)
                        x_s_max = min(min_x, data_width - trim_size)
                        if x_s_min == x_s_max:
                            x_s = x_s_min
                        else:
                            x_s = np.random.choice(range(x_s_min, x_s_max))
                    else:
                        x_s_add = int((box_region - trim_size) / 2)
                        if x_s_add == 0:
                            x_s = min_x
                        else:
                            x_s = np.random.choice(range(min_x, min_x + x_s_add))
                # crop the image
                data = data[:, :, x_s:(x_s + trim_size), :]

                # shift x coordinate of gt_boxes
                gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
                gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
                # update gt bounding boxes according to the trim
                gt_boxes[:, 0].clamp_(0, trim_size - 1)
                gt_boxes[:, 2].clamp_(0, trim_size - 1)

        # based on the ratio, padding the image.
        if ratio < 1:
            # this means that data_width < data_height
            trim_size = int(np.floor(data_width / ratio))

            padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
                                             data_width, 3).zero_()

            padding_data[:data_height, :, :] = data[0]
            # update im_info
            im_info[0, 0] = padding_data.size(0)
            # print("height %d %d \n" %(index, anchor_idx))
        elif ratio > 1:
            # this means that data_width > data_height
            # if the image need to crop.
            padding_data = torch.FloatTensor(data_height, \
                                             int(np.ceil(data_height * ratio)), 3).zero_()
            padding_data[:, :data_width, :] = data[0]
            im_info[0, 1] = padding_data.size(1)
        else:
            trim_size = min(data_height, data_width)
            padding_data = torch.FloatTensor(trim_size, trim_size, 3).zero_()
            padding_data = data[0][:trim_size, :trim_size, :]
            # gt_boxes.clamp_(0, trim_size)
            gt_boxes[:, :4].clamp_(0, trim_size)
            im_info[0, 0] = trim_size
            im_info[0, 1] = trim_size

        # check the bounding boxes: keep only those with nonzero area
        not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1] == gt_boxes[:, 3])
        keep = torch.nonzero(not_keep == 0).view(-1)

        gt_boxes_padding = torch.FloatTensor(self.max_num_box, gt_boxes.size(1)).zero_()
        if keep.numel() != 0:
            gt_boxes = gt_boxes[keep]
            num_boxes = min(gt_boxes.size(0), self.max_num_box)
            gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
        else:
            num_boxes = 0

        # permute padding_data to adapt to downstream processing
        padding_data = padding_data.permute(2, 0, 1).contiguous()
        im_info = im_info.view(3)

        return padding_data, im_info, gt_boxes_padding, num_boxes
    else:
        data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)

        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0

        return data, im_info, gt_boxes, num_boxes
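
Both variants end with the same degenerate-box filter and fixed-size padding.
A self-contained sketch of just that step (pad_gt_boxes and the max_num_box
default are illustrative names, not taken from the loader):

import torch

def pad_gt_boxes(gt_boxes, max_num_box=20):
    # drop boxes whose width or height collapsed to zero after clamping
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)

    gt_boxes_padding = torch.zeros(max_num_box, gt_boxes.size(1))
    if keep.numel() != 0:
        kept = gt_boxes[keep]
        num_boxes = min(kept.size(0), max_num_box)
        gt_boxes_padding[:num_boxes, :] = kept[:num_boxes]
    else:
        num_boxes = 0
    return gt_boxes_padding, num_boxes

boxes = torch.tensor([[0., 0., 10., 10., 1.],   # valid box
                      [5., 5., 5., 9., 2.]])    # zero width -> dropped
padded, n = pad_gt_boxes(boxes)
assert n == 1 and padded.shape == (20, 5)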
Example #31
 def restore(cls, data):
     # CHW tensor -> HWC numpy array on the CPU
     data = data.permute(1, 2, 0).to('cpu').data.numpy()
     # undo normalization: rescale to [0, 255] and add back the channel means
     data = data * 255.
     data += cls.RGB_MEAN
     return data.astype(np.uint8)
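
restore inverts the usual subtract-mean/divide-by-255 preprocessing. A quick
round trip, assuming a class that carries an RGB_MEAN attribute (Denorm and
the mean values here are illustrative):

import numpy as np
import torch

class Denorm:
    RGB_MEAN = np.array([122.77, 115.95, 102.98])  # illustrative channel means

    @classmethod
    def restore(cls, data):
        data = data.permute(1, 2, 0).to('cpu').data.numpy()
        data = data * 255.
        data += cls.RGB_MEAN
        return data.astype(np.uint8)

# build a normalized CHW tensor the way such a mean/scale pipeline would
img = np.random.randint(0, 256, (4, 4, 3)).astype(np.float64)
chw = torch.from_numpy((img - Denorm.RGB_MEAN) / 255.).permute(2, 0, 1).float()
hwc = Denorm.restore(chw)       # back to HWC uint8
print(hwc.shape, hwc.dtype)     # (4, 4, 3) uint8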